bao_tree/
iter.rs

1//! Iterators over BaoTree nodes
2//!
3//! Range iterators take a reference to the ranges, and therefore require a lifetime parameter.
4//! They can be used without lifetime parameters using self referencing structs.
5use std::fmt::{self, Debug};
6
7use self_cell::self_cell;
8use smallvec::SmallVec;
9
10use crate::{split, BaoTree, BlockSize, ChunkNum, ChunkRanges, ChunkRangesRef, TreeNode};
11
12/// Extended node info.
13///
14/// Some of the information is redundant, but it is convenient to have it all in one place.
15///
16/// Usually this is used within an iterator, so we hope that the compiler will optimize away
17/// the redundant information.
18#[derive(Debug, PartialEq, Eq)]
19#[cfg(test)]
20pub struct NodeInfo<'a> {
21    /// the node
22    pub node: TreeNode,
23    /// the node is the root node (needs special handling when computing hash)
24    pub is_root: bool,
25    /// ranges of the node and it's two children
26    pub ranges: &'a ChunkRangesRef,
27    /// left child intersection with the query range
28    pub l_ranges: &'a ChunkRangesRef,
29    /// right child intersection with the query range
30    pub r_ranges: &'a ChunkRangesRef,
31    /// the node is fully included in the query range
32    pub full: bool,
33    /// the node is a leaf for the purpose of this query
34    pub query_leaf: bool,
35    /// true if this node is the last leaf, and it is <= half full
36    pub is_half_leaf: bool,
37}
38
39/// Iterator over all nodes in a BaoTree in pre-order that overlap with a given chunk range.
40///
41/// This is mostly used internally
42#[derive(Debug)]
43#[cfg(test)]
44pub struct PreOrderPartialIterRef<'a> {
45    /// the tree we want to traverse
46    tree: BaoTree,
47    /// the minimum level to always emit, even if the node is fully within the query range
48    min_level: u8,
49    /// stack of nodes to visit, together with the ranges that are relevant for the node
50    ///
51    /// The node is shifted by the block size, so these are not normal nodes!
52    stack: SmallVec<[(TreeNode, &'a ChunkRangesRef); 8]>,
53    /// number of valid nodes, needed in shifted.right_descendant
54    ///
55    /// This is also shifted by the block size!
56    shifted_filled_size: TreeNode,
57    /// The root node, shifted by the block size, needed for the is_root check
58    shifted_root: TreeNode,
59}
60
61#[cfg(test)]
62impl<'a> PreOrderPartialIterRef<'a> {
63    /// Create a new iterator over the tree.
64    pub fn new(tree: BaoTree, ranges: &'a ChunkRangesRef, min_level: u8) -> Self {
65        let mut stack = SmallVec::new();
66        let (shifted_root, shifted_filled_size) = tree.shifted();
67        stack.push((shifted_root, ranges));
68        Self {
69            tree,
70            min_level,
71            stack,
72            shifted_filled_size,
73            shifted_root,
74        }
75    }
76
77    /// Get a reference to the tree.
78    pub fn tree(&self) -> &BaoTree {
79        &self.tree
80    }
81}
82
83#[cfg(test)]
84impl<'a> Iterator for PreOrderPartialIterRef<'a> {
85    type Item = NodeInfo<'a>;
86
87    fn next(&mut self) -> Option<Self::Item> {
88        let tree = &self.tree;
89        loop {
90            let (shifted, ranges) = self.stack.pop()?;
91            if ranges.is_empty() {
92                continue;
93            }
94            let node = shifted.subtract_block_size(tree.block_size.0);
95            let is_half_leaf = !tree.is_persisted(node);
96            let (l_ranges, r_ranges) = if !is_half_leaf {
97                // split the ranges into left and right
98                split(ranges, node)
99            } else {
100                (ranges, ranges)
101            };
102            // check if the node is fully included
103            let full = ranges.is_all();
104            // we can't recurse if the node is a leaf
105            // we don't want to recurse if the node is full and below the minimum level
106            let query_leaf = shifted.is_leaf() || (full && node.level() < self.min_level as u32);
107            // recursion is just pushing the children onto the stack
108            if !query_leaf {
109                let l = shifted.left_child().unwrap();
110                let r = shifted.right_descendant(self.shifted_filled_size).unwrap();
111                // push right first so we pop left first
112                self.stack.push((r, r_ranges));
113                self.stack.push((l, l_ranges));
114            }
115            // the first node is the root, so just set the flag to false afterwards
116            let is_root = shifted == self.shifted_root;
117            // emit the node in any case
118            break Some(NodeInfo {
119                node,
120                ranges,
121                l_ranges,
122                r_ranges,
123                full,
124                query_leaf,
125                is_root,
126                is_half_leaf,
127            });
128        }
129    }
130}
131
132/// Iterator over all nodes in a tree in post-order.
133///
134/// If you want to iterate only down to some level, you need to shift the tree
135/// before.
136#[derive(Debug)]
137pub struct PostOrderNodeIter {
138    /// the overall number of nodes in the tree
139    len: TreeNode,
140    /// the current node, None if we are done
141    curr: TreeNode,
142    /// where we came from, used to determine the next node
143    prev: Prev,
144}
145
146impl PostOrderNodeIter {
147    /// Create a new iterator given a root node and a len
148    pub fn new(root: TreeNode, len: TreeNode) -> Self {
149        Self {
150            curr: root,
151            len,
152            prev: Prev::Parent,
153        }
154    }
155
156    fn go_up(&mut self, curr: TreeNode) {
157        let prev = curr;
158        (self.curr, self.prev) = if let Some(parent) = curr.restricted_parent(self.len) {
159            (
160                parent,
161                if prev < parent {
162                    Prev::Left
163                } else {
164                    Prev::Right
165                },
166            )
167        } else {
168            (curr, Prev::Done)
169        };
170    }
171}
172
173impl Iterator for PostOrderNodeIter {
174    type Item = TreeNode;
175
176    fn next(&mut self) -> Option<Self::Item> {
177        loop {
178            let curr = self.curr;
179            match self.prev {
180                Prev::Parent => {
181                    if let Some(child) = curr.left_child() {
182                        // go left first when coming from above, don't emit curr
183                        self.curr = child;
184                        self.prev = Prev::Parent;
185                    } else {
186                        // we are a left or right leaf, go up and emit curr
187                        self.go_up(curr);
188                        break Some(curr);
189                    }
190                }
191                Prev::Left => {
192                    // no need to check is_leaf, since we come from a left child
193                    // go right when coming from left, don't emit curr
194                    self.curr = curr.right_descendant(self.len).unwrap();
195                    self.prev = Prev::Parent;
196                }
197                Prev::Right => {
198                    // go up in any case, do emit curr
199                    self.go_up(curr);
200                    break Some(curr);
201                }
202                Prev::Done => {
203                    break None;
204                }
205            }
206        }
207    }
208}
209
210/// Iterator over all nodes in a BaoTree in pre-order.
211#[derive(Debug)]
212pub struct PreOrderNodeIter {
213    /// the overall number of nodes in the tree
214    len: TreeNode,
215    /// the current node, None if we are done
216    curr: TreeNode,
217    /// where we came from, used to determine the next node
218    prev: Prev,
219}
220
221impl PreOrderNodeIter {
222    /// Create a new iterator given a root node and a len
223    pub fn new(root: TreeNode, len: TreeNode) -> Self {
224        Self {
225            curr: root,
226            len,
227            prev: Prev::Parent,
228        }
229    }
230
231    fn go_up(&mut self, curr: TreeNode) {
232        let prev = curr;
233        (self.curr, self.prev) = if let Some(parent) = curr.restricted_parent(self.len) {
234            (
235                parent,
236                if prev < parent {
237                    Prev::Left
238                } else {
239                    Prev::Right
240                },
241            )
242        } else {
243            (curr, Prev::Done)
244        };
245    }
246}
247
248impl Iterator for PreOrderNodeIter {
249    type Item = TreeNode;
250
251    fn next(&mut self) -> Option<Self::Item> {
252        loop {
253            let curr = self.curr;
254            match self.prev {
255                Prev::Parent => {
256                    if let Some(child) = curr.left_child() {
257                        // go left first when coming from above
258                        self.curr = child;
259                        self.prev = Prev::Parent;
260                    } else {
261                        // we are a left or right leaf, go up
262                        self.go_up(curr);
263                    }
264                    // emit curr before children (pre-order)
265                    break Some(curr);
266                }
267                Prev::Left => {
268                    // no need to check is_leaf, since we come from a left child
269                    // go right when coming from left, don't emit curr
270                    self.curr = curr.right_descendant(self.len).unwrap();
271                    self.prev = Prev::Parent;
272                }
273                Prev::Right => {
274                    // go up in any case
275                    self.go_up(curr);
276                }
277                Prev::Done => {
278                    break None;
279                }
280            }
281        }
282    }
283}
284
285#[derive(Debug)]
286enum Prev {
287    Parent,
288    Left,
289    Right,
290    Done,
291}
292
293#[derive(Debug, Clone, Copy, PartialEq, Eq)]
294/// A chunk describeds what to read or write next
295///
296/// In some cases you want additional information about what part of the chunk matches the query.
297/// That is what the `R` type parameter is for. By default it is `()`.
298pub enum BaoChunk<R = ()> {
299    /// expect a 64 byte parent node.
300    ///
301    /// To validate, use parent_cv using the is_root value
302    Parent {
303        /// The tree node, useful for error reporting
304        node: TreeNode,
305        /// This is the root, to be passed to parent_cv
306        is_root: bool,
307        /// Push the left hash to the stack, since it will be needed later
308        left: bool,
309        /// Push the right hash to the stack, since it will be needed later
310        right: bool,
311        /// Additional information about what part of the chunk matches the query
312        ranges: R,
313    },
314    /// expect data of size `size`
315    ///
316    /// To validate, use hash_block using the is_root and start_chunk values
317    Leaf {
318        /// Start chunk, to be passed to hash_block
319        start_chunk: ChunkNum,
320        /// Size of the data to expect. Will be chunk_group_bytes for all but the last block.
321        size: usize,
322        /// This is the root, to be passed to hash_block
323        is_root: bool,
324        /// Additional information about what part of the chunk matches the query
325        ranges: R,
326    },
327}
328
329impl<T> BaoChunk<T> {
330    /// Produce a nice debug string for the chunk
331    #[cfg(test)]
332    pub fn to_debug_string(&self, max_level: usize) -> String {
333        match self {
334            BaoChunk::Parent { node, is_root, .. } => {
335                let n = max_level.saturating_sub(node.level() as usize + 1);
336                let prefix = " ".repeat(n);
337                let start_chunk = node.chunk_range().start;
338                format!(
339                    "{}{},{},{}",
340                    prefix,
341                    start_chunk.to_bytes(),
342                    node.level(),
343                    is_root
344                )
345            }
346            BaoChunk::Leaf {
347                start_chunk,
348                size,
349                is_root,
350                ..
351            } => {
352                let prefix = " ".repeat(max_level);
353                format!("{}{},{},{}", prefix, start_chunk.to_bytes(), size, is_root)
354            }
355        }
356    }
357
358    /// Set the ranges to the unit value
359    pub fn without_ranges(&self) -> BaoChunk {
360        match self {
361            Self::Parent {
362                node,
363                is_root,
364                left,
365                right,
366                ..
367            } => BaoChunk::Parent {
368                node: *node,
369                is_root: *is_root,
370                left: *left,
371                right: *right,
372                ranges: (),
373            },
374            Self::Leaf {
375                start_chunk,
376                size,
377                is_root,
378                ..
379            } => BaoChunk::Leaf {
380                start_chunk: *start_chunk,
381                size: *size,
382                is_root: *is_root,
383                ranges: (),
384            },
385        }
386    }
387}
388
389/// Iterator over all chunks in a BaoTree in post-order.
390#[derive(Debug)]
391pub struct PostOrderChunkIter {
392    tree: BaoTree,
393    inner: PostOrderNodeIter,
394    // stack with 2 elements, since we can only have 2 items in flight
395    stack: SmallVec<[BaoChunk; 2]>,
396    shifted_root: TreeNode,
397}
398
399impl PostOrderChunkIter {
400    /// Create a new iterator over the tree.
401    pub fn new(tree: BaoTree) -> Self {
402        let (shifted_root, shifted_len) = tree.shifted();
403        let inner = PostOrderNodeIter::new(shifted_root, shifted_len);
404        Self {
405            tree,
406            inner,
407            stack: Default::default(),
408            shifted_root,
409        }
410    }
411}
412
413impl Iterator for PostOrderChunkIter {
414    type Item = BaoChunk;
415
416    fn next(&mut self) -> Option<Self::Item> {
417        loop {
418            if let Some(item) = self.stack.pop() {
419                return Some(item);
420            }
421            let shifted = self.inner.next()?;
422            // the is_root check needs to be done before shifting the node
423            let is_root = shifted == self.shifted_root;
424            let node = shifted.subtract_block_size(self.tree.block_size.0);
425            if shifted.is_leaf() {
426                let tree = &self.tree;
427                let (s, m, e) = tree.leaf_byte_ranges3(node);
428                let l_start_chunk = node.chunk_range().start;
429                let r_start_chunk = l_start_chunk + tree.chunk_group_chunks();
430                let is_half_leaf = m == e;
431                // for the half leaf we emit just the leaf
432                // for all other leaves we emit the parent and two leaves
433                if !is_half_leaf {
434                    self.stack.push(BaoChunk::Parent {
435                        node,
436                        is_root,
437                        left: true,
438                        right: true,
439                        ranges: (),
440                    });
441                    self.stack.push(BaoChunk::Leaf {
442                        is_root: false,
443                        start_chunk: r_start_chunk,
444                        size: (e - m).try_into().unwrap(),
445                        ranges: (),
446                    });
447                };
448                break Some(BaoChunk::Leaf {
449                    is_root: is_root && is_half_leaf,
450                    start_chunk: l_start_chunk,
451                    size: (m - s).try_into().unwrap(),
452                    ranges: (),
453                });
454            } else {
455                self.stack.push(BaoChunk::Parent {
456                    node,
457                    is_root,
458                    left: true,
459                    right: true,
460                    ranges: (),
461                });
462            }
463        }
464    }
465}
466
467impl BaoChunk {
468    /// Return the size of the chunk in bytes.
469    pub fn size(&self) -> usize {
470        match self {
471            Self::Parent { .. } => 64,
472            Self::Leaf { size, .. } => *size,
473        }
474    }
475}
476
477impl<T: Default> Default for BaoChunk<T> {
478    fn default() -> Self {
479        Self::Leaf {
480            is_root: true,
481            size: 0,
482            start_chunk: ChunkNum(0),
483            ranges: T::default(),
484        }
485    }
486}
487
488/// Iterator over all nodes in a BaoTree in pre-order that overlap with a given chunk range.
489///
490/// This is mostly used internally
491#[derive(Debug)]
492pub struct PreOrderPartialChunkIterRef<'a> {
493    /// the tree we want to traverse
494    tree: BaoTree,
495    /// the minimum level to always emit, even if the node is fully within the query range
496    min_full_level: u8,
497    /// stack of nodes to visit, together with the ranges that are relevant for the node
498    ///
499    /// The node is shifted by the block size, so these are not normal nodes!
500    stack: SmallVec<[(TreeNode, &'a ChunkRangesRef); 8]>,
501    /// number of valid nodes, needed in shifted.right_descendant
502    ///
503    /// This is also shifted by the block size!
504    shifted_filled_size: TreeNode,
505    /// The root node, shifted by the block size, needed for the is_root check
506    shifted_root: TreeNode,
507    /// chunk buffer. This will only ever contain leaves, and will never be more than 2 elements
508    buffer: SmallVec<[BaoChunk<&'a ChunkRangesRef>; 2]>,
509}
510
511impl<'a> PreOrderPartialChunkIterRef<'a> {
512    /// Create a new iterator over the tree.
513    pub fn new(tree: BaoTree, ranges: &'a ChunkRangesRef, min_full_level: u8) -> Self {
514        let mut stack = SmallVec::new();
515        let (shifted_root, shifted_filled_size) = tree.shifted();
516        stack.push((shifted_root, ranges));
517        Self {
518            tree,
519            min_full_level,
520            stack,
521            shifted_filled_size,
522            shifted_root,
523            buffer: SmallVec::new(),
524        }
525    }
526
527    /// Get a reference to the tree.
528    pub fn tree(&self) -> &BaoTree {
529        &self.tree
530    }
531
532    /// Get the minimum level to always emit, even if the node is fully within the query range
533    pub fn min_full_level(&self) -> u8 {
534        self.min_full_level
535    }
536}
537
538impl<'a> Iterator for PreOrderPartialChunkIterRef<'a> {
539    type Item = BaoChunk<&'a ChunkRangesRef>;
540
541    fn next(&mut self) -> Option<Self::Item> {
542        if let Some(item) = self.buffer.pop() {
543            return Some(item);
544        }
545        let tree = &self.tree;
546        let (shifted, ranges) = self.stack.pop()?;
547        debug_assert!(!ranges.is_empty());
548        let node = shifted.subtract_block_size(tree.block_size.0);
549        // we don't want to recurse if the node is full and below the minimum level
550        let ranges_is_all = ranges.is_all();
551        let below_min_full_level = node.level() < self.min_full_level as u32;
552        let query_leaf = ranges_is_all && below_min_full_level;
553        // check if the node is the root by comparing the shifted node to the shifted root
554        let is_root = shifted == self.shifted_root;
555        let chunk_range = node.chunk_range();
556        let byte_range = tree.byte_range(node);
557        let size = (byte_range.end - byte_range.start).try_into().unwrap();
558        // There are three cases.
559        if query_leaf {
560            // The node is a query leaf, meaning that we stop descending because the
561            // node is fully within the query range and it's level is below min_full_level.
562            // This can be fully disabled by setting min_full_level to 0.
563            //
564            // In this case we just emit the range of the node, and don't recurse.
565            Some(BaoChunk::Leaf {
566                start_chunk: chunk_range.start,
567                size,
568                is_root,
569                ranges,
570            })
571        } else if !shifted.is_leaf() {
572            // The node is either not fully within the query range, or it's level is above
573            // min_full_level. In this case we need to recurse.
574            let (l_ranges, r_ranges) = split(ranges, node);
575            // emit right child first, so it gets yielded last
576            if !r_ranges.is_empty() {
577                let r = shifted.right_descendant(self.shifted_filled_size).unwrap();
578                self.stack.push((r, r_ranges));
579            }
580            // emit left child second, so it gets yielded first
581            if !l_ranges.is_empty() {
582                let l = shifted.left_child().unwrap();
583                self.stack.push((l, l_ranges));
584            }
585            // immediately emit the parent
586            Some(BaoChunk::Parent {
587                node,
588                left: !l_ranges.is_empty(),
589                right: !r_ranges.is_empty(),
590                is_root,
591                ranges,
592            })
593        } else {
594            // The node is a real leaf.
595            //
596            // If it is a normal leaf and we got this far, we need to split it into 2 ranges.
597            // E.g. the first leaf of a tree covers the range 0..2048, but we want two 1024
598            // byte BLAKE3 chunks.
599            //
600            // There is a special case for the last leaf, if its right range is not within the
601            // tree. In this case we don't need to split it, and can just emit it as is.
602            let mid_chunk = node.mid();
603            let mid = mid_chunk.to_bytes();
604            if mid >= tree.size {
605                // this is the last leaf node, and only it's left part is in the range
606                // we can just emit it without splitting
607                Some(BaoChunk::Leaf {
608                    start_chunk: chunk_range.start,
609                    size,
610                    is_root,
611                    ranges,
612                })
613            } else {
614                let (l_ranges, r_ranges) = split(ranges, node);
615                // emit right range first, so it gets yielded last
616                if !r_ranges.is_empty() {
617                    self.buffer.push(BaoChunk::Leaf {
618                        start_chunk: mid_chunk,
619                        size: (byte_range.end - mid).try_into().unwrap(),
620                        is_root: false,
621                        ranges: r_ranges,
622                    });
623                }
624                // emit left range second, so it gets yielded first
625                if !l_ranges.is_empty() {
626                    self.buffer.push(BaoChunk::Leaf {
627                        start_chunk: chunk_range.start,
628                        size: (mid - byte_range.start).try_into().unwrap(),
629                        is_root: false,
630                        ranges: l_ranges,
631                    });
632                }
633                // immediately emit the parent
634                Some(BaoChunk::Parent {
635                    node,
636                    left: !l_ranges.is_empty(),
637                    right: !r_ranges.is_empty(),
638                    is_root,
639                    ranges,
640                })
641            }
642        }
643    }
644}
645
646/// An iterator that produces chunks in pre order.
647///
648/// This wraps a `PreOrderPartialIterRef` and iterates over the chunk groups
649/// all the way down to individual chunks if needed.
650#[derive(Debug)]
651pub struct ResponseIterRef<'a> {
652    inner: PreOrderPartialChunkIterRef<'a>,
653}
654
655impl<'a> ResponseIterRef<'a> {
656    /// Create a new iterator over the tree.
657    pub fn new(tree: BaoTree, ranges: &'a ChunkRangesRef) -> Self {
658        let tree1 = BaoTree::new(tree.size, BlockSize::ZERO);
659        Self {
660            inner: PreOrderPartialChunkIterRef::new(tree1, ranges, tree.block_size.0),
661        }
662    }
663
664    /// Return the underlying tree.
665    pub fn tree(&self) -> BaoTree {
666        // the inner iterator uses a tree with block size 0, so we need to return the original tree
667        BaoTree::new(
668            self.inner.tree().size,
669            BlockSize(self.inner.min_full_level()),
670        )
671    }
672}
673
674impl Iterator for ResponseIterRef<'_> {
675    type Item = BaoChunk;
676
677    fn next(&mut self) -> Option<Self::Item> {
678        Some(self.inner.next()?.without_ranges())
679    }
680}
681
682self_cell! {
683    pub(crate) struct ResponseIterInner {
684        owner: ChunkRanges,
685        #[not_covariant]
686        dependent: ResponseIterRef,
687    }
688}
689
690impl ResponseIterInner {
691    fn next(&mut self) -> Option<BaoChunk> {
692        self.with_dependent_mut(|_, iter| iter.next())
693    }
694
695    fn tree(&self) -> BaoTree {
696        self.with_dependent(|_, iter| iter.tree())
697    }
698}
699
700/// The owned version of `ResponseIterRef`.
701pub struct ResponseIter(ResponseIterInner);
702
703impl fmt::Debug for ResponseIter {
704    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
705        f.debug_struct("ResponseIter").finish_non_exhaustive()
706    }
707}
708
709impl ResponseIter {
710    /// Create a new iterator over the tree.
711    pub fn new(tree: BaoTree, ranges: ChunkRanges) -> Self {
712        Self(ResponseIterInner::new(ranges, |ranges| {
713            ResponseIterRef::new(tree, ranges)
714        }))
715    }
716
717    /// The tree this iterator is iterating over.
718    pub fn tree(&self) -> BaoTree {
719        self.0.tree()
720    }
721}
722
723impl Iterator for ResponseIter {
724    type Item = BaoChunk;
725
726    fn next(&mut self) -> Option<Self::Item> {
727        self.0.next()
728    }
729}