bao_tree/io/
sync.rs

1//! Sync IO operations
2//!
3//! The traits to perform positioned io are re-exported from
4//! [positioned-io](https://crates.io/crates/positioned-io).
5use std::{
6    io::{self, Read, Seek, Write},
7    result,
8};
9
10use bytes::BytesMut;
11pub use positioned_io::{ReadAt, Size, WriteAt};
12use smallvec::SmallVec;
13
14use super::{combine_hash_pair, BaoContentItem, DecodeError};
15pub use crate::rec::truncate_ranges;
16use crate::{
17    blake3, hash_subtree,
18    io::{
19        error::EncodeError,
20        outboard::{parse_hash_pair, PostOrderOutboard, PreOrderOutboard},
21        Leaf, Parent,
22    },
23    iter::{BaoChunk, ResponseIterRef},
24    parent_cv,
25    rec::encode_selected_rec,
26    BaoTree, BlockSize, ChunkRangesRef, TreeNode,
27};
28
/// A binary merkle tree for blake3 hashes of a blob.
///
/// This trait contains information about the geometry of the tree, the root hash,
/// and a method to load the hashes at a given node.
///
/// It is up to the implementor to decide how to store the hashes.
///
/// In the original bao crate, the hashes are stored in a file in pre order.
/// This is implemented for a generic io object in [super::outboard::PreOrderOutboard]
/// and for a memory region in [super::outboard::PreOrderMemOutboard].
///
/// For files that grow over time, it is more efficient to store the hashes in post order.
/// This is implemented for a generic io object in [super::outboard::PostOrderOutboard]
/// and for a memory region in [super::outboard::PostOrderMemOutboard].
///
/// If you use a different storage engine, you can implement this trait for it. E.g.
/// you could store the hashes in a database and use the node number as the key.
pub trait Outboard {
    /// The root hash
    fn root(&self) -> blake3::Hash;
    /// The tree. This contains the information about the size of the file and the block size.
    fn tree(&self) -> BaoTree;
    /// load the hash pair for a node
    ///
    /// Returns `Ok(None)` when no hash pair is stored for this node, e.g.
    /// because the node is not relevant for the outboard.
    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>>;
}
54
/// A mutable outboard.
///
/// This trait provides a way to save a hash pair for a node and to set the
/// length of the data file.
///
/// This trait can be used to incrementally save an outboard when receiving data.
/// If you want to just ignore outboard data, there is a special placeholder outboard
/// implementation [super::outboard::EmptyOutboard].
pub trait OutboardMut: Sized {
    /// Save a hash pair for a node
    ///
    /// Implementations may silently ignore nodes that are not stored in the
    /// outboard (the provided file-based implementations do).
    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()>;

    /// Sync the outboard.
    fn sync(&mut self) -> io::Result<()>;
}
70
/// Convenience trait to initialize an outboard from a data source.
///
/// In complex real applications, you might want to do this manually.
pub trait CreateOutboard {
    /// Create an outboard from a data source.
    ///
    /// The size is determined by seeking to the end of the data source,
    /// after which the source is rewound to the start.
    fn create(mut data: impl Read + Seek, block_size: BlockSize) -> io::Result<Self>
    where
        Self: Default + Sized,
    {
        let size = data.seek(io::SeekFrom::End(0))?;
        data.rewind()?;
        Self::create_sized(data, size, block_size)
    }

    /// create an outboard from a data source. This requires the outboard to
    /// have a default implementation, which is the case for the memory
    /// implementations.
    fn create_sized(data: impl Read, size: u64, block_size: BlockSize) -> io::Result<Self>
    where
        Self: Default + Sized;

    /// Init the outboard from a data source. This will use the existing
    /// tree and only init the data and set the root hash.
    ///
    /// So this can be used to initialize an outboard that does not have a default,
    /// such as a file based one.
    ///
    /// It will only include data up to the current tree size.
    fn init_from(&mut self, data: impl Read) -> io::Result<()>;
}
101
// Forwarding impl so a `&mut O` can be used wherever an `OutboardMut` is expected.
impl<O: OutboardMut> OutboardMut for &mut O {
    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()> {
        (**self).save(node, hash_pair)
    }

    fn sync(&mut self) -> io::Result<()> {
        (**self).sync()
    }
}
111
// Forwarding impl so a shared reference can be used wherever an `Outboard` is expected.
impl<O: Outboard> Outboard for &O {
    fn root(&self) -> blake3::Hash {
        (**self).root()
    }
    fn tree(&self) -> BaoTree {
        (**self).tree()
    }
    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
        (**self).load(node)
    }
}
123
// Forwarding impl so a mutable reference can be used wherever an `Outboard` is expected.
impl<O: Outboard> Outboard for &mut O {
    fn root(&self) -> blake3::Hash {
        (**self).root()
    }
    fn tree(&self) -> BaoTree {
        (**self).tree()
    }
    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
        (**self).load(node)
    }
}
135
136impl<R: ReadAt> Outboard for PreOrderOutboard<R> {
137    fn root(&self) -> blake3::Hash {
138        self.root
139    }
140
141    fn tree(&self) -> BaoTree {
142        self.tree
143    }
144
145    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
146        let Some(offset) = self.tree.pre_order_offset(node) else {
147            return Ok(None);
148        };
149        let offset = offset * 64;
150        let mut content = [0u8; 64];
151        self.data.read_exact_at(offset, &mut content)?;
152        Ok(Some(parse_hash_pair(content)))
153    }
154}
155
156impl<W: WriteAt> OutboardMut for PreOrderOutboard<W> {
157    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()> {
158        let Some(offset) = self.tree.pre_order_offset(node) else {
159            return Ok(());
160        };
161        let offset = offset * 64;
162        let mut content = [0u8; 64];
163        content[0..32].copy_from_slice(hash_pair.0.as_bytes());
164        content[32..64].copy_from_slice(hash_pair.1.as_bytes());
165        self.data.write_all_at(offset, &content)?;
166        Ok(())
167    }
168
169    fn sync(&mut self) -> io::Result<()> {
170        self.data.flush()
171    }
172}
173
174impl<W: WriteAt> CreateOutboard for PreOrderOutboard<W> {
175    fn create_sized(data: impl Read, size: u64, block_size: BlockSize) -> io::Result<Self>
176    where
177        Self: Default + Sized,
178    {
179        let tree = BaoTree::new(size, block_size);
180        let mut res = Self {
181            tree,
182            ..Default::default()
183        };
184        res.init_from(data)?;
185        res.sync()?;
186        Ok(res)
187    }
188
189    fn init_from(&mut self, data: impl Read) -> io::Result<()> {
190        let mut this = self;
191        let root = outboard(data, this.tree, &mut this)?;
192        this.root = root;
193        this.sync()?;
194        Ok(())
195    }
196}
197
198impl<W: WriteAt> CreateOutboard for PostOrderOutboard<W> {
199    fn create_sized(data: impl Read, size: u64, block_size: BlockSize) -> io::Result<Self>
200    where
201        Self: Default + Sized,
202    {
203        let tree = BaoTree::new(size, block_size);
204        let mut res = Self {
205            tree,
206            ..Default::default()
207        };
208        res.init_from(data)?;
209        res.sync()?;
210        Ok(res)
211    }
212
213    fn init_from(&mut self, data: impl Read) -> io::Result<()> {
214        let mut this = self;
215        let root = outboard(data, this.tree, &mut this)?;
216        this.root = root;
217        this.sync()?;
218        Ok(())
219    }
220}
221
222impl<W: WriteAt> OutboardMut for PostOrderOutboard<W> {
223    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()> {
224        let Some(offset) = self.tree.post_order_offset(node) else {
225            return Ok(());
226        };
227        let offset = offset.value() * 64;
228        let mut content = [0u8; 64];
229        content[0..32].copy_from_slice(hash_pair.0.as_bytes());
230        content[32..64].copy_from_slice(hash_pair.1.as_bytes());
231        self.data.write_all_at(offset, &content)?;
232        Ok(())
233    }
234
235    fn sync(&mut self) -> io::Result<()> {
236        self.data.flush()
237    }
238}
239
240impl<R: ReadAt> Outboard for PostOrderOutboard<R> {
241    fn root(&self) -> blake3::Hash {
242        self.root
243    }
244
245    fn tree(&self) -> BaoTree {
246        self.tree
247    }
248
249    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
250        let Some(offset) = self.tree.post_order_offset(node) else {
251            return Ok(None);
252        };
253        let offset = offset.value() * 64;
254        let mut content = [0u8; 64];
255        self.data.read_exact_at(offset, &mut content)?;
256        Ok(Some(parse_hash_pair(content)))
257    }
258}
259
/// Iterator that can be used to decode a response to a range request
#[derive(Debug)]
pub struct DecodeResponseIter<'a, R> {
    // iterator over the parents and leaves relevant to the requested ranges
    inner: ResponseIterRef<'a>,
    // stack of expected hashes; the top is the hash for the next item
    stack: SmallVec<[blake3::Hash; 10]>,
    // reader for the encoded response data
    encoded: R,
    // scratch buffer for reading leaf data
    buf: BytesMut,
}
268
impl<'a, R: Read> DecodeResponseIter<'a, R> {
    /// Create a new iterator to decode a response.
    ///
    /// For decoding you need to know the root hash, block size, and the ranges that were requested.
    /// Additionally you need to provide a reader that can be used to read the encoded data.
    pub fn new(root: blake3::Hash, tree: BaoTree, encoded: R, ranges: &'a ChunkRangesRef) -> Self {
        let buf = BytesMut::with_capacity(tree.block_size().bytes());
        Self::new_with_buffer(root, tree, encoded, ranges, buf)
    }

    /// Create a new iterator to decode a response.
    ///
    /// This is the same as [Self::new], but allows you to provide a buffer to use for decoding.
    /// The buffer will be resized as needed, but its capacity should be the [crate::BlockSize::bytes].
    pub fn new_with_buffer(
        root: blake3::Hash,
        tree: BaoTree,
        encoded: R,
        ranges: &'a ChunkRangesRef,
        buf: BytesMut,
    ) -> Self {
        // canonicalize the ranges to the size of the tree
        let ranges = truncate_ranges(ranges, tree.size());
        // the hash stack starts out with just the root hash
        let mut stack = SmallVec::new();
        stack.push(root);
        Self {
            stack,
            inner: ResponseIterRef::new(tree, ranges),
            encoded,
            buf,
        }
    }

    /// Get a reference to the buffer used for decoding.
    pub fn buffer(&self) -> &[u8] {
        &self.buf
    }

    /// Get a reference to the tree used for decoding.
    ///
    /// This is only available after the first chunk has been decoded.
    pub fn tree(&self) -> BaoTree {
        self.inner.tree()
    }

    // Decode the next item from the stream, verifying every hash as we go.
    fn next0(&mut self) -> result::Result<Option<BaoContentItem>, DecodeError> {
        match self.inner.next() {
            Some(BaoChunk::Parent {
                is_root,
                left,
                right,
                node,
                ..
            }) => {
                // read the hash pair for this parent from the encoded stream
                let pair @ (l_hash, r_hash) = read_parent(&mut self.encoded)
                    .map_err(|e| DecodeError::maybe_parent_not_found(e, node))?;
                // the expected hash for this parent is on top of the stack
                let parent_hash = self.stack.pop().unwrap();
                let actual = parent_cv(&l_hash, &r_hash, is_root);
                if parent_hash != actual {
                    return Err(DecodeError::ParentHashMismatch(node));
                }
                // push the needed child hashes, right first, so the left
                // child is popped (verified) first
                if right {
                    self.stack.push(r_hash);
                }
                if left {
                    self.stack.push(l_hash);
                }
                Ok(Some(Parent { node, pair }.into()))
            }
            Some(BaoChunk::Leaf {
                size,
                is_root,
                start_chunk,
                ..
            }) => {
                self.buf.resize(size, 0);
                self.encoded
                    .read_exact(&mut self.buf)
                    .map_err(|e| DecodeError::maybe_leaf_not_found(e, start_chunk))?;
                // verify the leaf data against the expected hash from the stack
                let actual = hash_subtree(start_chunk.0, &self.buf, is_root);
                let leaf_hash = self.stack.pop().unwrap();
                if leaf_hash != actual {
                    return Err(DecodeError::LeafHashMismatch(start_chunk));
                }
                Ok(Some(
                    Leaf {
                        offset: start_chunk.to_bytes(),
                        data: self.buf.split().freeze(),
                    }
                    .into(),
                ))
            }
            None => Ok(None),
        }
    }
}
364
impl<R: Read> Iterator for DecodeResponseIter<'_, R> {
    type Item = result::Result<BaoContentItem, DecodeError>;

    fn next(&mut self) -> Option<Self::Item> {
        // turn Result<Option<_>, _> into Option<Result<_, _>> for the iterator contract
        self.next0().transpose()
    }
}
372
373/// Encode ranges relevant to a query from a reader and outboard to a writer
374///
375/// This will not validate on writing, so data corruption will be detected on reading
376///
377/// It is possible to encode ranges from a partial file and outboard.
378/// This will either succeed if the requested ranges are all present, or fail
379/// as soon as a range is missing.
380pub fn encode_ranges<D: ReadAt + Size, O: Outboard, W: Write>(
381    data: D,
382    outboard: O,
383    ranges: &ChunkRangesRef,
384    encoded: W,
385) -> result::Result<(), EncodeError> {
386    let data = data;
387    let mut encoded = encoded;
388    let tree = outboard.tree();
389    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
390    for item in tree.ranges_pre_order_chunks_iter_ref(ranges, 0) {
391        match item {
392            BaoChunk::Parent { node, .. } => {
393                let (l_hash, r_hash) = outboard.load(node)?.unwrap();
394                let pair = combine_hash_pair(&l_hash, &r_hash);
395                encoded.write_all(&pair)?;
396            }
397            BaoChunk::Leaf {
398                start_chunk, size, ..
399            } => {
400                let start = start_chunk.to_bytes();
401                let buf = &mut buffer[..size];
402                data.read_exact_at(start, buf)?;
403                encoded.write_all(buf)?;
404            }
405        }
406    }
407    Ok(())
408}
409
410/// Encode ranges relevant to a query from a reader and outboard to a writer
411///
412/// This function validates the data before writing.
413///
414/// It is possible to encode ranges from a partial file and outboard.
415/// This will either succeed if the requested ranges are all present, or fail
416/// as soon as a range is missing.
417pub fn encode_ranges_validated<D: ReadAt, O: Outboard, W: Write>(
418    data: D,
419    outboard: O,
420    ranges: &ChunkRangesRef,
421    encoded: W,
422) -> result::Result<(), EncodeError> {
423    if ranges.is_empty() {
424        return Ok(());
425    }
426    let mut stack = SmallVec::<[blake3::Hash; 10]>::new();
427    stack.push(outboard.root());
428    let data = data;
429    let mut encoded = encoded;
430    let tree = outboard.tree();
431    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
432    let mut out_buf = Vec::new();
433    // canonicalize ranges
434    let ranges = truncate_ranges(ranges, tree.size());
435    for item in tree.ranges_pre_order_chunks_iter_ref(ranges, 0) {
436        match item {
437            BaoChunk::Parent {
438                is_root,
439                left,
440                right,
441                node,
442                ..
443            } => {
444                let (l_hash, r_hash) = outboard.load(node)?.unwrap();
445                let actual = parent_cv(&l_hash, &r_hash, is_root);
446                let expected = stack.pop().unwrap();
447                if actual != expected {
448                    return Err(EncodeError::ParentHashMismatch(node));
449                }
450                if right {
451                    stack.push(r_hash);
452                }
453                if left {
454                    stack.push(l_hash);
455                }
456                let pair = combine_hash_pair(&l_hash, &r_hash);
457                encoded.write_all(&pair)?;
458            }
459            BaoChunk::Leaf {
460                start_chunk,
461                size,
462                is_root,
463                ranges,
464                ..
465            } => {
466                let expected = stack.pop().unwrap();
467                let start = start_chunk.to_bytes();
468                let buf = &mut buffer[..size];
469                data.read_exact_at(start, buf)?;
470                let (actual, to_write) = if !ranges.is_all() {
471                    // we need to encode just a part of the data
472                    //
473                    // write into an out buffer to ensure we detect mismatches
474                    // before writing to the output.
475                    out_buf.clear();
476                    let actual = encode_selected_rec(
477                        start_chunk,
478                        buf,
479                        is_root,
480                        ranges,
481                        tree.block_size.to_u32(),
482                        true,
483                        &mut out_buf,
484                    );
485                    (actual, &out_buf[..])
486                } else {
487                    let actual = hash_subtree(start_chunk.0, buf, is_root);
488                    #[allow(clippy::redundant_slicing)]
489                    (actual, &buf[..])
490                };
491                if actual != expected {
492                    return Err(EncodeError::LeafHashMismatch(start_chunk));
493                }
494                encoded.write_all(to_write)?;
495            }
496        }
497    }
498    Ok(())
499}
500
/// Decode a response into a file while updating an outboard.
///
/// If you do not want to update an outboard, use [super::outboard::EmptyOutboard] as
/// the outboard.
pub fn decode_ranges<R, O, W>(
    encoded: R,
    ranges: &ChunkRangesRef,
    mut target: W,
    mut outboard: O,
) -> std::result::Result<(), DecodeError>
where
    O: OutboardMut + Outboard,
    R: Read,
    W: WriteAt,
{
    // the iterator verifies all hashes while decoding, so everything we
    // see here has already been checked against the root hash
    let iter = DecodeResponseIter::new(outboard.root(), outboard.tree(), encoded, ranges);
    for item in iter {
        match item? {
            BaoContentItem::Parent(Parent { node, pair }) => {
                // store the verified hash pair in the outboard
                outboard.save(node, &pair)?;
            }
            BaoContentItem::Leaf(Leaf { offset, data }) => {
                // write the verified leaf data at its byte offset
                target.write_all_at(offset, &data)?;
            }
        }
    }
    Ok(())
}
529
530/// Compute the outboard for the given data.
531///
532/// Unlike [outboard_post_order], this will work with any outboard
533/// implementation, but it is not guaranteed that writes are sequential.
534pub fn outboard(
535    data: impl Read,
536    tree: BaoTree,
537    mut outboard: impl OutboardMut,
538) -> io::Result<blake3::Hash> {
539    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
540    let hash = outboard_impl(tree, data, &mut outboard, &mut buffer)?;
541    Ok(hash)
542}
543
/// Internal helper for [outboard]. This takes a buffer of the chunk group size.
fn outboard_impl(
    tree: BaoTree,
    mut data: impl Read,
    mut outboard: impl OutboardMut,
    buffer: &mut [u8],
) -> io::Result<blake3::Hash> {
    // do not allocate for small trees
    let mut stack = SmallVec::<[blake3::Hash; 10]>::new();
    debug_assert!(buffer.len() == tree.chunk_group_bytes());
    for item in tree.post_order_chunks_iter() {
        match item {
            BaoChunk::Parent { is_root, node, .. } => {
                // in post order both child hashes are on the stack, right on top
                let right_hash = stack.pop().unwrap();
                let left_hash = stack.pop().unwrap();
                outboard.save(node, &(left_hash, right_hash))?;
                let parent = parent_cv(&left_hash, &right_hash, is_root);
                stack.push(parent);
            }
            BaoChunk::Leaf {
                size,
                is_root,
                start_chunk,
                ..
            } => {
                // hash the leaf data and push the hash for the parent to consume
                let buf = &mut buffer[..size];
                data.read_exact(buf)?;
                let hash = hash_subtree(start_chunk.0, buf, is_root);
                stack.push(hash);
            }
        }
    }
    // when the iteration is done, only the root hash remains on the stack
    debug_assert_eq!(stack.len(), 1);
    let hash = stack.pop().unwrap();
    Ok(hash)
}
580
581/// Compute the post order outboard for the given data, writing into a io::Write
582///
583/// For the post order outboard, writes to the target are sequential.
584///
585/// This will not add the size to the output. You need to store it somewhere else
586/// or append it yourself.
587pub fn outboard_post_order(
588    data: impl Read,
589    tree: BaoTree,
590    mut outboard: impl Write,
591) -> io::Result<blake3::Hash> {
592    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
593    let hash = outboard_post_order_impl(tree, data, &mut outboard, &mut buffer)?;
594    Ok(hash)
595}
596
/// Internal helper for [outboard_post_order]. This takes a buffer of the chunk group size.
fn outboard_post_order_impl(
    tree: BaoTree,
    mut data: impl Read,
    mut outboard: impl Write,
    buffer: &mut [u8],
) -> io::Result<blake3::Hash> {
    // do not allocate for small trees
    let mut stack = SmallVec::<[blake3::Hash; 10]>::new();
    debug_assert!(buffer.len() == tree.chunk_group_bytes());
    for item in tree.post_order_chunks_iter() {
        match item {
            BaoChunk::Parent { is_root, .. } => {
                // in post order both child hashes are on the stack, right on top
                let right_hash = stack.pop().unwrap();
                let left_hash = stack.pop().unwrap();
                // write the hash pair sequentially, left then right
                outboard.write_all(left_hash.as_bytes())?;
                outboard.write_all(right_hash.as_bytes())?;
                let parent = parent_cv(&left_hash, &right_hash, is_root);
                stack.push(parent);
            }
            BaoChunk::Leaf {
                size,
                is_root,
                start_chunk,
                ..
            } => {
                // hash the leaf data and push the hash for the parent to consume
                let buf = &mut buffer[..size];
                data.read_exact(buf)?;
                let hash = hash_subtree(start_chunk.0, buf, is_root);
                stack.push(hash);
            }
        }
    }
    // when the iteration is done, only the root hash remains on the stack
    debug_assert_eq!(stack.len(), 1);
    let hash = stack.pop().unwrap();
    Ok(hash)
}
634
635fn read_parent(mut from: impl Read) -> std::io::Result<(blake3::Hash, blake3::Hash)> {
636    let mut buf = [0; 64];
637    from.read_exact(&mut buf)?;
638    let l_hash = blake3::Hash::from(<[u8; 32]>::try_from(&buf[..32]).unwrap());
639    let r_hash = blake3::Hash::from(<[u8; 32]>::try_from(&buf[32..]).unwrap());
640    Ok((l_hash, r_hash))
641}
642
643/// Copy an outboard to another outboard.
644///
645/// This can be used to persist an in memory outboard or to change from
646/// pre-order to post-order.
647pub fn copy(from: impl Outboard, mut to: impl OutboardMut) -> io::Result<()> {
648    let tree = from.tree();
649    for node in tree.pre_order_nodes_iter() {
650        if let Some(hash_pair) = from.load(node)? {
651            to.save(node, &hash_pair)?;
652        }
653    }
654    Ok(())
655}
656
#[cfg(feature = "validate")]
mod validate {
    //! Recursive validation of data files and outboards, yielding the ranges
    //! whose hashes check out against the stored outboard.
    use std::{io, ops::Range};

    use genawaiter::sync::{Co, Gen};
    use positioned_io::ReadAt;

    use super::Outboard;
    use crate::{
        blake3, hash_subtree, io::LocalBoxFuture, parent_cv, rec::truncate_ranges, split, BaoTree,
        ChunkNum, ChunkRangesRef, TreeNode,
    };

    /// Given a data file and an outboard, compute all valid ranges.
    ///
    /// This is not cheap since it recomputes the hashes for all chunks.
    ///
    /// To reduce the amount of work, you can specify a range you are interested in.
    pub fn valid_ranges<'a, O, D>(
        outboard: O,
        data: D,
        ranges: &'a ChunkRangesRef,
    ) -> impl IntoIterator<Item = io::Result<Range<ChunkNum>>> + 'a
    where
        O: Outboard + 'a,
        D: ReadAt + 'a,
    {
        Gen::new(move |co| async move {
            // an io error is reported as the last yielded item
            if let Err(cause) = RecursiveDataValidator::validate(outboard, data, ranges, &co).await
            {
                co.yield_(Err(cause)).await;
            }
        })
    }

    struct RecursiveDataValidator<'a, O: Outboard, D: ReadAt> {
        // geometry of the tree being validated
        tree: BaoTree,
        // filled size of the shifted tree, needed for right_descendant
        shifted_filled_size: TreeNode,
        // the outboard containing the stored hashes
        outboard: O,
        // the data to recompute hashes from
        data: D,
        // scratch buffer for reading leaf data
        buffer: Vec<u8>,
        // generator handle used to yield valid ranges to the consumer
        co: &'a Co<io::Result<Range<ChunkNum>>>,
    }

    impl<O: Outboard, D: ReadAt> RecursiveDataValidator<'_, O, D> {
        async fn validate(
            outboard: O,
            data: D,
            ranges: &ChunkRangesRef,
            co: &Co<io::Result<Range<ChunkNum>>>,
        ) -> io::Result<()> {
            let tree = outboard.tree();
            let mut buffer = vec![0u8; tree.chunk_group_bytes()];
            if tree.blocks() == 1 {
                // special case for a tree that fits in one block / chunk group
                let tmp = &mut buffer[..tree.size().try_into().unwrap()];
                data.read_exact_at(0, tmp)?;
                let actual = hash_subtree(0, tmp, true);
                if actual == outboard.root() {
                    co.yield_(Ok(ChunkNum(0)..tree.chunks())).await;
                }
                return Ok(());
            }
            // canonicalize the ranges to the size of the tree
            let ranges = truncate_ranges(ranges, tree.size());
            let root_hash = outboard.root();
            let (shifted_root, shifted_filled_size) = tree.shifted();
            let mut validator = RecursiveDataValidator {
                tree,
                shifted_filled_size,
                outboard,
                data,
                buffer,
                co,
            };
            validator
                .validate_rec(&root_hash, shifted_root, true, ranges)
                .await
        }

        // Recompute the hash for the given byte range of the data and yield
        // the corresponding chunk range if it matches the expected hash.
        async fn yield_if_valid(
            &mut self,
            range: Range<u64>,
            hash: &blake3::Hash,
            is_root: bool,
        ) -> io::Result<()> {
            let len = (range.end - range.start).try_into().unwrap();
            let tmp = &mut self.buffer[..len];
            self.data.read_exact_at(range.start, tmp)?;
            // is_root is always false because the case of a single chunk group is handled before calling this function
            let actual = hash_subtree(ChunkNum::full_chunks(range.start).0, tmp, is_root);
            if &actual == hash {
                // yield the left range
                self.co
                    .yield_(Ok(
                        ChunkNum::full_chunks(range.start)..ChunkNum::chunks(range.end)
                    ))
                    .await;
            }
            io::Result::Ok(())
        }

        // Recursively validate the subtree rooted at `shifted`, yielding
        // every range whose recomputed hashes match the outboard.
        fn validate_rec<'b>(
            &'b mut self,
            parent_hash: &'b blake3::Hash,
            shifted: TreeNode,
            is_root: bool,
            ranges: &'b ChunkRangesRef,
        ) -> LocalBoxFuture<'b, io::Result<()>> {
            Box::pin(async move {
                if ranges.is_empty() {
                    // this part of the tree is not of interest, so we can skip it
                    return Ok(());
                }
                let node = shifted.subtract_block_size(self.tree.block_size.0);
                let (l, m, r) = self.tree.leaf_byte_ranges3(node);
                if !self.tree.is_relevant_for_outboard(node) {
                    // no stored hash pair for this node, validate the data directly
                    self.yield_if_valid(l..r, parent_hash, is_root).await?;
                    return Ok(());
                }
                let Some((l_hash, r_hash)) = self.outboard.load(node)? else {
                    // outboard is incomplete, we can't validate
                    return Ok(());
                };
                let actual = parent_cv(&l_hash, &r_hash, is_root);
                if &actual != parent_hash {
                    // hash mismatch, we can't validate
                    return Ok(());
                };
                let (l_ranges, r_ranges) = split(ranges, node);
                if shifted.is_leaf() {
                    if !l_ranges.is_empty() {
                        self.yield_if_valid(l..m, &l_hash, false).await?;
                    }
                    if !r_ranges.is_empty() {
                        self.yield_if_valid(m..r, &r_hash, false).await?;
                    }
                } else {
                    // recurse (we are in the domain of the shifted tree)
                    let left = shifted.left_child().unwrap();
                    self.validate_rec(&l_hash, left, false, l_ranges).await?;
                    let right = shifted.right_descendant(self.shifted_filled_size).unwrap();
                    self.validate_rec(&r_hash, right, false, r_ranges).await?;
                }
                Ok(())
            })
        }
    }

    /// Given just an outboard, compute all valid ranges.
    ///
    /// This is not cheap since it recomputes the hashes for all chunks.
    pub fn valid_outboard_ranges<'a, O>(
        outboard: O,
        ranges: &'a ChunkRangesRef,
    ) -> impl IntoIterator<Item = io::Result<Range<ChunkNum>>> + 'a
    where
        O: Outboard + 'a,
    {
        Gen::new(move |co| async move {
            // an io error is reported as the last yielded item
            if let Err(cause) = RecursiveOutboardValidator::validate(outboard, ranges, &co).await {
                co.yield_(Err(cause)).await;
            }
        })
    }

    struct RecursiveOutboardValidator<'a, O: Outboard> {
        // geometry of the tree being validated
        tree: BaoTree,
        // filled size of the shifted tree, needed for right_descendant
        shifted_filled_size: TreeNode,
        // the outboard containing the stored hashes
        outboard: O,
        // generator handle used to yield valid ranges to the consumer
        co: &'a Co<io::Result<Range<ChunkNum>>>,
    }

    impl<O: Outboard> RecursiveOutboardValidator<'_, O> {
        async fn validate(
            outboard: O,
            ranges: &ChunkRangesRef,
            co: &Co<io::Result<Range<ChunkNum>>>,
        ) -> io::Result<()> {
            let tree = outboard.tree();
            if tree.blocks() == 1 {
                // special case for a tree that fits in one block / chunk group
                co.yield_(Ok(ChunkNum(0)..tree.chunks())).await;
                return Ok(());
            }
            // canonicalize the ranges to the size of the tree
            let ranges = truncate_ranges(ranges, tree.size());
            let root_hash = outboard.root();
            let (shifted_root, shifted_filled_size) = tree.shifted();
            let mut validator = RecursiveOutboardValidator {
                tree,
                shifted_filled_size,
                outboard,
                co,
            };
            validator
                .validate_rec(&root_hash, shifted_root, true, ranges)
                .await
        }

        // Recursively validate the subtree rooted at `shifted`, yielding
        // every range whose stored hashes are consistent with the parent.
        fn validate_rec<'b>(
            &'b mut self,
            parent_hash: &'b blake3::Hash,
            shifted: TreeNode,
            is_root: bool,
            ranges: &'b ChunkRangesRef,
        ) -> LocalBoxFuture<'b, io::Result<()>> {
            Box::pin(async move {
                let yield_node_range = |range: Range<u64>| {
                    self.co.yield_(Ok(
                        ChunkNum::full_chunks(range.start)..ChunkNum::chunks(range.end)
                    ))
                };
                if ranges.is_empty() {
                    // this part of the tree is not of interest, so we can skip it
                    return Ok(());
                }
                let node = shifted.subtract_block_size(self.tree.block_size.0);
                let (l, m, r) = self.tree.leaf_byte_ranges3(node);
                if !self.tree.is_relevant_for_outboard(node) {
                    // no stored hash pair for this node, so nothing to check
                    yield_node_range(l..r).await;
                    return Ok(());
                }
                let Some((l_hash, r_hash)) = self.outboard.load(node)? else {
                    // outboard is incomplete, we can't validate
                    return Ok(());
                };
                let actual = parent_cv(&l_hash, &r_hash, is_root);
                if &actual != parent_hash {
                    // hash mismatch, we can't validate
                    return Ok(());
                };
                let (l_ranges, r_ranges) = split(ranges, node);
                if shifted.is_leaf() {
                    if !l_ranges.is_empty() {
                        yield_node_range(l..m).await;
                    }
                    if !r_ranges.is_empty() {
                        yield_node_range(m..r).await;
                    }
                } else {
                    // recurse (we are in the domain of the shifted tree)
                    let left = shifted.left_child().unwrap();
                    self.validate_rec(&l_hash, left, false, l_ranges).await?;
                    let right = shifted.right_descendant(self.shifted_filled_size).unwrap();
                    self.validate_rec(&r_hash, right, false, r_ranges).await?;
                }
                Ok(())
            })
        }
    }
}
907#[cfg(feature = "validate")]
908pub use validate::{valid_outboard_ranges, valid_ranges};