// bao_tree/io/sync.rs

1//! Sync IO operations
2//!
3//! The traits to perform positioned io are re-exported from
4//! [positioned-io](https://crates.io/crates/positioned-io).
5use std::{
6    io::{self, Read, Seek, Write},
7    result,
8};
9
10use crate::{
11    blake3,
12    io::{
13        error::EncodeError,
14        outboard::{parse_hash_pair, PostOrderOutboard, PreOrderOutboard},
15        Leaf, Parent,
16    },
17    iter::BaoChunk,
18    rec::{encode_selected_rec, truncate_ranges},
19    BaoTree, BlockSize, ChunkRangesRef, TreeNode,
20};
21use blake3::guts::parent_cv;
22use bytes::BytesMut;
23pub use positioned_io::{ReadAt, Size, WriteAt};
24use smallvec::SmallVec;
25
26use super::{combine_hash_pair, BaoContentItem, DecodeError};
27use crate::{hash_subtree, iter::ResponseIterRef};
28
29/// A binary merkle tree for blake3 hashes of a blob.
30///
31/// This trait contains information about the geometry of the tree, the root hash,
32/// and a method to load the hashes at a given node.
33///
34/// It is up to the implementor to decide how to store the hashes.
35///
36/// In the original bao crate, the hashes are stored in a file in pre order.
37/// This is implemented for a generic io object in [super::outboard::PreOrderOutboard]
38/// and for a memory region in [super::outboard::PreOrderMemOutboard].
39///
40/// For files that grow over time, it is more efficient to store the hashes in post order.
41/// This is implemented for a generic io object in [super::outboard::PostOrderOutboard]
42/// and for a memory region in [super::outboard::PostOrderMemOutboard].
43///
44/// If you use a different storage engine, you can implement this trait for it. E.g.
45/// you could store the hashes in a database and use the node number as the key.
pub trait Outboard {
    /// The root hash
    fn root(&self) -> blake3::Hash;
    /// The tree. This contains the information about the size of the file and the block size.
    fn tree(&self) -> BaoTree;
    /// Load the hash pair for a node.
    ///
    /// Returns `Ok(None)` when this outboard has no pair stored for the node
    /// (e.g. the node has no offset in the underlying storage layout).
    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>>;
}
54
55/// A mutable outboard.
56///
57/// This trait provides a way to save a hash pair for a node and to set the
58/// length of the data file.
59///
60/// This trait can be used to incrementally save an outboard when receiving data.
61/// If you want to just ignore outboard data, there is a special placeholder outboard
62/// implementation [super::outboard::EmptyOutboard].
pub trait OutboardMut: Sized {
    /// Save a hash pair for a node.
    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()>;

    /// Sync the outboard, flushing any buffered writes to the underlying storage.
    fn sync(&mut self) -> io::Result<()>;
}
70
71/// Convenience trait to initialize an outboard from a data source.
72///
73/// In complex real applications, you might want to do this manually.
pub trait CreateOutboard {
    /// Create an outboard from a data source.
    ///
    /// Determines the size by seeking to the end of the data source, rewinds
    /// it, and then delegates to [Self::create_sized].
    fn create(mut data: impl Read + Seek, block_size: BlockSize) -> io::Result<Self>
    where
        Self: Default + Sized,
    {
        let size = data.seek(io::SeekFrom::End(0))?;
        data.rewind()?;
        Self::create_sized(data, size, block_size)
    }

    /// Create an outboard from a data source of known size. This requires the
    /// outboard to have a default implementation, which is the case for the
    /// memory implementations.
    fn create_sized(data: impl Read, size: u64, block_size: BlockSize) -> io::Result<Self>
    where
        Self: Default + Sized;

    /// Init the outboard from a data source. This will use the existing
    /// tree and only init the data and set the root hash.
    ///
    /// So this can be used to initialize an outboard that does not have a default,
    /// such as a file based one.
    ///
    /// It will only include data up to the current tree size.
    fn init_from(&mut self, data: impl Read) -> io::Result<()>;
}
101
// A mutable reference to an OutboardMut is itself an OutboardMut, by delegation.
impl<O: OutboardMut> OutboardMut for &mut O {
    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()> {
        (**self).save(node, hash_pair)
    }

    fn sync(&mut self) -> io::Result<()> {
        (**self).sync()
    }
}
111
// A shared reference to an Outboard is itself an Outboard, by delegation.
impl<O: Outboard> Outboard for &O {
    fn root(&self) -> blake3::Hash {
        (**self).root()
    }
    fn tree(&self) -> BaoTree {
        (**self).tree()
    }
    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
        (**self).load(node)
    }
}
123
// A mutable reference to an Outboard is itself an Outboard, by delegation.
impl<O: Outboard> Outboard for &mut O {
    fn root(&self) -> blake3::Hash {
        (**self).root()
    }
    fn tree(&self) -> BaoTree {
        (**self).tree()
    }
    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
        (**self).load(node)
    }
}
135
136impl<R: ReadAt> Outboard for PreOrderOutboard<R> {
137    fn root(&self) -> blake3::Hash {
138        self.root
139    }
140
141    fn tree(&self) -> BaoTree {
142        self.tree
143    }
144
145    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
146        let Some(offset) = self.tree.pre_order_offset(node) else {
147            return Ok(None);
148        };
149        let offset = offset * 64;
150        let mut content = [0u8; 64];
151        self.data.read_exact_at(offset, &mut content)?;
152        Ok(Some(parse_hash_pair(content)))
153    }
154}
155
156impl<W: WriteAt> OutboardMut for PreOrderOutboard<W> {
157    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()> {
158        let Some(offset) = self.tree.pre_order_offset(node) else {
159            return Ok(());
160        };
161        let offset = offset * 64;
162        let mut content = [0u8; 64];
163        content[0..32].copy_from_slice(hash_pair.0.as_bytes());
164        content[32..64].copy_from_slice(hash_pair.1.as_bytes());
165        self.data.write_all_at(offset, &content)?;
166        Ok(())
167    }
168
169    fn sync(&mut self) -> io::Result<()> {
170        self.data.flush()
171    }
172}
173
174impl<W: WriteAt> CreateOutboard for PreOrderOutboard<W> {
175    fn create_sized(data: impl Read, size: u64, block_size: BlockSize) -> io::Result<Self>
176    where
177        Self: Default + Sized,
178    {
179        let tree = BaoTree::new(size, block_size);
180        let mut res = Self {
181            tree,
182            ..Default::default()
183        };
184        res.init_from(data)?;
185        res.sync()?;
186        Ok(res)
187    }
188
189    fn init_from(&mut self, data: impl Read) -> io::Result<()> {
190        let mut this = self;
191        let root = outboard(data, this.tree, &mut this)?;
192        this.root = root;
193        this.sync()?;
194        Ok(())
195    }
196}
197
198impl<W: WriteAt> CreateOutboard for PostOrderOutboard<W> {
199    fn create_sized(data: impl Read, size: u64, block_size: BlockSize) -> io::Result<Self>
200    where
201        Self: Default + Sized,
202    {
203        let tree = BaoTree::new(size, block_size);
204        let mut res = Self {
205            tree,
206            ..Default::default()
207        };
208        res.init_from(data)?;
209        res.sync()?;
210        Ok(res)
211    }
212
213    fn init_from(&mut self, data: impl Read) -> io::Result<()> {
214        let mut this = self;
215        let root = outboard(data, this.tree, &mut this)?;
216        this.root = root;
217        this.sync()?;
218        Ok(())
219    }
220}
221
222impl<W: WriteAt> OutboardMut for PostOrderOutboard<W> {
223    fn save(&mut self, node: TreeNode, hash_pair: &(blake3::Hash, blake3::Hash)) -> io::Result<()> {
224        let Some(offset) = self.tree.post_order_offset(node) else {
225            return Ok(());
226        };
227        let offset = offset.value() * 64;
228        let mut content = [0u8; 64];
229        content[0..32].copy_from_slice(hash_pair.0.as_bytes());
230        content[32..64].copy_from_slice(hash_pair.1.as_bytes());
231        self.data.write_all_at(offset, &content)?;
232        Ok(())
233    }
234
235    fn sync(&mut self) -> io::Result<()> {
236        self.data.flush()
237    }
238}
239
240impl<R: ReadAt> Outboard for PostOrderOutboard<R> {
241    fn root(&self) -> blake3::Hash {
242        self.root
243    }
244
245    fn tree(&self) -> BaoTree {
246        self.tree
247    }
248
249    fn load(&self, node: TreeNode) -> io::Result<Option<(blake3::Hash, blake3::Hash)>> {
250        let Some(offset) = self.tree.post_order_offset(node) else {
251            return Ok(None);
252        };
253        let offset = offset.value() * 64;
254        let mut content = [0u8; 64];
255        self.data.read_exact_at(offset, &mut content)?;
256        Ok(Some(parse_hash_pair(content)))
257    }
258}
259
/// Iterator that can be used to decode a response to a range request
#[derive(Debug)]
pub struct DecodeResponseIter<'a, R> {
    // iterator over the parent/leaf chunks expected for the requested ranges
    inner: ResponseIterRef<'a>,
    // stack of expected hashes; the top is the hash of the next item
    stack: SmallVec<[blake3::Hash; 10]>,
    // reader for the encoded data
    encoded: R,
    // scratch buffer for leaf data, handed out via `split().freeze()`
    buf: BytesMut,
}
268
impl<'a, R: Read> DecodeResponseIter<'a, R> {
    /// Create a new iterator to decode a response.
    ///
    /// For decoding you need to know the root hash, block size, and the ranges that were requested.
    /// Additionally you need to provide a reader that can be used to read the encoded data.
    pub fn new(root: blake3::Hash, tree: BaoTree, encoded: R, ranges: &'a ChunkRangesRef) -> Self {
        let buf = BytesMut::with_capacity(tree.block_size().bytes());
        Self::new_with_buffer(root, tree, encoded, ranges, buf)
    }

    /// Create a new iterator to decode a response.
    ///
    /// This is the same as [Self::new], but allows you to provide a buffer to use for decoding.
    /// The buffer will be resized as needed, but its capacity should be [crate::BlockSize::bytes].
    pub fn new_with_buffer(
        root: blake3::Hash,
        tree: BaoTree,
        encoded: R,
        ranges: &'a ChunkRangesRef,
        buf: BytesMut,
    ) -> Self {
        // restrict the requested ranges to the tree size, like the encoder does
        let ranges = truncate_ranges(ranges, tree.size());
        let mut stack = SmallVec::new();
        // the root hash is the first expected hash
        stack.push(root);
        Self {
            stack,
            inner: ResponseIterRef::new(tree, ranges),
            encoded,
            buf,
        }
    }

    /// Get a reference to the buffer used for decoding.
    pub fn buffer(&self) -> &[u8] {
        &self.buf
    }

    /// Get a reference to the tree used for decoding.
    pub fn tree(&self) -> BaoTree {
        self.inner.tree()
    }

    // Decode the next item, if any.
    //
    // For a parent, the pair is read and verified against the hash popped
    // from the stack, then the child hashes are pushed right-first so the
    // left child is popped first (pre order traversal order). For a leaf,
    // the data is read, hashed and verified against the popped hash.
    fn next0(&mut self) -> result::Result<Option<BaoContentItem>, DecodeError> {
        match self.inner.next() {
            Some(BaoChunk::Parent {
                is_root,
                left,
                right,
                node,
                ..
            }) => {
                let pair @ (l_hash, r_hash) = read_parent(&mut self.encoded)
                    .map_err(|e| DecodeError::maybe_parent_not_found(e, node))?;
                let parent_hash = self.stack.pop().unwrap();
                let actual = parent_cv(&l_hash, &r_hash, is_root);
                if parent_hash != actual {
                    return Err(DecodeError::ParentHashMismatch(node));
                }
                // only push hashes for children that the ranges actually cover
                if right {
                    self.stack.push(r_hash);
                }
                if left {
                    self.stack.push(l_hash);
                }
                Ok(Some(Parent { node, pair }.into()))
            }
            Some(BaoChunk::Leaf {
                size,
                is_root,
                start_chunk,
                ..
            }) => {
                self.buf.resize(size, 0);
                self.encoded
                    .read_exact(&mut self.buf)
                    .map_err(|e| DecodeError::maybe_leaf_not_found(e, start_chunk))?;
                let actual = hash_subtree(start_chunk.0, &self.buf, is_root);
                let leaf_hash = self.stack.pop().unwrap();
                if leaf_hash != actual {
                    return Err(DecodeError::LeafHashMismatch(start_chunk));
                }
                Ok(Some(
                    Leaf {
                        offset: start_chunk.to_bytes(),
                        // hand out the verified leaf data without copying
                        data: self.buf.split().freeze(),
                    }
                    .into(),
                ))
            }
            None => Ok(None),
        }
    }
}
364
365impl<'a, R: Read> Iterator for DecodeResponseIter<'a, R> {
366    type Item = result::Result<BaoContentItem, DecodeError>;
367
368    fn next(&mut self) -> Option<Self::Item> {
369        self.next0().transpose()
370    }
371}
372
373/// Encode ranges relevant to a query from a reader and outboard to a writer
374///
375/// This will not validate on writing, so data corruption will be detected on reading
376///
377/// It is possible to encode ranges from a partial file and outboard.
378/// This will either succeed if the requested ranges are all present, or fail
379/// as soon as a range is missing.
380pub fn encode_ranges<D: ReadAt + Size, O: Outboard, W: Write>(
381    data: D,
382    outboard: O,
383    ranges: &ChunkRangesRef,
384    encoded: W,
385) -> result::Result<(), EncodeError> {
386    let data = data;
387    let mut encoded = encoded;
388    let tree = outboard.tree();
389    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
390    for item in tree.ranges_pre_order_chunks_iter_ref(ranges, 0) {
391        match item {
392            BaoChunk::Parent { node, .. } => {
393                let (l_hash, r_hash) = outboard.load(node)?.unwrap();
394                let pair = combine_hash_pair(&l_hash, &r_hash);
395                encoded.write_all(&pair)?;
396            }
397            BaoChunk::Leaf {
398                start_chunk, size, ..
399            } => {
400                let start = start_chunk.to_bytes();
401                let buf = &mut buffer[..size];
402                data.read_exact_at(start, buf)?;
403                encoded.write_all(buf)?;
404            }
405        }
406    }
407    Ok(())
408}
409
/// Encode ranges relevant to a query from a reader and outboard to a writer
///
/// This function validates the data before writing.
///
/// It is possible to encode ranges from a partial file and outboard.
/// This will either succeed if the requested ranges are all present, or fail
/// as soon as a range is missing.
pub fn encode_ranges_validated<D: ReadAt + Size, O: Outboard, W: Write>(
    data: D,
    outboard: O,
    ranges: &ChunkRangesRef,
    encoded: W,
) -> result::Result<(), EncodeError> {
    // stack of expected hashes; the top is the hash of the next item
    let mut stack = SmallVec::<[blake3::Hash; 10]>::new();
    stack.push(outboard.root());
    let data = data;
    let mut encoded = encoded;
    let tree = outboard.tree();
    // scratch buffer large enough for one full chunk group
    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
    let mut out_buf = Vec::new();
    // canonicalize ranges
    let ranges = truncate_ranges(ranges, tree.size());
    for item in tree.ranges_pre_order_chunks_iter_ref(ranges, 0) {
        match item {
            BaoChunk::Parent {
                is_root,
                left,
                right,
                node,
                ..
            } => {
                let (l_hash, r_hash) = outboard.load(node)?.unwrap();
                let actual = parent_cv(&l_hash, &r_hash, is_root);
                let expected = stack.pop().unwrap();
                if actual != expected {
                    return Err(EncodeError::ParentHashMismatch(node));
                }
                // push right first so the left child is popped first,
                // matching the pre order traversal order of the iterator
                if right {
                    stack.push(r_hash);
                }
                if left {
                    stack.push(l_hash);
                }
                let pair = combine_hash_pair(&l_hash, &r_hash);
                encoded.write_all(&pair)?;
            }
            BaoChunk::Leaf {
                start_chunk,
                size,
                is_root,
                ranges,
                ..
            } => {
                let expected = stack.pop().unwrap();
                let start = start_chunk.to_bytes();
                let buf = &mut buffer[..size];
                data.read_exact_at(start, buf)?;
                let (actual, to_write) = if !ranges.is_all() {
                    // we need to encode just a part of the data
                    //
                    // write into an out buffer to ensure we detect mismatches
                    // before writing to the output.
                    out_buf.clear();
                    let actual = encode_selected_rec(
                        start_chunk,
                        buf,
                        is_root,
                        ranges,
                        tree.block_size.to_u32(),
                        true,
                        &mut out_buf,
                    );
                    (actual, &out_buf[..])
                } else {
                    // full leaf: hash the data as-is and write it unchanged
                    let actual = hash_subtree(start_chunk.0, buf, is_root);
                    #[allow(clippy::redundant_slicing)]
                    (actual, &buf[..])
                };
                if actual != expected {
                    return Err(EncodeError::LeafHashMismatch(start_chunk));
                }
                encoded.write_all(to_write)?;
            }
        }
    }
    Ok(())
}
497
498/// Decode a response into a file while updating an outboard.
499///
500/// If you do not want to update an outboard, use [super::outboard::EmptyOutboard] as
501/// the outboard.
502pub fn decode_ranges<R, O, W>(
503    encoded: R,
504    ranges: &ChunkRangesRef,
505    mut target: W,
506    mut outboard: O,
507) -> std::result::Result<(), DecodeError>
508where
509    O: OutboardMut + Outboard,
510    R: Read,
511    W: WriteAt,
512{
513    let iter = DecodeResponseIter::new(outboard.root(), outboard.tree(), encoded, ranges);
514    for item in iter {
515        match item? {
516            BaoContentItem::Parent(Parent { node, pair }) => {
517                outboard.save(node, &pair)?;
518            }
519            BaoContentItem::Leaf(Leaf { offset, data }) => {
520                target.write_all_at(offset, &data)?;
521            }
522        }
523    }
524    Ok(())
525}
526
527/// Compute the outboard for the given data.
528///
529/// Unlike [outboard_post_order], this will work with any outboard
530/// implementation, but it is not guaranteed that writes are sequential.
531pub fn outboard(
532    data: impl Read,
533    tree: BaoTree,
534    mut outboard: impl OutboardMut,
535) -> io::Result<blake3::Hash> {
536    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
537    let hash = outboard_impl(tree, data, &mut outboard, &mut buffer)?;
538    Ok(hash)
539}
540
/// Internal helper for [outboard]. This takes a buffer of the chunk group size.
fn outboard_impl(
    tree: BaoTree,
    mut data: impl Read,
    mut outboard: impl OutboardMut,
    buffer: &mut [u8],
) -> io::Result<blake3::Hash> {
    // do not allocate for small trees
    let mut stack = SmallVec::<[blake3::Hash; 10]>::new();
    debug_assert!(buffer.len() == tree.chunk_group_bytes());
    for item in tree.post_order_chunks_iter() {
        match item {
            BaoChunk::Parent { is_root, node, .. } => {
                // post order: both children have already been pushed,
                // with the right child on top of the stack
                let right_hash = stack.pop().unwrap();
                let left_hash = stack.pop().unwrap();
                outboard.save(node, &(left_hash, right_hash))?;
                let parent = parent_cv(&left_hash, &right_hash, is_root);
                stack.push(parent);
            }
            BaoChunk::Leaf {
                size,
                is_root,
                start_chunk,
                ..
            } => {
                // read and hash one leaf, pushing its hash for the parent
                let buf = &mut buffer[..size];
                data.read_exact(buf)?;
                let hash = hash_subtree(start_chunk.0, buf, is_root);
                stack.push(hash);
            }
        }
    }
    // after a full post order traversal only the root hash remains
    debug_assert_eq!(stack.len(), 1);
    let hash = stack.pop().unwrap();
    Ok(hash)
}
577
578/// Compute the post order outboard for the given data, writing into a io::Write
579///
580/// For the post order outboard, writes to the target are sequential.
581///
582/// This will not add the size to the output. You need to store it somewhere else
583/// or append it yourself.
584pub fn outboard_post_order(
585    data: impl Read,
586    tree: BaoTree,
587    mut outboard: impl Write,
588) -> io::Result<blake3::Hash> {
589    let mut buffer = vec![0u8; tree.chunk_group_bytes()];
590    let hash = outboard_post_order_impl(tree, data, &mut outboard, &mut buffer)?;
591    Ok(hash)
592}
593
/// Internal helper for [outboard_post_order]. This takes a buffer of the chunk group size.
fn outboard_post_order_impl(
    tree: BaoTree,
    mut data: impl Read,
    mut outboard: impl Write,
    buffer: &mut [u8],
) -> io::Result<blake3::Hash> {
    // do not allocate for small trees
    let mut stack = SmallVec::<[blake3::Hash; 10]>::new();
    debug_assert!(buffer.len() == tree.chunk_group_bytes());
    for item in tree.post_order_chunks_iter() {
        match item {
            BaoChunk::Parent { is_root, .. } => {
                // post order: both children have already been pushed,
                // with the right child on top of the stack
                let right_hash = stack.pop().unwrap();
                let left_hash = stack.pop().unwrap();
                // write the pair sequentially: left hash, then right hash
                outboard.write_all(left_hash.as_bytes())?;
                outboard.write_all(right_hash.as_bytes())?;
                let parent = parent_cv(&left_hash, &right_hash, is_root);
                stack.push(parent);
            }
            BaoChunk::Leaf {
                size,
                is_root,
                start_chunk,
                ..
            } => {
                // read and hash one leaf, pushing its hash for the parent
                let buf = &mut buffer[..size];
                data.read_exact(buf)?;
                let hash = hash_subtree(start_chunk.0, buf, is_root);
                stack.push(hash);
            }
        }
    }
    // after a full post order traversal only the root hash remains
    debug_assert_eq!(stack.len(), 1);
    let hash = stack.pop().unwrap();
    Ok(hash)
}
631
632fn read_parent(mut from: impl Read) -> std::io::Result<(blake3::Hash, blake3::Hash)> {
633    let mut buf = [0; 64];
634    from.read_exact(&mut buf)?;
635    let l_hash = blake3::Hash::from(<[u8; 32]>::try_from(&buf[..32]).unwrap());
636    let r_hash = blake3::Hash::from(<[u8; 32]>::try_from(&buf[32..]).unwrap());
637    Ok((l_hash, r_hash))
638}
639
640/// Copy an outboard to another outboard.
641///
642/// This can be used to persist an in memory outboard or to change from
643/// pre-order to post-order.
644pub fn copy(from: impl Outboard, mut to: impl OutboardMut) -> io::Result<()> {
645    let tree = from.tree();
646    for node in tree.pre_order_nodes_iter() {
647        if let Some(hash_pair) = from.load(node)? {
648            to.save(node, &hash_pair)?;
649        }
650    }
651    Ok(())
652}
653
654#[cfg(feature = "validate")]
655mod validate {
656    use std::{io, ops::Range};
657
658    use genawaiter::sync::{Co, Gen};
659    use positioned_io::ReadAt;
660
661    use crate::{
662        blake3, hash_subtree, io::LocalBoxFuture, rec::truncate_ranges, split, BaoTree, ChunkNum,
663        ChunkRangesRef, TreeNode,
664    };
665
666    use super::Outboard;
667
    /// Given a data file and an outboard, compute all valid ranges.
    ///
    /// This is not cheap since it recomputes the hashes for all chunks.
    ///
    /// To reduce the amount of work, you can specify a range you are interested in.
    pub fn valid_ranges<'a, O, D>(
        outboard: O,
        data: D,
        ranges: &'a ChunkRangesRef,
    ) -> impl IntoIterator<Item = io::Result<Range<ChunkNum>>> + 'a
    where
        O: Outboard + 'a,
        D: ReadAt + 'a,
    {
        // drive the recursive validator inside a generator; valid ranges are
        // yielded through `co`, and a final error (if any) is yielded last
        Gen::new(move |co| async move {
            if let Err(cause) = RecursiveDataValidator::validate(outboard, data, ranges, &co).await
            {
                co.yield_(Err(cause)).await;
            }
        })
    }
689
    // State for the recursive validation of a data file against an outboard.
    struct RecursiveDataValidator<'a, O: Outboard, D: ReadAt> {
        tree: BaoTree,
        // filled size of the shifted tree, needed for right_descendant
        shifted_filled_size: TreeNode,
        outboard: O,
        data: D,
        // scratch buffer of chunk group size for hashing leaf data
        buffer: Vec<u8>,
        // generator handle through which valid ranges are yielded
        co: &'a Co<io::Result<Range<ChunkNum>>>,
    }
698
    impl<'a, O: Outboard, D: ReadAt> RecursiveDataValidator<'a, O, D> {
        // Entry point: validate the data against the outboard and yield all
        // valid chunk ranges through `co`.
        async fn validate(
            outboard: O,
            data: D,
            ranges: &ChunkRangesRef,
            co: &Co<io::Result<Range<ChunkNum>>>,
        ) -> io::Result<()> {
            let tree = outboard.tree();
            let mut buffer = vec![0u8; tree.chunk_group_bytes()];
            if tree.blocks() == 1 {
                // special case for a tree that fits in one block / chunk group
                let tmp = &mut buffer[..tree.size().try_into().unwrap()];
                data.read_exact_at(0, tmp)?;
                let actual = hash_subtree(0, tmp, true);
                if actual == outboard.root() {
                    co.yield_(Ok(ChunkNum(0)..tree.chunks())).await;
                }
                return Ok(());
            }
            // restrict the ranges to the tree size, then walk the shifted tree
            let ranges = truncate_ranges(ranges, tree.size());
            let root_hash = outboard.root();
            let (shifted_root, shifted_filled_size) = tree.shifted();
            let mut validator = RecursiveDataValidator {
                tree,
                shifted_filled_size,
                outboard,
                data,
                buffer,
                co,
            };
            validator
                .validate_rec(&root_hash, shifted_root, true, ranges)
                .await
        }

        // Hash the data bytes in `range` and yield the corresponding chunk
        // range if the hash matches `hash`.
        async fn yield_if_valid(
            &mut self,
            range: Range<u64>,
            hash: &blake3::Hash,
            is_root: bool,
        ) -> io::Result<()> {
            let len = (range.end - range.start).try_into().unwrap();
            let tmp = &mut self.buffer[..len];
            self.data.read_exact_at(range.start, tmp)?;
            // is_root is always false because the case of a single chunk group is handled before calling this function
            let actual = hash_subtree(ChunkNum::full_chunks(range.start).0, tmp, is_root);
            if &actual == hash {
                // yield the left range
                self.co
                    .yield_(Ok(
                        ChunkNum::full_chunks(range.start)..ChunkNum::chunks(range.end)
                    ))
                    .await;
            }
            io::Result::Ok(())
        }

        // Recursively validate the subtree rooted at `shifted`.
        // Boxed future because async recursion needs an indirection.
        fn validate_rec<'b>(
            &'b mut self,
            parent_hash: &'b blake3::Hash,
            shifted: TreeNode,
            is_root: bool,
            ranges: &'b ChunkRangesRef,
        ) -> LocalBoxFuture<'b, io::Result<()>> {
            Box::pin(async move {
                if ranges.is_empty() {
                    // this part of the tree is not of interest, so we can skip it
                    return Ok(());
                }
                // map from the shifted tree back to the real tree
                let node = shifted.subtract_block_size(self.tree.block_size.0);
                let (l, m, r) = self.tree.leaf_byte_ranges3(node);
                if !self.tree.is_relevant_for_outboard(node) {
                    // no hash pair is stored for this node, so check its bytes directly
                    self.yield_if_valid(l..r, parent_hash, is_root).await?;
                    return Ok(());
                }
                let Some((l_hash, r_hash)) = self.outboard.load(node)? else {
                    // outboard is incomplete, we can't validate
                    return Ok(());
                };
                let actual = blake3::guts::parent_cv(&l_hash, &r_hash, is_root);
                if &actual != parent_hash {
                    // hash mismatch, we can't validate
                    return Ok(());
                };
                let (l_ranges, r_ranges) = split(ranges, node);
                if shifted.is_leaf() {
                    // validate the two halves of the leaf independently
                    if !l_ranges.is_empty() {
                        self.yield_if_valid(l..m, &l_hash, false).await?;
                    }
                    if !r_ranges.is_empty() {
                        self.yield_if_valid(m..r, &r_hash, false).await?;
                    }
                } else {
                    // recurse (we are in the domain of the shifted tree)
                    let left = shifted.left_child().unwrap();
                    self.validate_rec(&l_hash, left, false, l_ranges).await?;
                    let right = shifted.right_descendant(self.shifted_filled_size).unwrap();
                    self.validate_rec(&r_hash, right, false, r_ranges).await?;
                }
                Ok(())
            })
        }
    }
802
    /// Given just an outboard, compute all valid ranges.
    ///
    /// This is not cheap since it recomputes the hashes for all chunks.
    pub fn valid_outboard_ranges<'a, O>(
        outboard: O,
        ranges: &'a ChunkRangesRef,
    ) -> impl IntoIterator<Item = io::Result<Range<ChunkNum>>> + 'a
    where
        O: Outboard + 'a,
    {
        // drive the recursive validator inside a generator; valid ranges are
        // yielded through `co`, and a final error (if any) is yielded last
        Gen::new(move |co| async move {
            if let Err(cause) = RecursiveOutboardValidator::validate(outboard, ranges, &co).await {
                co.yield_(Err(cause)).await;
            }
        })
    }
819
    // State for the recursive validation of an outboard on its own.
    struct RecursiveOutboardValidator<'a, O: Outboard> {
        tree: BaoTree,
        // filled size of the shifted tree, needed for right_descendant
        shifted_filled_size: TreeNode,
        outboard: O,
        // generator handle through which valid ranges are yielded
        co: &'a Co<io::Result<Range<ChunkNum>>>,
    }
826
    impl<'a, O: Outboard> RecursiveOutboardValidator<'a, O> {
        // Entry point: validate the outboard and yield all chunk ranges whose
        // hash pairs are consistent with the root.
        async fn validate(
            outboard: O,
            ranges: &ChunkRangesRef,
            co: &Co<io::Result<Range<ChunkNum>>>,
        ) -> io::Result<()> {
            let tree = outboard.tree();
            if tree.blocks() == 1 {
                // special case for a tree that fits in one block / chunk group
                co.yield_(Ok(ChunkNum(0)..tree.chunks())).await;
                return Ok(());
            }
            // restrict the ranges to the tree size, then walk the shifted tree
            let ranges = truncate_ranges(ranges, tree.size());
            let root_hash = outboard.root();
            let (shifted_root, shifted_filled_size) = tree.shifted();
            let mut validator = RecursiveOutboardValidator {
                tree,
                shifted_filled_size,
                outboard,
                co,
            };
            validator
                .validate_rec(&root_hash, shifted_root, true, ranges)
                .await
        }

        // Recursively validate the subtree rooted at `shifted`.
        // Boxed future because async recursion needs an indirection.
        fn validate_rec<'b>(
            &'b mut self,
            parent_hash: &'b blake3::Hash,
            shifted: TreeNode,
            is_root: bool,
            ranges: &'b ChunkRangesRef,
        ) -> LocalBoxFuture<'b, io::Result<()>> {
            Box::pin(async move {
                // translate a byte range into a chunk range and yield it
                let yield_node_range = |range: Range<u64>| {
                    self.co.yield_(Ok(
                        ChunkNum::full_chunks(range.start)..ChunkNum::chunks(range.end)
                    ))
                };
                if ranges.is_empty() {
                    // this part of the tree is not of interest, so we can skip it
                    return Ok(());
                }
                // map from the shifted tree back to the real tree
                let node = shifted.subtract_block_size(self.tree.block_size.0);
                let (l, m, r) = self.tree.leaf_byte_ranges3(node);
                if !self.tree.is_relevant_for_outboard(node) {
                    // no hash pair is stored for this node, so nothing to check
                    yield_node_range(l..r).await;
                    return Ok(());
                }
                let Some((l_hash, r_hash)) = self.outboard.load(node)? else {
                    // outboard is incomplete, we can't validate
                    return Ok(());
                };
                let actual = blake3::guts::parent_cv(&l_hash, &r_hash, is_root);
                if &actual != parent_hash {
                    // hash mismatch, we can't validate
                    return Ok(());
                };
                let (l_ranges, r_ranges) = split(ranges, node);
                if shifted.is_leaf() {
                    if !l_ranges.is_empty() {
                        yield_node_range(l..m).await;
                    }
                    if !r_ranges.is_empty() {
                        yield_node_range(m..r).await;
                    }
                } else {
                    // recurse (we are in the domain of the shifted tree)
                    let left = shifted.left_child().unwrap();
                    self.validate_rec(&l_hash, left, false, l_ranges).await?;
                    let right = shifted.right_descendant(self.shifted_filled_size).unwrap();
                    self.validate_rec(&r_hash, right, false, r_ranges).await?;
                }
                Ok(())
            })
        }
    }
904}
905#[cfg(feature = "validate")]
906pub use validate::{valid_outboard_ranges, valid_ranges};