fleek_blake3/
tree.rs

1use crate::{join, platform};
2use arrayref::array_ref;
3use arrayvec::ArrayVec;
4use core::cmp;
5
6/// Block size for Fleek's proof of delivery. Which is 256KiB. This number is in Blake3
7/// chunks and the code is implemented assuming this is always a power of two.
8const TREE_BLOCK_SIZE_IN_CHUNK_LOG_2: usize = 8;
9const TREE_BLOCK_SIZE_IN_CHUNK: usize = 1 << TREE_BLOCK_SIZE_IN_CHUNK_LOG_2;
10const TREE_BLOCK_SIZE_BYTES: usize = TREE_BLOCK_SIZE_IN_CHUNK * crate::CHUNK_LEN;
11const TREE_BLOCK_SIZE_IN_CHUNK_MOD_MASK: u64 = (TREE_BLOCK_SIZE_IN_CHUNK - 1) as u64;
12const TREE_MAX_TREE_DEPTH: usize = crate::MAX_DEPTH - TREE_BLOCK_SIZE_IN_CHUNK_LOG_2;
13
14/// Initialization vector for a Blake3 hasher, if `BlockHasher::new()` or `HashTreeBuilder::new()`
15/// are not sufficient for you and you need more customization on the IV, such as keyed hash
16/// function, you can achieve it by constructing the [`IV`] first using a method like
17/// [`IV::new_keyed`] and convert it to either of these by simply using the [`Into`] trait.
18#[derive(Clone, Copy)]
19pub struct IV {
20    key: crate::CVWords,
21    flags: u8,
22}
23
24/// The hash tree generated using a [`HashTreeBuilder`].
25#[derive(Clone)]
26pub struct HashTree {
27    /// The root hash of the content. This should be the same as when you
28    /// run a normal blake3 hash function.
29    pub hash: crate::Hash,
30    /// The array representation of the tree full tree.
31    pub tree: Vec<[u8; 32]>,
32}
33
34/// An incremental Blake3 hasher that also maintains an array representation of the full
35/// Blake3 tree, the generated array will be in the same stack ordering as Blake3.
36///
37/// The number is this tree are the index of each of these nodes in the array representation,
38/// and because of how Blake3 functions, the left subtree will always contain a power of 2 number
39/// of chunks.
40///
41/// ```text
42///                 
43///           8
44///          / \
45///         /   \
46///        6     7
47///       / \
48///      /   \
49///     /     \
50///    2       5
51///   / \     / \
52///  /   \   /   \
53/// 0     1 3     4
54///
55/// ```
56#[derive(Clone)]
57pub struct HashTreeBuilder {
58    tree: Vec<[u8; 32]>,
59    block_state: BlockHasher,
60    cv_stack: ArrayVec<crate::CVBytes, { TREE_MAX_TREE_DEPTH + 1 }>,
61    block_counter: u64,
62}
63
64/// Incremental hasher for a single block, this can only be used to hash only one block.
65#[derive(Clone)]
66pub struct BlockHasher {
67    key: crate::CVWords,
68    chunk_state: crate::ChunkState,
69    cv_stack: ArrayVec<crate::CVBytes, { TREE_BLOCK_SIZE_IN_CHUNK_LOG_2 + 1 }>,
70}
71
72impl IV {
73    const fn new_internal(key: &crate::CVWords, flags: u8) -> Self {
74        Self { key: *key, flags }
75    }
76
77    /// Create a new `IV` from the default configurations. This is the equivalent
78    /// IV of creating a `Hasher::new()`.
79    pub const fn new() -> Self {
80        Self::new_internal(crate::IV, 0)
81    }
82
83    /// The keyed hash function.
84    ///
85    /// This is suitable for use as a message authentication code, for example to
86    /// replace an HMAC instance. In that use case, the constant-time equality
87    /// checking provided by [`Hash`](crate::Hash) is almost always a security
88    /// requirement, and callers need to be careful not to compare MACs as raw
89    /// bytes.
90    pub fn new_keyed(key: &[u8; crate::KEY_LEN]) -> Self {
91        let key_words = platform::words_from_le_bytes_32(key);
92        Self::new_internal(&key_words, crate::KEYED_HASH)
93    }
94
95    /// The key derivation function.
96    ///
97    /// Given cryptographic key material of any length and a context string of any
98    /// length, this function outputs a 32-byte derived subkey. **The context string
99    /// should be hardcoded, globally unique, and application-specific.** A good
100    /// default format for such strings is `"[application] [commit timestamp]
101    /// [purpose]"`, e.g., `"example.com 2019-12-25 16:18:03 session tokens v1"`.
102    ///
103    /// Key derivation is important when you want to use the same key in multiple
104    /// algorithms or use cases. Using the same key with different cryptographic
105    /// algorithms is generally forbidden, and deriving a separate subkey for each
106    /// use case protects you from bad interactions. Derived keys also mitigate the
107    /// damage from one part of your application accidentally leaking its key.
108    pub fn new_derive_key(context: &str) -> Self {
109        let context_key = crate::hash_all_at_once::<join::SerialJoin>(
110            context.as_bytes(),
111            crate::IV,
112            crate::DERIVE_KEY_CONTEXT,
113        )
114        .root_hash();
115        let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes());
116        Self::new_internal(&context_key_words, crate::DERIVE_KEY_MATERIAL)
117    }
118}
119
120impl IV {
121    /// Merge two children into the parent hash, the `is_root` determines if the result
122    /// is the finalized hash.
123    pub fn merge(&self, left: &[u8; 32], right: &[u8; 32], is_root: bool) -> [u8; 32] {
124        let output = crate::parent_node_output(
125            left,
126            right,
127            &self.key,
128            self.flags,
129            platform::Platform::detect(),
130        );
131        if is_root {
132            output.root_hash().0
133        } else {
134            output.chaining_value()
135        }
136    }
137}
138
139impl Default for IV {
140    fn default() -> Self {
141        Self::new()
142    }
143}
144
145impl HashTreeBuilder {
146    /// Create a new [`HashTreeBuilder`] with the default IV.
147    pub fn new() -> Self {
148        IV::new().into()
149    }
150
151    /// Input some more bytes to this hasher, this function is always single-threaded
152    /// use [`Self::update_rayon`] to achieve multi-threaded implementation with rayon.
153    ///
154    /// Also for the best performance it is recommended that you feed full blocks to this
155    /// function or at least powers of two.
156    pub fn update(&mut self, input: &[u8]) {
157        self.update_with_join::<join::SerialJoin>(input);
158    }
159
160    /// Just like `update` but runs the hasher using rayon for work-stealing and parallelism.
161    #[cfg(feature = "rayon")]
162    pub fn update_rayon(&mut self, input: &[u8]) {
163        self.update_with_join::<join::RayonJoin>(input)
164    }
165
166    pub fn update_with_join<J: join::Join>(&mut self, mut input: &[u8]) {
167        // If we have an incomplete block, try to feed it more bytes.
168        let block_state_len = self.block_state.len();
169        if block_state_len > 0 {
170            let want = TREE_BLOCK_SIZE_BYTES - block_state_len;
171            let take = cmp::min(want, input.len());
172            self.block_state.update_with_join::<J>(&input[..take]);
173            input = &input[take..];
174
175            if input.is_empty() {
176                return;
177            }
178
179            // The block is filled, let's finalize it and push it to the stack.
180            let block_cv = self.block_state.final_output().chaining_value();
181            self.push_cv(&block_cv);
182            self.block_state.move_to_next_block();
183
184            debug_assert_eq!(
185                self.block_state.chunk_state.len(),
186                0,
187                "chunk state must be empty"
188            );
189        }
190
191        while input.len() >= 2 * TREE_BLOCK_SIZE_BYTES {
192            // If the caller of this function keeps passing chunks of exact size of the fleek
193            // block size, we will only ever reach this loop in the code which is super fast.
194            let cv_pair = crate::compress_subtree_to_parent_node::<J>(
195                &input[..2 * TREE_BLOCK_SIZE_BYTES],
196                &self.block_state.key,
197                self.block_state.chunk_state.chunk_counter,
198                self.block_state.chunk_state.flags,
199                self.block_state.chunk_state.platform,
200            );
201
202            let left_cv = array_ref!(cv_pair, 0, 32);
203            let right_cv = array_ref!(cv_pair, 32, 32);
204            self.push_cv(left_cv);
205            self.push_cv(right_cv);
206
207            self.block_state.chunk_state.chunk_counter += 2 * TREE_BLOCK_SIZE_IN_CHUNK as u64;
208            input = &input[2 * TREE_BLOCK_SIZE_BYTES..];
209        }
210
211        // If we are hashing any block other than the first one (i.e cv_stack.len() > 0), we
212        // already know we don't need to use that block as a root_hash, so we can compress it
213        // here.
214        let is_non_first_block = !self.cv_stack.is_empty() && input.len() == TREE_BLOCK_SIZE_BYTES;
215        // If we have a complete block with an additional byte, we know we can finalize it here.
216        let input_larger_than_one_block = input.len() > TREE_BLOCK_SIZE_BYTES;
217
218        if input_larger_than_one_block || is_non_first_block {
219            let cv_pair = crate::compress_subtree_to_parent_node::<J>(
220                &input[..TREE_BLOCK_SIZE_BYTES],
221                &self.block_state.key,
222                self.block_state.chunk_state.chunk_counter,
223                self.block_state.chunk_state.flags,
224                self.block_state.chunk_state.platform,
225            );
226
227            let left_cv = array_ref!(cv_pair, 0, 32);
228            let right_cv = array_ref!(cv_pair, 32, 32);
229            let parent_output = crate::parent_node_output(
230                &left_cv,
231                &right_cv,
232                &self.block_state.key,
233                self.block_state.chunk_state.flags,
234                self.block_state.chunk_state.platform,
235            )
236            .chaining_value();
237
238            self.push_cv(&parent_output);
239            self.block_state.chunk_state.chunk_counter += TREE_BLOCK_SIZE_IN_CHUNK as u64;
240            input = &input[TREE_BLOCK_SIZE_BYTES..];
241        }
242
243        debug_assert!(input.len() <= TREE_BLOCK_SIZE_BYTES);
244        if !input.is_empty() {
245            self.merge_cv_stack();
246            self.block_state.update_with_join::<J>(input);
247        }
248    }
249
250    #[inline(always)]
251    fn merge_cv_stack(&mut self) {
252        let post_merge_stack_len = self.block_counter.count_ones() as usize;
253        while self.cv_stack.len() > post_merge_stack_len {
254            let right_child = self.cv_stack.pop().unwrap();
255            let left_child = self.cv_stack.pop().unwrap();
256            let parent_cv = crate::parent_node_output(
257                &left_child,
258                &right_child,
259                &self.block_state.key,
260                self.block_state.chunk_state.flags,
261                self.block_state.chunk_state.platform,
262            )
263            .chaining_value();
264            self.cv_stack.push(parent_cv);
265            self.tree.push(parent_cv);
266        }
267    }
268
269    fn push_cv(&mut self, new_cv: &crate::CVBytes) {
270        self.merge_cv_stack();
271        self.cv_stack.push(*new_cv);
272        self.tree.push(*new_cv);
273        self.block_counter += 1;
274    }
275
276    fn final_output(&mut self) -> crate::Output {
277        // If the current chunk is the only chunk, that makes it the root node
278        // also. Convert it directly into an Output. Otherwise, we need to
279        // merge subtrees below.
280        if self.cv_stack.is_empty() {
281            return self.block_state.final_output();
282        }
283
284        let mut output: crate::Output;
285        let mut num_cvs_remaining = self.cv_stack.len();
286
287        if self.block_state.len() > 0 {
288            debug_assert_eq!(
289                self.cv_stack.len(),
290                self.block_counter.count_ones() as usize,
291                "cv stack does not need a merge"
292            );
293            output = self.block_state.final_output();
294        } else {
295            debug_assert!(self.cv_stack.len() >= 2);
296            output = crate::parent_node_output(
297                &self.cv_stack[num_cvs_remaining - 2],
298                &self.cv_stack[num_cvs_remaining - 1],
299                &self.block_state.key,
300                self.block_state.chunk_state.flags,
301                self.block_state.chunk_state.platform,
302            );
303            num_cvs_remaining -= 2;
304        }
305        while num_cvs_remaining > 0 {
306            self.tree.push(output.chaining_value());
307            output = crate::parent_node_output(
308                &self.cv_stack[num_cvs_remaining - 1],
309                &output.chaining_value(),
310                &self.block_state.key,
311                self.block_state.chunk_state.flags,
312                self.block_state.chunk_state.platform,
313            );
314            num_cvs_remaining -= 1;
315        }
316        output
317    }
318
319    /// Return the result of this execution including the root hash and the tree.
320    pub fn finalize(mut self) -> HashTree {
321        let root_hash = self.final_output().root_hash();
322        self.tree.push(root_hash.0.clone());
323        HashTree {
324            hash: root_hash,
325            tree: self.tree,
326        }
327    }
328
329    /// Return the hash of the given block, returns `None` if we are not there yet.
330    pub fn get_block_hash(&self, block_counter: usize) -> Option<[u8; 32]> {
331        let index = block_counter * 2 - block_counter.count_ones() as usize;
332        self.tree.get(index).copied()
333    }
334}
335
336impl BlockHasher {
337    /// Create a new block hasher using the default `IV`
338    pub fn new() -> Self {
339        IV::new().into()
340    }
341
342    pub fn update(&mut self, input: &[u8]) {
343        self.update_with_join::<join::SerialJoin>(input);
344    }
345
346    #[cfg(feature = "rayon")]
347    pub fn update_rayon(&mut self, input: &[u8]) {
348        self.update_with_join::<join::RayonJoin>(input)
349    }
350
351    pub fn update_with_join<J: join::Join>(&mut self, mut input: &[u8]) {
352        debug_assert!(
353            input.len() + self.len() <= TREE_BLOCK_SIZE_BYTES,
354            "data for HalfBlockState exceeded its limit."
355        );
356
357        if self.chunk_state.len() > 0 {
358            let want = crate::CHUNK_LEN - self.chunk_state.len();
359            let take = cmp::min(want, input.len());
360            self.chunk_state.update(&input[..take]);
361            input = &input[take..];
362
363            if input.is_empty() {
364                return;
365            }
366
367            debug_assert_eq!(self.chunk_state.len(), crate::CHUNK_LEN);
368
369            // There is more data coming in, which means this is not the root so we can
370            // finalize the chunk state here and move the chunk counter forward.
371            let chunk_cv = self.chunk_state.output().chaining_value();
372            self.push_cv(&chunk_cv, self.chunk_state.chunk_counter);
373
374            // Reset the chunk state and move the chunk_counter forward.
375            self.chunk_state = crate::ChunkState::new(
376                &self.key,
377                self.chunk_state.chunk_counter + 1,
378                self.chunk_state.flags,
379                self.chunk_state.platform,
380            );
381
382            // Chunk state is complete. Move on with reading the full chunks.
383        }
384
385        while input.len() > crate::CHUNK_LEN {
386            debug_assert_eq!(self.chunk_state.len(), 0, "no partial chunk data");
387            debug_assert_eq!(crate::CHUNK_LEN.count_ones(), 1, "power of 2 chunk len");
388            let mut subtree_len = crate::largest_power_of_two_leq(input.len());
389            let count_so_far = self.chunk_state.chunk_counter * crate::CHUNK_LEN as u64;
390            // Shrink the subtree_len until it evenly divides the count so far.
391            // We know that subtree_len itself is a power of 2, so we can use a
392            // bitmasking trick instead of an actual remainder operation. (Note
393            // that if the caller consistently passes power-of-2 inputs of the
394            // same size, as is hopefully typical, this loop condition will
395            // always fail, and subtree_len will always be the full length of
396            // the input.)
397            //
398            // An aside: We don't have to shrink subtree_len quite this much.
399            // For example, if count_so_far is 1, we could pass 2 chunks to
400            // compress_subtree_to_parent_node. Since we'll get 2 CVs back,
401            // we'll still get the right answer in the end, and we might get to
402            // use 2-way SIMD parallelism. The problem with this optimization,
403            // is that it gets us stuck always hashing 2 chunks. The total
404            // number of chunks will remain odd, and we'll never graduate to
405            // higher degrees of parallelism. See
406            // https://github.com/BLAKE3-team/BLAKE3/issues/69.
407            while (subtree_len - 1) as u64 & count_so_far != 0 {
408                subtree_len /= 2;
409            }
410            // The shrunken subtree_len might now be 1 chunk long. If so, hash
411            // that one chunk by itself. Otherwise, compress the subtree into a
412            // pair of CVs.
413            let subtree_chunks = (subtree_len / crate::CHUNK_LEN) as u64;
414            if subtree_len <= crate::CHUNK_LEN {
415                debug_assert_eq!(subtree_len, crate::CHUNK_LEN);
416                self.push_cv(
417                    &crate::ChunkState::new(
418                        &self.key,
419                        self.chunk_state.chunk_counter,
420                        self.chunk_state.flags,
421                        self.chunk_state.platform,
422                    )
423                    .update(&input[..subtree_len])
424                    .output()
425                    .chaining_value(),
426                    self.chunk_state.chunk_counter,
427                );
428            } else {
429                // This is the high-performance happy path, though getting here
430                // depends on the caller giving us a long enough input.
431                let cv_pair = crate::compress_subtree_to_parent_node::<J>(
432                    &input[..subtree_len],
433                    &self.key,
434                    self.chunk_state.chunk_counter,
435                    self.chunk_state.flags,
436                    self.chunk_state.platform,
437                );
438                let left_cv = array_ref!(cv_pair, 0, 32);
439                let right_cv = array_ref!(cv_pair, 32, 32);
440                // Push the two CVs we received into the CV stack in order. Because
441                // the stack merges lazily, this guarantees we aren't merging the
442                // root.
443                self.push_cv(left_cv, self.chunk_state.chunk_counter);
444                self.push_cv(
445                    right_cv,
446                    self.chunk_state.chunk_counter + (subtree_chunks / 2),
447                );
448            }
449            self.chunk_state.chunk_counter += subtree_chunks;
450            input = &input[subtree_len..];
451        }
452
453        // What remains is 1 chunk or less. Add it to the chunk state.
454        debug_assert!(input.len() <= crate::CHUNK_LEN);
455        if !input.is_empty() {
456            self.chunk_state.update(input);
457            self.merge_cv_stack(self.chunk_state.chunk_counter);
458        }
459    }
460
461    #[inline(always)]
462    fn merge_cv_stack(&mut self, total_len: u64) {
463        // Here we diverge from the default incremental hasher implementation,
464        // the `& FLEEK_BLOCK_SIZE_IN_CHUNK_MASK` is basically `% FLEEK_BLOCK_SIZE_IN_CHUNK`,
465        // and we do this because the stack of the previous blocks are not going to be part
466        // of the block state.
467        // Because of that at the beginning of every block, we basically reset the counter,
468        // hence resetting the expected number of items already in the stack to zero as well.
469        let post_merge_stack_len =
470            (total_len & TREE_BLOCK_SIZE_IN_CHUNK_MOD_MASK).count_ones() as usize;
471        while self.cv_stack.len() > post_merge_stack_len {
472            let right_child = self.cv_stack.pop().unwrap();
473            let left_child = self.cv_stack.pop().unwrap();
474            let parent_output = crate::parent_node_output(
475                &left_child,
476                &right_child,
477                &self.key,
478                self.chunk_state.flags,
479                self.chunk_state.platform,
480            );
481            self.cv_stack.push(parent_output.chaining_value());
482        }
483    }
484
485    fn push_cv(&mut self, new_cv: &crate::CVBytes, chunk_counter: u64) {
486        self.merge_cv_stack(chunk_counter);
487        self.cv_stack.push(*new_cv);
488    }
489
490    /// Make this block state ready for the next block, clears the stack and moves the
491    /// chunk_counter by one.
492    #[inline(always)]
493    pub(crate) fn move_to_next_block(&mut self) {
494        self.cv_stack.clear();
495        self.chunk_state = crate::ChunkState::new(
496            &self.key,
497            self.chunk_state.chunk_counter + 1,
498            self.chunk_state.flags,
499            self.chunk_state.platform,
500        );
501
502        self.chunk_state.chunk_counter -=
503            self.chunk_state.chunk_counter & TREE_BLOCK_SIZE_IN_CHUNK_MOD_MASK;
504    }
505
506    /// Set the index of the block which we're hashing with this [`BlockHasher`].
507    ///
508    /// # Panics
509    ///
510    /// This function can only be called right after instantiating a [`BlockHasher`],
511    /// this will panic if it gets called after any bytes were consume by the hasher.
512    pub fn set_block(&mut self, block: usize) {
513        assert!(
514            self.chunk_state.len() == 0 && self.cv_stack.len() == 0,
515            "set_block can only be called before any calls to BlockState::update()."
516        );
517
518        self.chunk_state.chunk_counter = (block as u64) << TREE_BLOCK_SIZE_IN_CHUNK_LOG_2;
519    }
520
521    /// Returns the number of bytes this block hasher has been fed so far.
522    #[inline(always)]
523    pub fn len(&self) -> usize {
524        let mut chunk_counter =
525            (self.chunk_state.chunk_counter & TREE_BLOCK_SIZE_IN_CHUNK_MOD_MASK) as usize;
526
527        if chunk_counter == 0 && self.cv_stack.len() > 0 {
528            chunk_counter = TREE_BLOCK_SIZE_IN_CHUNK;
529        }
530
531        chunk_counter * crate::CHUNK_LEN + self.chunk_state.len()
532    }
533
534    fn final_output(&self) -> crate::Output {
535        // If the current chunk is the only chunk, that makes it the root node
536        // also. Convert it directly into an Output. Otherwise, we need to
537        // merge subtrees below.
538        if self.cv_stack.is_empty() {
539            return self.chunk_state.output();
540        }
541
542        // If there are any bytes in the ChunkState, finalize that chunk and
543        // merge its CV with everything in the CV stack. In that case, the work
544        // we did at the end of update() above guarantees that the stack
545        // doesn't contain any unmerged subtrees that need to be merged first.
546        // (This is important, because if there were two chunk hashes sitting
547        // on top of the stack, they would need to merge with each other, and
548        // merging a new chunk hash into them would be incorrect.)
549        //
550        // If there are no bytes in the ChunkState, we'll merge what's already
551        // in the stack. In this case it's fine if there are unmerged chunks on
552        // top, because we'll merge them with each other. Note that the case of
553        // the empty chunk is taken care of above.
554        let mut output: crate::Output;
555        let mut num_cvs_remaining = self.cv_stack.len();
556        if self.chunk_state.len() > 0 {
557            debug_assert_eq!(
558                self.cv_stack.len(),
559                (self.chunk_state.chunk_counter & TREE_BLOCK_SIZE_IN_CHUNK_MOD_MASK).count_ones()
560                    as usize,
561                "cv stack does not need a merge"
562            );
563            output = self.chunk_state.output();
564        } else {
565            debug_assert!(self.cv_stack.len() >= 2);
566            output = crate::parent_node_output(
567                &self.cv_stack[num_cvs_remaining - 2],
568                &self.cv_stack[num_cvs_remaining - 1],
569                &self.key,
570                self.chunk_state.flags,
571                self.chunk_state.platform,
572            );
573            num_cvs_remaining -= 2;
574        }
575        while num_cvs_remaining > 0 {
576            output = crate::parent_node_output(
577                &self.cv_stack[num_cvs_remaining - 1],
578                &output.chaining_value(),
579                &self.key,
580                self.chunk_state.flags,
581                self.chunk_state.platform,
582            );
583            num_cvs_remaining -= 1;
584        }
585        output
586    }
587
588    /// Returns the final output from the block hasher, if the file is only
589    /// composed of one block, set `is_root` to true to get the finalized
590    /// hash and not the chaining value.
591    pub fn finalize(self, is_root: bool) -> [u8; 32] {
592        if is_root {
593            self.final_output().root_hash().0
594        } else {
595            self.final_output().chaining_value()
596        }
597    }
598}
599
600impl From<IV> for BlockHasher {
601    #[inline]
602    fn from(value: IV) -> Self {
603        BlockHasher {
604            key: value.key,
605            chunk_state: crate::ChunkState::new(
606                &value.key,
607                0,
608                value.flags,
609                platform::Platform::detect(),
610            ),
611            cv_stack: ArrayVec::new(),
612        }
613    }
614}
615
616impl From<IV> for HashTreeBuilder {
617    #[inline]
618    fn from(value: IV) -> Self {
619        HashTreeBuilder {
620            tree: Vec::new(),
621            block_state: value.into(),
622            cv_stack: ArrayVec::new(),
623            block_counter: 0,
624        }
625    }
626}
627
628#[cfg(test)]
629mod tests {
630    use super::*;
631    use crate::platform;
632
633    /// The seed for our LCG randomness used for the testing.
634    const LCG_SEED: [u8; 4] = [11, 13, 17, 19];
635
636    /// A very unsafe but efficient PRNG for the purpose of testing the hash.
637    #[inline(always)]
638    fn lcg(state: [u8; 4]) -> [u8; 4] {
639        (u32::from_be_bytes(state).wrapping_mul(48271) % 0x7fffffff).to_be_bytes()
640    }
641
642    /// Fill a buffer with random bytes.
643    fn fill(mut state: [u8; 4], buffer: &mut [u8]) -> [u8; 4] {
644        for block in buffer.chunks_mut(4) {
645            state = lcg(state);
646            for i in 0..block.len() {
647                block[i] = state[i];
648            }
649        }
650        state
651    }
652
653    struct Tester {
654        hasher: crate::Hasher,
655        tree: HashTreeBuilder,
656        bytes: usize,
657    }
658
659    impl Tester {
660        pub fn new() -> Self {
661            Self {
662                hasher: crate::Hasher::new_derive_key("fleek-test"),
663                tree: IV::new_derive_key("fleek-test").into(),
664                bytes: 0,
665            }
666        }
667
668        pub fn write(&mut self, input: &[u8]) {
669            self.hasher.update(input);
670            self.tree.update(input);
671            self.bytes += input.len();
672
673            let new_block_counter = (self.bytes.saturating_sub(1)) / TREE_BLOCK_SIZE_BYTES;
674            if new_block_counter > 0 {
675                self.tree
676                    .get_block_hash(new_block_counter - 1)
677                    .expect("last block to exists.");
678            }
679
680            assert_eq!(
681                self.tree.block_state.chunk_state.chunk_counter,
682                self.hasher.chunk_state.chunk_counter
683            );
684        }
685
686        pub fn test(self) {
687            let num_blocks = (self.bytes + TREE_BLOCK_SIZE_BYTES - 1) / TREE_BLOCK_SIZE_BYTES;
688            let expected_tree_size = (2 * num_blocks).saturating_sub(1).max(1);
689            let hash = self.hasher.finalize();
690            let result = self.tree.finalize();
691            assert_eq!(result.tree.len(), expected_tree_size);
692            assert_eq!(hash, result.hash);
693            assert_eq!(result.tree[result.tree.len() - 1], hash.0);
694        }
695    }
696
697    #[test]
698    fn block_state_against_reference() {
699        let mut block_state = BlockHasher::new();
700        let mut hasher = crate::Hasher::new();
701        for i in 0..TREE_BLOCK_SIZE_BYTES {
702            block_state.update_with_join::<join::SerialJoin>(&[i as u8]);
703            hasher.update(&[i as u8]);
704        }
705        let expected = hasher.final_output().chaining_value();
706        let actual = block_state.final_output().chaining_value();
707        assert_eq!(actual, expected);
708    }
709
710    #[test]
711    fn block_state_2_block_root_hash() {
712        let mut hasher = crate::Hasher::new();
713
714        let c1 = {
715            let mut block_state = BlockHasher::new();
716            for i in 0..TREE_BLOCK_SIZE_BYTES {
717                let b = ((i + 17) % 256) as u8;
718                block_state.update(&[b]);
719                hasher.update(&[b]);
720            }
721            block_state.final_output().chaining_value()
722        };
723
724        let c2 = {
725            let mut block_state = BlockHasher::new();
726            block_state.chunk_state.chunk_counter = TREE_BLOCK_SIZE_IN_CHUNK as u64;
727
728            for i in 0..TREE_BLOCK_SIZE_BYTES {
729                let b = ((i + 5) % 256) as u8;
730                block_state.update(&[b]);
731                hasher.update(&[b]);
732            }
733
734            block_state.final_output().chaining_value()
735        };
736
737        let expected = hasher.final_output().root_hash();
738        let actual =
739            crate::parent_node_output(&c1, &c2, crate::IV, 0, platform::Platform::detect())
740                .root_hash();
741
742        assert_eq!(actual, expected);
743    }
744
745    fn block_state_3rd_block_write_by(chunk_size: usize) {
746        // Create some random looking data.
747        let mut data = [0; TREE_BLOCK_SIZE_BYTES];
748        for i in 0..TREE_BLOCK_SIZE_BYTES {
749            data[i] = i as u8;
750        }
751
752        // Hash the data as the 3rd block by writing it byte by byte.
753        let mut block_state = BlockHasher::new();
754        block_state.chunk_state.chunk_counter = 2 * TREE_BLOCK_SIZE_IN_CHUNK as u64;
755        for chunk in data.chunks(chunk_size) {
756            block_state.update(chunk);
757        }
758        let actual = block_state.final_output().chaining_value();
759
760        // Compute the same result using the trusted code.
761        let cv_pair = crate::compress_subtree_to_parent_node::<join::SerialJoin>(
762            &data,
763            crate::IV,
764            2 * TREE_BLOCK_SIZE_IN_CHUNK as u64,
765            0,
766            platform::Platform::detect(),
767        );
768
769        // Generate the root chaining value by merging the parent pair.
770        let left_cv = array_ref!(cv_pair, 0, 32);
771        let right_cv = array_ref!(cv_pair, 32, 32);
772        let expected = crate::parent_node_output(
773            &left_cv,
774            &right_cv,
775            crate::IV,
776            0,
777            platform::Platform::detect(),
778        )
779        .chaining_value();
780
781        assert_eq!(actual, expected);
782    }
783
784    #[test]
785    fn block_state_3rd_block_byte_by_byte() {
786        block_state_3rd_block_write_by(1);
787        block_state_3rd_block_write_by(2);
788        block_state_3rd_block_write_by(64);
789        block_state_3rd_block_write_by(65);
790    }
791
792    #[test]
793    fn block_state_3rd_block_by_chunks() {
794        for i in 1..=TREE_BLOCK_SIZE_IN_CHUNK {
795            block_state_3rd_block_write_by(i * crate::CHUNK_LEN);
796            block_state_3rd_block_write_by(i * crate::CHUNK_LEN + 1);
797            block_state_3rd_block_write_by(i * crate::CHUNK_LEN - 1);
798        }
799    }
800
801    #[test]
802    fn empty_hash() {
803        let tester = Tester::new();
804        tester.test();
805    }
806
807    #[test]
808    fn one_byte() {
809        let mut tester = Tester::new();
810        tester.write(&[0]);
811        tester.test();
812
813        let mut tester = Tester::new();
814        tester.write(&[17]);
815        tester.test();
816    }
817
818    #[test]
819    fn one_byte_one_chunk() {
820        let mut tester = Tester::new();
821        tester.write(&[3]);
822        let mut chunk = [0; crate::CHUNK_LEN];
823        fill(LCG_SEED, &mut chunk);
824        tester.write(&chunk);
825        tester.test();
826    }
827
828    #[test]
829    fn one_chunk_one_byte() {
830        let mut tester = Tester::new();
831        let mut chunk = [0; crate::CHUNK_LEN];
832        fill(LCG_SEED, &mut chunk);
833        tester.write(&chunk);
834        tester.write(&[3]);
835        tester.test();
836    }
837
838    #[test]
839    fn one_chunk() {
840        let mut tester = Tester::new();
841        let mut chunk = [0; crate::CHUNK_LEN];
842        fill(LCG_SEED, &mut chunk);
843        tester.write(&chunk);
844        tester.test();
845    }
846
847    #[test]
848    fn one_block() {
849        let mut tester = Tester::new();
850        let mut chunk = [0; TREE_BLOCK_SIZE_BYTES];
851        fill(LCG_SEED, &mut chunk);
852        tester.write(&chunk);
853        tester.test();
854    }
855
856    #[test]
857    fn two_block() {
858        let mut tester = Tester::new();
859        let mut chunk = [0; 2 * TREE_BLOCK_SIZE_BYTES];
860        fill(LCG_SEED, &mut chunk);
861        tester.write(&chunk);
862        tester.test();
863    }
864
865    #[test]
866    fn three_block() {
867        let mut tester = Tester::new();
868        let mut chunk = [0; 3 * TREE_BLOCK_SIZE_BYTES];
869        fill(LCG_SEED, &mut chunk);
870        tester.write(&chunk);
871        tester.test();
872    }
873
874    #[test]
875    fn four_block() {
876        let mut tester = Tester::new();
877        let mut chunk = [0; 4 * TREE_BLOCK_SIZE_BYTES];
878        fill(LCG_SEED, &mut chunk);
879        tester.write(&chunk);
880        tester.test();
881    }
882
883    #[test]
884    fn two_block_byte_by_byte() {
885        let mut tester = Tester::new();
886        let mut block = [0; 2 * TREE_BLOCK_SIZE_BYTES];
887        fill(LCG_SEED, &mut block);
888
889        for byte in block.chunks(1) {
890            tester.write(byte);
891        }
892
893        tester.test();
894    }
895
896    #[test]
897    fn one_block_byte_by_byte() {
898        let mut tester = Tester::new();
899        let mut block = [0; 2 * TREE_BLOCK_SIZE_BYTES];
900        fill(LCG_SEED, &mut block);
901
902        for byte in block.chunks(1) {
903            tester.write(byte);
904        }
905
906        tester.test();
907    }
908
909    #[test]
910    fn two_block_one_byte() {
911        let mut tester = Tester::new();
912        let mut block = [0; 2 * TREE_BLOCK_SIZE_BYTES];
913        fill(LCG_SEED, &mut block);
914        tester.write(&block);
915        tester.write(&[0]);
916        tester.test();
917    }
918
919    #[test]
920    fn four_block_by_byte() {
921        let mut tester = Tester::new();
922        let mut block = [0; 2 * TREE_BLOCK_SIZE_BYTES];
923        fill(LCG_SEED, &mut block);
924        for byte in block.chunks(1) {
925            tester.write(byte);
926        }
927        tester.write(&[0]);
928        tester.test();
929    }
930
931    #[test]
932    fn two_block_by_half_block() {
933        let mut tester = Tester::new();
934        let mut block = [0; 2 * TREE_BLOCK_SIZE_BYTES];
935        fill(LCG_SEED, &mut block);
936
937        for byte in block.chunks(TREE_BLOCK_SIZE_BYTES / 2) {
938            tester.write(byte);
939        }
940
941        tester.test();
942    }
943
944    #[test]
945    fn two_block_by_half_block_irregular() {
946        let mut tester = Tester::new();
947        let mut block = [0; 2 * TREE_BLOCK_SIZE_BYTES];
948        fill(LCG_SEED, &mut block);
949        for (i, byte) in block.chunks(TREE_BLOCK_SIZE_BYTES / 2).enumerate() {
950            tester.write(byte);
951            if i % 17 == 0 {
952                tester.write(&[0]);
953            }
954
955            if i % 11 == 0 {
956                tester.write(&[1, 2]);
957            }
958        }
959        tester.test();
960    }
961
962    #[test]
963    fn fuzz() {
964        let mut state = LCG_SEED;
965
966        for mut n in 0..(1 << 12) {
967            let mut tester = Tester::new();
968            for _ in 0..4 {
969                // Get the data for this round.
970                let mut data = [0; 2 * TREE_BLOCK_SIZE_BYTES];
971                state = fill(state, &mut data);
972
973                // Get the write mode.
974                let test = n & 7;
975                n >>= 3;
976
977                match test {
978                    // Skip
979                    0 => {}
980                    // By byte
981                    1 => {
982                        for byte in data.chunks(1) {
983                            tester.write(byte);
984                        }
985                    }
986                    // By chunk
987                    2 => {
988                        for byte in data.chunks(crate::CHUNK_LEN) {
989                            tester.write(byte);
990                        }
991                    }
992                    // By 1/2 block
993                    3 => {
994                        for byte in data.chunks(TREE_BLOCK_SIZE_BYTES / 2) {
995                            tester.write(byte);
996                        }
997                    }
998                    // By block
999                    4 => {
1000                        for byte in data.chunks(TREE_BLOCK_SIZE_BYTES) {
1001                            tester.write(byte);
1002                        }
1003                    }
1004                    // By 2 block (full)
1005                    5 => {
1006                        for byte in data.chunks(2 * TREE_BLOCK_SIZE_BYTES) {
1007                            tester.write(byte);
1008                        }
1009                    }
1010                    // Trailing byte
1011                    6 => {
1012                        for byte in data.chunks(2 * TREE_BLOCK_SIZE_BYTES) {
1013                            tester.write(byte);
1014                        }
1015                        tester.write(&[0x00]);
1016                    }
1017                    // Leading byte
1018                    7 => {
1019                        tester.write(&[0x00]);
1020                        for byte in data.chunks(2 * TREE_BLOCK_SIZE_BYTES) {
1021                            tester.write(byte);
1022                        }
1023                    }
1024                    _ => unreachable!(),
1025                }
1026            }
1027            tester.test();
1028        }
1029    }
1030
1031    #[test]
1032    fn first_100kb() {
1033        for size in 0..=(100 << 10) {
1034            let mut tester = Tester::new();
1035            let mut block = vec![0; size];
1036            fill(LCG_SEED, &mut block);
1037            tester.write(&block);
1038            tester.test();
1039        }
1040    }
1041
1042    #[test]
1043    fn first_1mb_with_10kb_jump() {
1044        for base in (0usize..=(1 << 20)).step_by(10 << 10) {
1045            for size in base.saturating_sub(50)..base + 50 {
1046                let mut tester = Tester::new();
1047                let mut block = vec![0; size];
1048                fill(LCG_SEED, &mut block);
1049                tester.write(&block);
1050                tester.test();
1051            }
1052        }
1053    }
1054
1055    #[test]
1056    fn test_block_boundary() {
1057        for size in (0..10).map(|n| n * TREE_BLOCK_SIZE_BYTES) {
1058            let mut content = vec![0; size.saturating_sub(50)];
1059            fill(LCG_SEED, &mut content);
1060
1061            for _ in size.saturating_sub(50)..size + 50 {
1062                let mut tester = Tester::new();
1063                tester.write(&content);
1064                tester.test();
1065                content.push(0);
1066            }
1067        }
1068    }
1069
1070    #[test]
1071    fn test_2block_one_byte() {
1072        let mut content = vec![0; 2 * TREE_BLOCK_SIZE_BYTES + 1];
1073        fill(LCG_SEED, &mut content);
1074        let mut tester = Tester::new();
1075        tester.write(&content);
1076        tester.test();
1077    }
1078}