casper_types/digest.rs

//! Contains the digest and Merkle chunking types used throughout the system.

mod chunk_with_proof;
mod error;
mod indexed_merkle_proof;

use alloc::{collections::BTreeMap, string::String, vec::Vec};
use core::{
    array::TryFromSliceError,
    convert::{TryFrom, TryInto},
    fmt::{self, Debug, Display, Formatter, LowerHex, UpperHex},
};

use blake2::{
    digest::{Update, VariableOutput},
    VarBlake2b,
};
#[cfg(feature = "datasize")]
use datasize::DataSize;
use hex_fmt::HexFmt;
use itertools::Itertools;
#[cfg(feature = "once_cell")]
use once_cell::sync::OnceCell;
#[cfg(any(feature = "testing", test))]
use rand::{
    distributions::{Distribution, Standard},
    Rng,
};
#[cfg(feature = "json-schema")]
use schemars::JsonSchema;
use serde::{de::Error as SerdeError, Deserialize, Deserializer, Serialize, Serializer};

#[cfg(any(feature = "testing", test))]
use crate::testing::TestRng;
use crate::{
    bytesrepr::{self, FromBytes, ToBytes},
    checksummed_hex, CLType, CLTyped,
};
pub use chunk_with_proof::ChunkWithProof;
pub use error::{
    ChunkWithProofVerificationError, Error as DigestError, MerkleConstructionError,
    MerkleVerificationError,
};
pub use indexed_merkle_proof::IndexedMerkleProof;

/// The output of the hash function.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default)]
#[cfg_attr(feature = "datasize", derive(DataSize))]
#[cfg_attr(
    feature = "json-schema",
    derive(JsonSchema),
    schemars(description = "Hex-encoded hash digest.")
)]
pub struct Digest(
    #[cfg_attr(feature = "json-schema", schemars(skip, with = "String"))]
    pub(super) [u8; Digest::LENGTH],
);

const CHUNK_DATA_ZEROED: &[u8] = &[0u8; ChunkWithProof::CHUNK_SIZE_BYTES];

impl Digest {
    /// The number of bytes in a `Digest`.
    pub const LENGTH: usize = 32;

    /// Sentinel hash to be used for hashing options in the case of `None`.
    pub const SENTINEL_NONE: Digest = Digest([0u8; Digest::LENGTH]);
    /// Sentinel hash to be used by `hash_slice_rfold`. Terminates the fold.
    pub const SENTINEL_RFOLD: Digest = Digest([1u8; Digest::LENGTH]);
    /// Sentinel hash to be used by `hash_merkle_tree` in the case of an empty list.
    pub const SENTINEL_MERKLE_TREE: Digest = Digest([2u8; Digest::LENGTH]);

    /// Creates a 32-byte BLAKE2b hash digest from a given piece of data.
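    ///
    /// # Example
    ///
    /// A minimal usage sketch (it assumes `Digest` is re-exported at the crate root; only the
    /// digest length, not a specific value, is asserted here):
    ///
    /// ```
    /// use casper_types::Digest;
    ///
    /// let digest = Digest::hash("abc");
    /// assert_eq!(digest.value().len(), Digest::LENGTH);
    /// ```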
    #[inline(always)]
    pub fn hash<T: AsRef<[u8]>>(data: T) -> Digest {
        Self::blake2b_hash(data)
    }

    /// Creates a 32-byte BLAKE2b hash digest from a given piece of data.
    pub(crate) fn blake2b_hash<T: AsRef<[u8]>>(data: T) -> Digest {
        let mut ret = [0u8; Digest::LENGTH];
        // NOTE: Safe to unwrap here because our digest length is constant and valid
        let mut hasher = VarBlake2b::new(Digest::LENGTH).unwrap();
        hasher.update(data);
        hasher.finalize_variable(|hash| ret.clone_from_slice(hash));
        Digest(ret)
    }

    /// Hashes a pair of byte slices.
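    ///
    /// # Example
    ///
    /// A minimal sketch showing that the result depends on operand order (assumes `Digest` is
    /// re-exported at the crate root):
    ///
    /// ```
    /// use casper_types::Digest;
    ///
    /// let ab = Digest::hash_pair(b"a", b"b");
    /// let ba = Digest::hash_pair(b"b", b"a");
    /// assert_ne!(ab, ba);
    /// ```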
    pub fn hash_pair<T: AsRef<[u8]>, U: AsRef<[u8]>>(data1: T, data2: U) -> Digest {
        let mut result = [0; Digest::LENGTH];
        let mut hasher = VarBlake2b::new(Digest::LENGTH).unwrap();
        hasher.update(data1);
        hasher.update(data2);
        hasher.finalize_variable(|slice| {
            result.copy_from_slice(slice);
        });
        Digest(result)
    }

    /// Hashes a raw Merkle root and leaf count to form the final Merkle hash.
    ///
    /// To avoid pre-image attacks, the input to the final hash (the number of leaves in the
    /// Merkle tree concatenated with the root hash) is prepended with padding to ensure it is
    /// longer than the actual chunk size.
    ///
    /// Without this precaution, an attacker could construct an item that is only a few bytes
    /// long but hashes to the same value as a much longer, chunked item, by hashing `(len ||
    /// longer item's Merkle tree root)`.
    ///
    /// This function computes the correct final hash by ensuring the hasher used has been
    /// initialized with the padding beforehand.
    ///
    /// With the `once_cell` feature enabled (generally done by enabling the `std` feature), for
    /// efficiency reasons it uses a memoized hasher state computed on first run and cloned
    /// afterwards.
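    ///
    /// The padded pre-image that ends up being hashed is laid out as:
    ///
    /// ```text
    /// [0u8; ChunkWithProof::CHUNK_SIZE_BYTES] || leaf_count.to_le_bytes() || root
    /// ```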
    fn hash_merkle_root(leaf_count: u64, root: Digest) -> Digest {
        #[cfg(feature = "once_cell")]
        static PAIR_PREFIX_HASHER: OnceCell<VarBlake2b> = OnceCell::new();

        let mut result = [0; Digest::LENGTH];
        let get_hasher = || {
            let mut hasher = VarBlake2b::new(Digest::LENGTH).unwrap();
            hasher.update(CHUNK_DATA_ZEROED);
            hasher
        };
        #[cfg(feature = "once_cell")]
        let mut hasher = PAIR_PREFIX_HASHER.get_or_init(get_hasher).clone();
        #[cfg(not(feature = "once_cell"))]
        let mut hasher = get_hasher();

        hasher.update(leaf_count.to_le_bytes());
        hasher.update(root);
        hasher.finalize_variable(|slice| {
            result.copy_from_slice(slice);
        });
        Digest(result)
    }

    /// Returns the underlying BLAKE2b hash bytes.
    pub fn value(&self) -> [u8; Digest::LENGTH] {
        self.0
    }

    /// Converts the underlying BLAKE2b hash digest array to a `Vec`.
    pub fn into_vec(self) -> Vec<u8> {
        self.0.to_vec()
    }

    /// Hashes an `impl IntoIterator` of [`Digest`]s into a single [`Digest`] by
    /// constructing a [Merkle tree][1]. Reduces pairs of elements in the collection by repeatedly
    /// calling [Digest::hash_pair].
    ///
    /// The pattern of hashing is as follows. It is akin to [graph reduction][2]:
    ///
    /// ```text
    /// 1 2 4 5 8 9
    /// │ │ │ │ │ │
    /// └─3 └─6 └─10
    ///   │   │   │
    ///   └───7   │
    ///       │   │
    ///       └───11
    /// ```
    ///
    /// Finally, the number of leaves is hashed with the resulting root via
    /// [`Digest::hash_merkle_root`]. In the example above the final output would be
    /// `hash_merkle_root(6, 11)`.
    ///
    /// Returns [`Digest::SENTINEL_MERKLE_TREE`] when the input is empty.
    ///
    /// [1]: https://en.wikipedia.org/wiki/Merkle_tree
    /// [2]: https://en.wikipedia.org/wiki/Graph_reduction
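    ///
    /// # Example
    ///
    /// A minimal usage sketch (assumes `Digest` is re-exported at the crate root):
    ///
    /// ```
    /// use casper_types::Digest;
    ///
    /// let leaves = vec![Digest::hash("a"), Digest::hash("b"), Digest::hash("c")];
    /// assert_ne!(Digest::hash_merkle_tree(leaves), Digest::SENTINEL_MERKLE_TREE);
    ///
    /// // An empty input yields the sentinel.
    /// assert_eq!(
    ///     Digest::hash_merkle_tree(std::iter::empty::<Digest>()),
    ///     Digest::SENTINEL_MERKLE_TREE
    /// );
    /// ```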
    pub fn hash_merkle_tree<I>(leaves: I) -> Digest
    where
        I: IntoIterator<Item = Digest>,
        I::IntoIter: ExactSizeIterator,
    {
        let leaves = leaves.into_iter();
        let leaf_count = leaves.len() as u64;

        leaves.tree_fold1(Digest::hash_pair).map_or_else(
            || Digest::SENTINEL_MERKLE_TREE,
            |raw_root| Digest::hash_merkle_root(leaf_count, raw_root),
        )
    }

    /// Hashes a `BTreeMap`.
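    ///
    /// # Example
    ///
    /// A minimal sketch; it assumes `Digest` is re-exported at the crate root and relies on the
    /// `ToBytes` impl for `String`:
    ///
    /// ```
    /// use std::collections::BTreeMap;
    ///
    /// use casper_types::Digest;
    ///
    /// let mut map = BTreeMap::new();
    /// let _ = map.insert("key".to_string(), "value".to_string());
    /// let digest = Digest::hash_btree_map(&map).expect("serialization should succeed");
    /// assert_ne!(digest, Digest::SENTINEL_MERKLE_TREE);
    /// ```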
    pub fn hash_btree_map<K, V>(btree_map: &BTreeMap<K, V>) -> Result<Digest, bytesrepr::Error>
    where
        K: ToBytes,
        V: ToBytes,
    {
        let mut kv_hashes: Vec<Digest> = Vec::with_capacity(btree_map.len());
        for (key, value) in btree_map.iter() {
            kv_hashes.push(Digest::hash_pair(
                Digest::hash(key.to_bytes()?),
                Digest::hash(value.to_bytes()?),
            ))
        }
        Ok(Self::hash_merkle_tree(kv_hashes))
    }

    /// Hashes a `&[Digest]` using a [right fold][1].
    ///
    /// This pattern of hashing is as follows:
    ///
    /// ```text
    /// hash_pair(a, &hash_pair(b, &hash_pair(c, &SENTINEL_RFOLD)))
    /// ```
    ///
    /// Unlike Merkle trees, this is suited to hashing heterogeneous lists that we may wish to
    /// extend in the future (i.e., hashes of data structures that may undergo revision).
    ///
    /// Returns [`Digest::SENTINEL_RFOLD`] when given an empty slice as input.
    ///
    /// [1]: https://en.wikipedia.org/wiki/Fold_(higher-order_function)#Linear_folds
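    ///
    /// # Example
    ///
    /// A sketch of how the fold composes with [`Digest::hash_slice_with_proof`] (assumes
    /// `Digest` is re-exported at the crate root):
    ///
    /// ```
    /// use casper_types::Digest;
    ///
    /// let slice = [Digest::hash("a"), Digest::hash("b"), Digest::hash("c")];
    /// let full = Digest::hash_slice_rfold(&slice);
    ///
    /// // Folding a prefix, with the fold of the suffix supplied as the proof, is equivalent.
    /// let proof = Digest::hash_slice_rfold(&slice[1..]);
    /// assert_eq!(full, Digest::hash_slice_with_proof(&slice[..1], proof));
    /// ```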
    pub fn hash_slice_rfold(slice: &[Digest]) -> Digest {
        Self::hash_slice_with_proof(slice, Self::SENTINEL_RFOLD)
    }

    /// Hashes a `&[Digest]` using a [right fold][1]. Uses `proof` as a Merkle proof for the
    /// missing tail of the slice.
    ///
    /// [1]: https://en.wikipedia.org/wiki/Fold_(higher-order_function)#Linear_folds
    pub fn hash_slice_with_proof(slice: &[Digest], proof: Digest) -> Digest {
        slice
            .iter()
            .rfold(proof, |prev, next| Digest::hash_pair(next, prev))
    }

    /// Returns a `Digest` parsed from a hex-encoded representation.
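    ///
    /// # Example
    ///
    /// A minimal sketch (assumes `Digest` is re-exported at the crate root):
    ///
    /// ```
    /// use casper_types::Digest;
    ///
    /// let hex = "00".repeat(Digest::LENGTH);
    /// assert_eq!(Digest::from_hex(hex).unwrap(), Digest::SENTINEL_NONE);
    /// ```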
    pub fn from_hex<T: AsRef<[u8]>>(hex_input: T) -> Result<Self, DigestError> {
        let bytes = checksummed_hex::decode(&hex_input).map_err(DigestError::Base16DecodeError)?;
        let slice: [u8; Self::LENGTH] = bytes
            .try_into()
            .map_err(|_| DigestError::IncorrectDigestLength(hex_input.as_ref().len()))?;
        Ok(Digest(slice))
    }

    /// Hashes the data, chunking it and building a Merkle tree if it is longer than
    /// [`ChunkWithProof::CHUNK_SIZE_BYTES`].
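    ///
    /// # Example
    ///
    /// A minimal sketch (assumes `ChunkWithProof` and `Digest` are re-exported at the crate
    /// root, as the tests below also assume):
    ///
    /// ```
    /// use casper_types::{ChunkWithProof, Digest};
    ///
    /// // Data no longer than one chunk is hashed directly, without chunking.
    /// let small = vec![1u8; ChunkWithProof::CHUNK_SIZE_BYTES];
    /// assert_eq!(Digest::hash_into_chunks_if_necessary(&small), Digest::hash(&small));
    /// ```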
    pub fn hash_into_chunks_if_necessary(bytes: &[u8]) -> Digest {
        if bytes.len() <= ChunkWithProof::CHUNK_SIZE_BYTES {
            Digest::blake2b_hash(bytes)
        } else {
            Digest::hash_merkle_tree(
                bytes
                    .chunks(ChunkWithProof::CHUNK_SIZE_BYTES)
                    .map(Digest::blake2b_hash),
            )
        }
    }

    /// Returns a new `Digest` directly initialized with the provided bytes; no hashing is done.
    ///
    /// This is equivalent to `Digest::from`, but is a const function.
    pub const fn from_raw(raw_digest: [u8; Self::LENGTH]) -> Self {
        Digest(raw_digest)
    }

    /// Returns a random `Digest`.
    #[cfg(any(feature = "testing", test))]
    pub fn random(rng: &mut TestRng) -> Self {
        Digest(rng.gen())
    }
}

impl CLTyped for Digest {
    fn cl_type() -> CLType {
        CLType::ByteArray(Digest::LENGTH as u32)
    }
}

#[cfg(any(feature = "testing", test))]
impl Distribution<Digest> for Standard {
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Digest {
        Digest(rng.gen())
    }
}

impl LowerHex for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let hex_string = base16::encode_lower(&self.value());
        if f.alternate() {
            write!(f, "0x{}", hex_string)
        } else {
            write!(f, "{}", hex_string)
        }
    }
}

impl UpperHex for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let hex_string = base16::encode_upper(&self.value());
        if f.alternate() {
            write!(f, "0x{}", hex_string)
        } else {
            write!(f, "{}", hex_string)
        }
    }
}

impl Display for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "{:10}", HexFmt(&self.0))
    }
}

impl Debug for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "{}", base16::encode_lower(&self.0))
    }
}

impl From<[u8; Digest::LENGTH]> for Digest {
    fn from(arr: [u8; Digest::LENGTH]) -> Self {
        Digest(arr)
    }
}

impl TryFrom<&[u8]> for Digest {
    type Error = TryFromSliceError;

    fn try_from(slice: &[u8]) -> Result<Digest, Self::Error> {
        <[u8; Digest::LENGTH]>::try_from(slice).map(Digest)
    }
}

impl AsRef<[u8]> for Digest {
    fn as_ref(&self) -> &[u8] {
        self.0.as_ref()
    }
}

impl From<Digest> for [u8; Digest::LENGTH] {
    fn from(hash: Digest) -> Self {
        hash.0
    }
}

impl ToBytes for Digest {
    #[inline(always)]
    fn to_bytes(&self) -> Result<Vec<u8>, bytesrepr::Error> {
        self.0.to_bytes()
    }

    #[inline(always)]
    fn serialized_length(&self) -> usize {
        self.0.serialized_length()
    }

    #[inline(always)]
    fn write_bytes(&self, writer: &mut Vec<u8>) -> Result<(), bytesrepr::Error> {
        writer.extend_from_slice(&self.0);
        Ok(())
    }
}

impl FromBytes for Digest {
    #[inline(always)]
    fn from_bytes(bytes: &[u8]) -> Result<(Self, &[u8]), bytesrepr::Error> {
        FromBytes::from_bytes(bytes).map(|(arr, rem)| (Digest(arr), rem))
    }
}

impl Serialize for Digest {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        if serializer.is_human_readable() {
            base16::encode_lower(&self.0).serialize(serializer)
        } else {
            // This is to keep backwards compatibility with how HexForm encodes
            // byte arrays. HexForm treats this like a slice.
            self.0[..].serialize(serializer)
        }
    }
}

impl<'de> Deserialize<'de> for Digest {
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        if deserializer.is_human_readable() {
            let hex_string = String::deserialize(deserializer)?;
            let bytes =
                checksummed_hex::decode(hex_string.as_bytes()).map_err(SerdeError::custom)?;
            let data =
                <[u8; Digest::LENGTH]>::try_from(bytes.as_ref()).map_err(SerdeError::custom)?;
            Ok(Digest::from(data))
        } else {
            let data = <Vec<u8>>::deserialize(deserializer)?;
            Digest::try_from(data.as_slice()).map_err(D::Error::custom)
        }
    }
}

#[cfg(test)]
mod tests {
    use std::{collections::BTreeMap, iter};

    use proptest_attr_macro::proptest;

    use super::Digest;

    use crate::{
        bytesrepr::{self, ToBytes},
        ChunkWithProof,
    };

    #[proptest]
    fn bytesrepr_roundtrip(hash: [u8; Digest::LENGTH]) {
        let digest = Digest(hash);
        bytesrepr::test_serialization_roundtrip(&digest);
    }

    #[proptest]
    fn serde_roundtrip(hash: [u8; Digest::LENGTH]) {
        let preser_digest = Digest(hash);
        let serialized = serde_json::to_string(&preser_digest).unwrap();
        let deser_digest: Digest = serde_json::from_str(&serialized).unwrap();
        assert_eq!(preser_digest, deser_digest);
    }

    #[test]
    fn serde_custom_serialization() {
        let serialized = serde_json::to_string(&Digest::SENTINEL_RFOLD).unwrap();
        let expected = format!("\"{:?}\"", Digest::SENTINEL_RFOLD);
        assert_eq!(expected, serialized);
    }

    #[test]
    fn hash_known() {
        // Data of length less than or equal to [ChunkWithProof::CHUNK_SIZE_BYTES]
        // is hashed using the BLAKE2b algorithm, while larger data is meant to be
        // chunked and hashed as a Merkle tree (see
        // [Digest::hash_into_chunks_if_necessary]). Chunking in [Digest::hash] is
        // currently deactivated, so all expected digests below are plain 32-byte
        // BLAKE2b digests.
        //
        // Note that [ChunkWithProof::CHUNK_SIZE_BYTES] in the `test` configuration
        // is smaller than in production, to allow testing with more chunks while
        // keeping time and memory consumption reasonable.
        //
        // See: [Digest::hash]
        let inputs_and_digests = [
            (
                "",
                "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8",
            ),
            (
                "abc",
                "bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319",
            ),
            (
                "0123456789",
                "7b6cb8d374484e221785288b035dc53fc9ddf000607f473fc2a3258d89a70398",
            ),
            (
                "01234567890",
                "3d199478c18b7fe3ca1f4f2a9b3e07f708ff66ed52eb345db258abe8a812ed5c",
            ),
            (
                "The quick brown fox jumps over the lazy dog",
                "01718cec35cd3d796dd00020e0bfecb473ad23457d063b75eff29c0ffa2e58a9",
            ),
        ];
        for (known_input, expected_digest) in &inputs_and_digests {
            let known_input: &[u8] = known_input.as_ref();
            assert_eq!(*expected_digest, format!("{:?}", Digest::hash(known_input)));
        }
    }

    #[test]
    fn from_valid_hex_should_succeed() {
        for char in "abcdefABCDEF0123456789".chars() {
            let input: String = iter::repeat(char).take(64).collect();
            assert!(Digest::from_hex(input).is_ok());
        }
    }

    #[test]
    fn from_hex_invalid_length_should_fail() {
        for len in &[2_usize, 62, 63, 65, 66] {
            let input: String = "f".repeat(*len);
            assert!(Digest::from_hex(input).is_err());
        }
    }

    #[test]
    fn from_hex_invalid_char_should_fail() {
        for char in "g %-".chars() {
            let input: String = iter::repeat('f').take(63).chain(iter::once(char)).collect();
            assert!(Digest::from_hex(input).is_err());
        }
    }

    #[test]
    fn should_display_digest_in_hex() {
        let hash = Digest([0u8; 32]);
        let hash_hex = format!("{:?}", hash);
        assert_eq!(
            hash_hex,
            "0000000000000000000000000000000000000000000000000000000000000000"
        );
    }

    #[test]
    fn should_print_digest_lower_hex() {
        let hash = Digest([10u8; 32]);
        let hash_lower_hex = format!("{:x}", hash);
        assert_eq!(
            hash_lower_hex,
            "0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a"
        )
    }

    #[test]
    fn should_print_digest_upper_hex() {
        let hash = Digest([10u8; 32]);
        let hash_upper_hex = format!("{:X}", hash);
        assert_eq!(
            hash_upper_hex,
            "0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A"
        )
    }

    #[test]
    fn alternate_should_prepend_0x() {
        let hash = Digest([0u8; 32]);
        let hash_hex_alt = format!("{:#x}", hash);
        assert_eq!(
            hash_hex_alt,
            "0x0000000000000000000000000000000000000000000000000000000000000000"
        )
    }

    #[test]
    fn test_hash_pair() {
        let hash1 = Digest([1u8; 32]);
        let hash2 = Digest([2u8; 32]);

        let hash = Digest::hash_pair(hash1, hash2);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "30b600fb1f0cc0b3f0fc28cdcb7389405a6659be81c7d5c5905725aa3a5119ce"
        );
    }

    #[test]
    fn test_hash_rfold() {
        let hashes = [
            Digest([1u8; 32]),
            Digest([2u8; 32]),
            Digest([3u8; 32]),
            Digest([4u8; 32]),
            Digest([5u8; 32]),
        ];

        let hash = Digest::hash_slice_rfold(&hashes[..]);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "e137f4eb94d2387065454eecfe2cdb5584e3dbd5f1ca07fc511fffd13d234e8e"
        );

        let proof = Digest::hash_slice_rfold(&hashes[2..]);
        let hash_proof = Digest::hash_slice_with_proof(&hashes[..2], proof);

        assert_eq!(hash, hash_proof);
    }

    #[test]
    fn test_hash_merkle_odd() {
        let hashes = vec![
            Digest([1u8; 32]),
            Digest([2u8; 32]),
            Digest([3u8; 32]),
            Digest([4u8; 32]),
            Digest([5u8; 32]),
        ];

        let hash = Digest::hash_merkle_tree(hashes);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "775cec8133b97b0e8d4e97659025d5bac4ed7c8927d1bd99cf62114df57f3e74"
        );
    }

    #[test]
    fn test_hash_merkle_even() {
        let hashes = vec![
            Digest([1u8; 32]),
            Digest([2u8; 32]),
            Digest([3u8; 32]),
            Digest([4u8; 32]),
            Digest([5u8; 32]),
            Digest([6u8; 32]),
        ];

        let hash = Digest::hash_merkle_tree(hashes);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "4bd50b08a8366b28c35bc831b95d147123bad01c29ffbf854b659c4b3ea4086c"
        );
    }

    #[test]
    fn test_hash_btreemap() {
        let mut map = BTreeMap::new();
        let _ = map.insert(Digest([1u8; 32]), Digest([2u8; 32]));
        let _ = map.insert(Digest([3u8; 32]), Digest([4u8; 32]));
        let _ = map.insert(Digest([5u8; 32]), Digest([6u8; 32]));
        let _ = map.insert(Digest([7u8; 32]), Digest([8u8; 32]));
        let _ = map.insert(Digest([9u8; 32]), Digest([10u8; 32]));

        let hash = Digest::hash_btree_map(&map).unwrap();
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "fd1214a627473ffc6d6cc97e7012e6344d74abbf987b48cde5d0642049a0db98"
        );
    }

    #[test]
    fn digest_deserialize_regression() {
        let input = Digest([0; 32]);
        let serialized = bincode::serialize(&input).expect("failed to serialize.");

        let expected = vec![
            32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ];

        assert_eq!(expected, serialized);
    }

    #[test]
    fn should_assert_simple_digest_serialization_format() {
        let digest_bytes = [0; 32];

        assert_eq!(
            Digest(digest_bytes).to_bytes().unwrap(),
            digest_bytes.to_vec()
        );
    }

    #[test]
    fn merkle_roots_are_preimage_resistant() {
        // Input data is two chunks long.
        //
        // The resulting tree will look like this:
        //
        // 1..0  a..j
        // │     │
        // └─────┴── R
        //
        // The Merkle root is thus: R = h( h(1..0) || h(a..j) )
        //
        // h(1..0) = 807f1ba73147c3a96c2d63b38dd5a5f514f66290a1436bb9821e9f2a72eff263
        // h(a..j) = 499e1cdb476523fedafc9d9db31125e2744f271578ea95b16ab4bd1905f05fea
        // R = h(h(1..0)||h(a..j)) = 1319394a98d0cb194f960e3748baeb2045a9ec28aa51e0d42011be43f4a91f5f
        // h(2u64le || R) = c31f0bb6ef569354d1a26c3a51f1ad4b6d87cef7f73a290ab6be8db6a9c7d4ee
        //
        // The final step is to hash `(2u64le || R)`, i.e. the leaf count as little endian
        // concatenated with the root.

        // Constants used here assume a chunk size of 10 bytes.
        assert_eq!(ChunkWithProof::CHUNK_SIZE_BYTES, 10);

        let long_data = b"1234567890abcdefghij";
        assert_eq!(long_data.len(), ChunkWithProof::CHUNK_SIZE_BYTES * 2);

        // The `long_data_hash` is constructed manually here, because chunking in `Digest::hash`
        // was still deactivated at the time this test was written.
        let long_data_hash = Digest::hash_merkle_tree(
            long_data
                .as_ref()
                .chunks(ChunkWithProof::CHUNK_SIZE_BYTES)
                .map(Digest::blake2b_hash),
        );

        // The concatenation of `2u64` in little endian + the Merkle root hash `R`. Note that this
        // is a valid hashable object on its own.
        let maybe_colliding_short_data = [
            2, 0, 0, 0, 0, 0, 0, 0, 19, 25, 57, 74, 152, 208, 203, 25, 79, 150, 14, 55, 72, 186,
            235, 32, 69, 169, 236, 40, 170, 81, 224, 212, 32, 17, 190, 67, 244, 169, 31, 95,
        ];

        // Use `blake2b_hash` to work around the issue of the chunk size being shorter than the
        // digest length.
        let short_data_hash = Digest::blake2b_hash(maybe_colliding_short_data);

        // Ensure there is no collision. You can verify this test is correct by temporarily
        // changing the `Digest::hash_merkle_tree` function to use the unpadded `hash_pair`
        // function instead of `hash_merkle_root`.
        assert_ne!(long_data_hash, short_data_hash);

        // The expected input for the root hash is the colliding data, but prefixed with a full
        // chunk of zeros.
        let expected_final_hash_input = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 19, 25, 57, 74, 152, 208, 203,
            25, 79, 150, 14, 55, 72, 186, 235, 32, 69, 169, 236, 40, 170, 81, 224, 212, 32, 17,
            190, 67, 244, 169, 31, 95,
        ];
        assert_eq!(
            Digest::blake2b_hash(expected_final_hash_input),
            long_data_hash
        );

        // Another way to state this sanity check is to say that the short and the long data
        // should hash differently.
        //
        // Note: This condition is true at the time of writing this test, where chunk hashing is
        //       disabled. It should still hold true once enabled.
        assert_ne!(
            Digest::hash(maybe_colliding_short_data),
            Digest::hash(long_data)
        );

        // In a similar manner, the internal padded data should not hash to the same value as
        // either input, since it too is hashed using the chunking function.
        assert_ne!(
            Digest::hash(maybe_colliding_short_data),
            Digest::hash(expected_final_hash_input)
        );
        assert_ne!(
            Digest::hash(long_data),
            Digest::hash(expected_final_hash_input)
        );
    }
}