casper_hashing/lib.rs

//! A library providing hashing functionality including Merkle Proof utilities.
#![doc(html_root_url = "https://docs.rs/casper-hashing/3.0.0")]
#![doc(
    html_favicon_url = "https://raw.githubusercontent.com/casper-network/casper-node/blob/dev/images/Casper_Logo_Favicon_48.png",
    html_logo_url = "https://raw.githubusercontent.com/casper-network/casper-node/blob/dev/images/Casper_Logo_Favicon.png",
    test(attr(forbid(warnings)))
)]
#![warn(missing_docs)]

mod chunk_with_proof;
mod error;
mod indexed_merkle_proof;

use std::{
    array::TryFromSliceError,
    collections::BTreeMap,
    convert::{TryFrom, TryInto},
    fmt::{self, Debug, Display, Formatter, LowerHex, UpperHex},
};

use blake2::{
    digest::{Update, VariableOutput},
    VarBlake2b,
};
use datasize::DataSize;
use hex_fmt::HexFmt;
use itertools::Itertools;
use once_cell::sync::OnceCell;
#[cfg(test)]
use rand::{distributions::Standard, prelude::Distribution, Rng};
use schemars::JsonSchema;
use serde::{de::Error as SerdeError, Deserialize, Deserializer, Serialize, Serializer};

use casper_types::{
    bytesrepr::{self, FromBytes, ToBytes},
    checksummed_hex, CLType, CLTyped,
};
pub use chunk_with_proof::ChunkWithProof;
pub use error::{
    ChunkWithProofVerificationError, Error, MerkleConstructionError, MerkleVerificationError,
};
pub use indexed_merkle_proof::IndexedMerkleProof;

/// The output of the hash function.
#[derive(Copy, Clone, DataSize, Ord, PartialOrd, Eq, PartialEq, Hash, Default, JsonSchema)]
#[serde(deny_unknown_fields)]
#[schemars(with = "String", description = "Hex-encoded hash digest.")]
pub struct Digest(#[schemars(skip, with = "String")] [u8; Digest::LENGTH]);

const CHUNK_DATA_ZEROED: &[u8] = &[0u8; ChunkWithProof::CHUNK_SIZE_BYTES];

impl Digest {
    /// The number of bytes in a `Digest`.
    pub const LENGTH: usize = 32;

    /// Sentinel hash to be used for hashing options in the case of `None`.
    pub const SENTINEL_NONE: Digest = Digest([0u8; Digest::LENGTH]);
    /// Sentinel hash to be used by `hash_slice_rfold`. Terminates the fold.
    pub const SENTINEL_RFOLD: Digest = Digest([1u8; Digest::LENGTH]);
    /// Sentinel hash to be used by `hash_merkle_tree` in the case of an empty list.
    pub const SENTINEL_MERKLE_TREE: Digest = Digest([2u8; Digest::LENGTH]);

    /// Creates a 32-byte BLAKE2b hash digest from a given piece of data.
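    ///
    /// # Example
    ///
    /// A minimal usage sketch; the expected digest is the BLAKE2b-256 value also checked by the
    /// `hash_known` test below.
    /// ```
    /// use casper_hashing::Digest;
    ///
    /// let digest = Digest::hash("abc");
    /// assert_eq!(
    ///     format!("{:?}", digest),
    ///     "bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319"
    /// );
    /// ```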
    pub fn hash<T: AsRef<[u8]>>(data: T) -> Digest {
        Self::blake2b_hash(data)
    }

    /// Creates a 32-byte BLAKE2b hash digest from a given piece of data.
    pub(crate) fn blake2b_hash<T: AsRef<[u8]>>(data: T) -> Digest {
        let mut ret = [0u8; Digest::LENGTH];
        // NOTE: Safe to unwrap here because our digest length is constant and valid.
        let mut hasher = VarBlake2b::new(Digest::LENGTH).unwrap();
        hasher.update(data);
        hasher.finalize_variable(|hash| ret.clone_from_slice(hash));
        Digest(ret)
    }

    /// Hashes a pair of byte slices.
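    ///
    /// # Example
    ///
    /// A minimal sketch; the expected digest matches the `test_hash_pair` vector below.
    /// ```
    /// use casper_hashing::Digest;
    ///
    /// let digest = Digest::hash_pair([1u8; 32], [2u8; 32]);
    /// assert_eq!(
    ///     format!("{:x}", digest),
    ///     "30b600fb1f0cc0b3f0fc28cdcb7389405a6659be81c7d5c5905725aa3a5119ce"
    /// );
    /// ```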
    pub fn hash_pair<T: AsRef<[u8]>, U: AsRef<[u8]>>(data1: T, data2: U) -> Digest {
        let mut result = [0; Digest::LENGTH];
        let mut hasher = VarBlake2b::new(Digest::LENGTH).unwrap();
        hasher.update(data1);
        hasher.update(data2);
        hasher.finalize_variable(|slice| {
            result.copy_from_slice(slice);
        });
        Digest(result)
    }

    /// Hashes a raw Merkle root and leaf count to form the final Merkle hash.
    ///
    /// To avoid pre-image attacks, the final hash, which is based upon the number of leaves in
    /// the Merkle tree and the root hash, is computed over input prepended with padding to
    /// ensure it is longer than the actual chunk size.
    ///
    /// Without this feature, an attacker could construct an item that is only a few bytes long but
    /// hashes to the same value as a much longer, chunked item, by hashing `(len || root hash of
    /// the longer item's Merkle tree)`.
    ///
    /// This function computes the correct final hash by ensuring the hasher used has been
    /// initialized with the padding beforehand. For efficiency, it uses a memoized hasher state
    /// computed on first run and cloned afterwards.
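    ///
    /// In other words, the result is equivalent to hashing
    /// `CHUNK_DATA_ZEROED || leaf_count.to_le_bytes() || root` with `blake2b_hash`, but without
    /// re-hashing the zero-filled chunk on every call.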
    fn hash_merkle_root(leaf_count: u64, root: Digest) -> Digest {
        static PAIR_PREFIX_HASHER: OnceCell<VarBlake2b> = OnceCell::new();

        let mut result = [0; Digest::LENGTH];
        let mut hasher = PAIR_PREFIX_HASHER
            .get_or_init(|| {
                let mut hasher = VarBlake2b::new(Digest::LENGTH).unwrap();
                hasher.update(CHUNK_DATA_ZEROED);
                hasher
            })
            .clone();

        hasher.update(leaf_count.to_le_bytes());
        hasher.update(root);
        hasher.finalize_variable(|slice| {
            result.copy_from_slice(slice);
        });
        Digest(result)
    }

    /// Returns the underlying BLAKE2b hash bytes.
    pub fn value(&self) -> [u8; Digest::LENGTH] {
        self.0
    }

    /// Converts the underlying BLAKE2b hash digest array to a `Vec`.
    pub fn into_vec(self) -> Vec<u8> {
        self.0.to_vec()
    }

    /// Hashes an `impl IntoIterator` of [`Digest`]s into a single [`Digest`] by
    /// constructing a [Merkle tree][1]. Reduces pairs of elements in the collection by repeatedly
    /// calling [`Digest::hash_pair`].
    ///
    /// The pattern of hashing is as follows. It is akin to [graph reduction][2]:
    ///
    /// ```text
    /// 1 2 4 5 8 9
    /// │ │ │ │ │ │
    /// └─3 └─6 └─10
    ///   │   │   │
    ///   └───7   │
    ///       │   │
    ///       └───11
    /// ```
    ///
    /// Finally, the leaf count is hashed together with the resulting root via the padded
    /// `hash_merkle_root` function. In the example above, the final output would be
    /// `hash_merkle_root(6, 11)`.
    ///
    /// Returns [`Digest::SENTINEL_MERKLE_TREE`] when the input is empty.
    ///
    /// [1]: https://en.wikipedia.org/wiki/Merkle_tree
    /// [2]: https://en.wikipedia.org/wiki/Graph_reduction
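    ///
    /// # Example
    ///
    /// A minimal sketch of the degenerate cases, which hold regardless of the configured chunk
    /// size:
    /// ```
    /// use casper_hashing::Digest;
    ///
    /// // An empty input yields the sentinel digest.
    /// assert_eq!(
    ///     Digest::hash_merkle_tree(Vec::new()),
    ///     Digest::SENTINEL_MERKLE_TREE
    /// );
    ///
    /// // Two leaves are reduced pairwise and then combined with the leaf count.
    /// let root = Digest::hash_merkle_tree(vec![Digest::hash("a"), Digest::hash("b")]);
    /// assert_ne!(root, Digest::SENTINEL_MERKLE_TREE);
    /// ```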
    pub fn hash_merkle_tree<I>(leaves: I) -> Digest
    where
        I: IntoIterator<Item = Digest>,
        I::IntoIter: ExactSizeIterator,
    {
        let leaves = leaves.into_iter();
        let leaf_count = leaves.len() as u64;

        leaves.tree_fold1(Digest::hash_pair).map_or_else(
            || Digest::SENTINEL_MERKLE_TREE,
            |raw_root| Digest::hash_merkle_root(leaf_count, raw_root),
        )
    }

    /// Hashes a `BTreeMap`.
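    ///
    /// # Example
    ///
    /// A minimal sketch; an empty map reduces to the empty Merkle tree sentinel.
    /// ```
    /// use std::collections::BTreeMap;
    /// use casper_hashing::Digest;
    ///
    /// let empty: BTreeMap<String, u64> = BTreeMap::new();
    /// assert_eq!(
    ///     Digest::hash_btree_map(&empty).unwrap(),
    ///     Digest::SENTINEL_MERKLE_TREE
    /// );
    /// ```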
    pub fn hash_btree_map<K, V>(btree_map: &BTreeMap<K, V>) -> Result<Digest, bytesrepr::Error>
    where
        K: ToBytes,
        V: ToBytes,
    {
        let mut kv_hashes: Vec<Digest> = Vec::with_capacity(btree_map.len());
        for (key, value) in btree_map.iter() {
            kv_hashes.push(Digest::hash_pair(
                Digest::hash(key.to_bytes()?),
                Digest::hash(value.to_bytes()?),
            ))
        }
        Ok(Self::hash_merkle_tree(kv_hashes))
    }

    /// Hashes a `&[Digest]` using a [right fold][1].
    ///
    /// This pattern of hashing is as follows:
    ///
    /// ```text
    /// hash_pair(a, &hash_pair(b, &hash_pair(c, &SENTINEL_RFOLD)))
    /// ```
    ///
    /// Unlike Merkle trees, this is suited to hashing heterogeneous lists we may wish to extend in
    /// the future (i.e., hashes of data structures that may undergo revision).
    ///
    /// Returns [`Digest::SENTINEL_RFOLD`] when given an empty slice as input.
    ///
    /// [1]: https://en.wikipedia.org/wiki/Fold_(higher-order_function)#Linear_folds
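    ///
    /// # Example
    ///
    /// A minimal sketch of the proof property exercised by `test_hash_rfold` below: folding the
    /// whole slice equals folding a prefix against the fold of its tail.
    /// ```
    /// use casper_hashing::Digest;
    ///
    /// let digests = [
    ///     Digest::from([1u8; 32]),
    ///     Digest::from([2u8; 32]),
    ///     Digest::from([3u8; 32]),
    /// ];
    ///
    /// let full = Digest::hash_slice_rfold(&digests);
    /// let proof = Digest::hash_slice_rfold(&digests[1..]);
    /// assert_eq!(full, Digest::hash_slice_with_proof(&digests[..1], proof));
    /// ```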
    pub fn hash_slice_rfold(slice: &[Digest]) -> Digest {
        Self::hash_slice_with_proof(slice, Self::SENTINEL_RFOLD)
    }

    /// Hashes a `&[Digest]` using a [right fold][1]. Uses `proof` as a Merkle proof for the
    /// missing tail of the slice.
    ///
    /// [1]: https://en.wikipedia.org/wiki/Fold_(higher-order_function)#Linear_folds
    pub fn hash_slice_with_proof(slice: &[Digest], proof: Digest) -> Digest {
        slice
            .iter()
            .rfold(proof, |prev, next| Digest::hash_pair(next, prev))
    }

    /// Parses a `Digest` from a hex-encoded representation.
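    ///
    /// # Example
    ///
    /// A minimal sketch; sixty-four zero hex digits decode to the all-zero digest
    /// (`Digest::SENTINEL_NONE`).
    /// ```
    /// use casper_hashing::Digest;
    ///
    /// let hex = "00".repeat(32);
    /// assert_eq!(Digest::from_hex(hex).unwrap(), Digest::SENTINEL_NONE);
    /// ```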
    pub fn from_hex<T: AsRef<[u8]>>(hex_input: T) -> Result<Self, Error> {
        let bytes = checksummed_hex::decode(&hex_input).map_err(Error::Base16DecodeError)?;
        let slice: [u8; Self::LENGTH] = bytes
            .try_into()
            .map_err(|_| Error::IncorrectDigestLength(hex_input.as_ref().len()))?;
        Ok(Digest(slice))
    }

    /// Hashes the given bytes, chunking if necessary: data no longer than
    /// [`ChunkWithProof::CHUNK_SIZE_BYTES`] is hashed directly, while larger data is split into
    /// chunks whose digests are hashed as a Merkle tree.
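    ///
    /// A minimal sketch; for input that fits into a single chunk the result equals
    /// [`Digest::hash`].
    /// ```
    /// use casper_hashing::Digest;
    ///
    /// assert_eq!(
    ///     Digest::hash_into_chunks_if_necessary(b"abc"),
    ///     Digest::hash("abc")
    /// );
    /// ```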
    pub fn hash_into_chunks_if_necessary(bytes: &[u8]) -> Digest {
        if bytes.len() <= ChunkWithProof::CHUNK_SIZE_BYTES {
            Digest::blake2b_hash(bytes)
        } else {
            Digest::hash_merkle_tree(
                bytes
                    .chunks(ChunkWithProof::CHUNK_SIZE_BYTES)
                    .map(Digest::blake2b_hash),
            )
        }
    }

    /// Provides the same functionality as [`Digest::hash_merkle_tree`].
    #[deprecated(since = "1.5.0", note = "use `hash_merkle_tree` instead")]
    pub fn hash_vec_merkle_tree(vec: Vec<Digest>) -> Digest {
        Digest::hash_merkle_tree(vec)
    }
}

impl CLTyped for Digest {
    fn cl_type() -> CLType {
        CLType::ByteArray(Digest::LENGTH as u32)
    }
}

#[cfg(test)]
impl Distribution<Digest> for Standard {
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Digest {
        Digest(rng.gen::<[u8; Digest::LENGTH]>())
    }
}

impl LowerHex for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let hex_string = base16::encode_lower(&self.value());
        if f.alternate() {
            write!(f, "0x{}", hex_string)
        } else {
            write!(f, "{}", hex_string)
        }
    }
}

impl UpperHex for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let hex_string = base16::encode_upper(&self.value());
        if f.alternate() {
            write!(f, "0x{}", hex_string)
        } else {
            write!(f, "{}", hex_string)
        }
    }
}

impl Display for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "{:10}", HexFmt(&self.0))
    }
}

impl Debug for Digest {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "{}", base16::encode_lower(&self.0))
    }
}

impl From<[u8; Digest::LENGTH]> for Digest {
    fn from(arr: [u8; Digest::LENGTH]) -> Self {
        Digest(arr)
    }
}

impl<'a> TryFrom<&'a [u8]> for Digest {
    type Error = TryFromSliceError;

    fn try_from(slice: &[u8]) -> Result<Digest, Self::Error> {
        <[u8; Digest::LENGTH]>::try_from(slice).map(Digest)
    }
}

impl AsRef<[u8]> for Digest {
    fn as_ref(&self) -> &[u8] {
        self.0.as_ref()
    }
}

impl From<Digest> for [u8; Digest::LENGTH] {
    fn from(hash: Digest) -> Self {
        hash.0
    }
}

impl ToBytes for Digest {
    #[inline(always)]
    fn to_bytes(&self) -> Result<Vec<u8>, bytesrepr::Error> {
        self.0.to_bytes()
    }

    #[inline(always)]
    fn serialized_length(&self) -> usize {
        self.0.serialized_length()
    }

    #[inline(always)]
    fn write_bytes(&self, writer: &mut Vec<u8>) -> Result<(), bytesrepr::Error> {
        writer.extend_from_slice(&self.0);
        Ok(())
    }
}

impl FromBytes for Digest {
    #[inline(always)]
    fn from_bytes(bytes: &[u8]) -> Result<(Self, &[u8]), bytesrepr::Error> {
        FromBytes::from_bytes(bytes).map(|(arr, rem)| (Digest(arr), rem))
    }
}

impl Serialize for Digest {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        if serializer.is_human_readable() {
            base16::encode_lower(&self.0).serialize(serializer)
        } else {
            // This is to keep backwards compatibility with how HexForm encodes
            // byte arrays. HexForm treats this like a slice.
            self.0[..].serialize(serializer)
        }
    }
}
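
// Formatting note: under a human-readable serializer such as `serde_json`, a `Digest`
// serializes as a 64-character lowercase hex string (see `serde_custom_serialization` below),
// while under a binary serializer such as `bincode` it serializes as a length-prefixed byte
// sequence (see `digest_deserialize_regression` below).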

impl<'de> Deserialize<'de> for Digest {
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        if deserializer.is_human_readable() {
            let hex_string = String::deserialize(deserializer)?;
            let bytes =
                checksummed_hex::decode(hex_string.as_bytes()).map_err(SerdeError::custom)?;
            let data =
                <[u8; Digest::LENGTH]>::try_from(bytes.as_ref()).map_err(SerdeError::custom)?;
            Ok(Digest::from(data))
        } else {
            let data = <Vec<u8>>::deserialize(deserializer)?;
            Digest::try_from(data.as_slice()).map_err(D::Error::custom)
        }
    }
}

#[cfg(test)]
mod tests {
    use std::{collections::BTreeMap, iter};

    use proptest_attr_macro::proptest;

    use casper_types::bytesrepr::{self, ToBytes};

    use crate::{ChunkWithProof, Digest};

    #[proptest]
    fn bytesrepr_roundtrip(data: [u8; Digest::LENGTH]) {
        let hash = Digest(data);
        bytesrepr::test_serialization_roundtrip(&hash);
    }

    #[proptest]
    fn serde_roundtrip(data: [u8; Digest::LENGTH]) {
        let original_hash = Digest(data);
        let serialized = serde_json::to_string(&original_hash).unwrap();
        let deserialized_hash: Digest = serde_json::from_str(&serialized).unwrap();
        assert_eq!(original_hash, deserialized_hash);
    }

    #[test]
    fn serde_custom_serialization() {
        let serialized = serde_json::to_string(&Digest::SENTINEL_RFOLD).unwrap();
        let expected = format!("\"{:?}\"", Digest::SENTINEL_RFOLD);
        assert_eq!(expected, serialized);
    }

    #[test]
    fn hash_known() {
        // Data of length less than or equal to [ChunkWithProof::CHUNK_SIZE_BYTES]
        // is hashed using the BLAKE2b algorithm.
        // Larger data is chunked and a Merkle tree hash is calculated.
        //
        // Please note that [ChunkWithProof::CHUNK_SIZE_BYTES] in the `test` configuration
        // is smaller than in production, to allow testing with more chunks
        // while keeping time and memory consumption reasonable.
        //
        // See: [Digest::hash]
        let inputs_and_digests = [
            (
                "",
                "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8",
            ),
            (
                "abc",
                "bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319",
            ),
            (
                "0123456789",
                "7b6cb8d374484e221785288b035dc53fc9ddf000607f473fc2a3258d89a70398",
            ),
            (
                "01234567890",
                "3d199478c18b7fe3ca1f4f2a9b3e07f708ff66ed52eb345db258abe8a812ed5c",
            ),
            (
                "The quick brown fox jumps over the lazy dog",
                "01718cec35cd3d796dd00020e0bfecb473ad23457d063b75eff29c0ffa2e58a9",
            ),
        ];
        for (known_input, expected_digest) in &inputs_and_digests {
            let known_input: &[u8] = known_input.as_ref();
            assert_eq!(*expected_digest, format!("{:?}", Digest::hash(known_input)));
        }
    }

    #[test]
    fn from_valid_hex_should_succeed() {
        for char in "abcdefABCDEF0123456789".chars() {
            let input: String = iter::repeat(char).take(64).collect();
            assert!(Digest::from_hex(input).is_ok());
        }
    }

    #[test]
    fn from_hex_invalid_length_should_fail() {
        for len in &[2_usize, 62, 63, 65, 66] {
            let input: String = "f".repeat(*len);
            assert!(Digest::from_hex(input).is_err());
        }
    }

    #[test]
    fn from_hex_invalid_char_should_fail() {
        for char in "g %-".chars() {
            let input: String = iter::repeat('f').take(63).chain(iter::once(char)).collect();
            assert!(Digest::from_hex(input).is_err());
        }
    }

    #[test]
    fn should_display_digest_in_hex() {
        let hash = Digest([0u8; 32]);
        let hash_hex = format!("{:?}", hash);
        assert_eq!(
            hash_hex,
            "0000000000000000000000000000000000000000000000000000000000000000"
        );
    }

    #[test]
    fn should_print_digest_lower_hex() {
        let hash = Digest([10u8; 32]);
        let hash_lower_hex = format!("{:x}", hash);
        assert_eq!(
            hash_lower_hex,
            "0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a"
        )
    }

    #[test]
    fn should_print_digest_upper_hex() {
        let hash = Digest([10u8; 32]);
        let hash_upper_hex = format!("{:X}", hash);
        assert_eq!(
            hash_upper_hex,
            "0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A"
        )
    }

    #[test]
    fn alternate_should_prepend_0x() {
        let hash = Digest([0u8; 32]);
        let hash_hex_alt = format!("{:#x}", hash);
        assert_eq!(
            hash_hex_alt,
            "0x0000000000000000000000000000000000000000000000000000000000000000"
        )
    }

    #[test]
    fn test_hash_pair() {
        let hash1 = Digest([1u8; 32]);
        let hash2 = Digest([2u8; 32]);

        let hash = Digest::hash_pair(hash1, hash2);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "30b600fb1f0cc0b3f0fc28cdcb7389405a6659be81c7d5c5905725aa3a5119ce"
        );
    }

    #[test]
    fn test_hash_rfold() {
        let hashes = [
            Digest([1u8; 32]),
            Digest([2u8; 32]),
            Digest([3u8; 32]),
            Digest([4u8; 32]),
            Digest([5u8; 32]),
        ];

        let hash = Digest::hash_slice_rfold(&hashes[..]);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "e137f4eb94d2387065454eecfe2cdb5584e3dbd5f1ca07fc511fffd13d234e8e"
        );

        let proof = Digest::hash_slice_rfold(&hashes[2..]);
        let hash_proof = Digest::hash_slice_with_proof(&hashes[..2], proof);

        assert_eq!(hash, hash_proof);
    }

    #[test]
    fn test_hash_merkle_odd() {
        let hashes = vec![
            Digest([1u8; 32]),
            Digest([2u8; 32]),
            Digest([3u8; 32]),
            Digest([4u8; 32]),
            Digest([5u8; 32]),
        ];

        let hash = Digest::hash_merkle_tree(hashes);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "775cec8133b97b0e8d4e97659025d5bac4ed7c8927d1bd99cf62114df57f3e74"
        );
    }

    #[test]
    fn test_hash_merkle_even() {
        let hashes = vec![
            Digest([1u8; 32]),
            Digest([2u8; 32]),
            Digest([3u8; 32]),
            Digest([4u8; 32]),
            Digest([5u8; 32]),
            Digest([6u8; 32]),
        ];

        let hash = Digest::hash_merkle_tree(hashes);
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "4bd50b08a8366b28c35bc831b95d147123bad01c29ffbf854b659c4b3ea4086c"
        );
    }

    #[test]
    fn test_hash_btreemap() {
        let mut map = BTreeMap::new();
        let _ = map.insert(Digest([1u8; 32]), Digest([2u8; 32]));
        let _ = map.insert(Digest([3u8; 32]), Digest([4u8; 32]));
        let _ = map.insert(Digest([5u8; 32]), Digest([6u8; 32]));
        let _ = map.insert(Digest([7u8; 32]), Digest([8u8; 32]));
        let _ = map.insert(Digest([9u8; 32]), Digest([10u8; 32]));

        let hash = Digest::hash_btree_map(&map).unwrap();
        let hash_lower_hex = format!("{:x}", hash);

        assert_eq!(
            hash_lower_hex,
            "fd1214a627473ffc6d6cc97e7012e6344d74abbf987b48cde5d0642049a0db98"
        );
    }

    #[test]
    fn digest_deserialize_regression() {
        let input = Digest([0; 32]);
        let serialized = bincode::serialize(&input).expect("failed to serialize.");

        let expected = vec![
            32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ];

        assert_eq!(expected, serialized);
    }

    #[test]
    fn should_assert_simple_digest_serialization_format() {
        let digest_bytes = [0; 32];

        assert_eq!(
            Digest(digest_bytes).to_bytes().unwrap(),
            digest_bytes.to_vec()
        );
    }

    #[test]
    fn merkle_roots_are_preimage_resistant() {
        // Input data is two chunks long.
        //
        // The resulting tree will look like this:
        //
        // 1..0  a..j
        // │     │
        // └─────── R
        //
        // The Merkle root is thus: R = h( h(1..0) || h(a..j) )
        //
        // h(1..0) = 807f1ba73147c3a96c2d63b38dd5a5f514f66290a1436bb9821e9f2a72eff263
        // h(a..j) = 499e1cdb476523fedafc9d9db31125e2744f271578ea95b16ab4bd1905f05fea
        // R = h(h(1..0) || h(a..j)) = 1319394a98d0cb194f960e3748baeb2045a9ec28aa51e0d42011be43f4a91f5f
        // h(2u64le || R) = c31f0bb6ef569354d1a26c3a51f1ad4b6d87cef7f73a290ab6be8db6a9c7d4ee
        //
        // The final step hashes `2u64le || R`, i.e. the leaf count as little endian
        // concatenated with the root.

        // Constants used here assume a chunk size of 10 bytes.
        assert_eq!(ChunkWithProof::CHUNK_SIZE_BYTES, 10);

        let long_data = b"1234567890abcdefghij";
        assert_eq!(long_data.len(), ChunkWithProof::CHUNK_SIZE_BYTES * 2);

        // The `long_data_hash` is constructed manually here, as chunking in `Digest::hash` was
        // still deactivated at the time this test was written.
        let long_data_hash = Digest::hash_merkle_tree(
            long_data
                .as_ref()
                .chunks(ChunkWithProof::CHUNK_SIZE_BYTES)
                .map(Digest::blake2b_hash),
        );

        // The concatenation of `2u64` in little endian + the Merkle root hash `R`. Note that this
        // is a valid hashable object on its own.
        let maybe_colliding_short_data = [
            2, 0, 0, 0, 0, 0, 0, 0, 19, 25, 57, 74, 152, 208, 203, 25, 79, 150, 14, 55, 72, 186,
            235, 32, 69, 169, 236, 40, 170, 81, 224, 212, 32, 17, 190, 67, 244, 169, 31, 95,
        ];

        // Use `blake2b_hash` to work around the issue of the chunk size being shorter than the
        // digest length.
        let short_data_hash = Digest::blake2b_hash(maybe_colliding_short_data);

        // Ensure there is no collision. You can verify that this test is correct by temporarily
        // changing the `Digest::hash_merkle_tree` function to use the unpadded `hash_pair`
        // function instead of `hash_merkle_root`.
        assert_ne!(long_data_hash, short_data_hash);

        // The expected input for the root hash is the colliding data, but prefixed with a full
        // chunk of zeros.
        let expected_final_hash_input = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 19, 25, 57, 74, 152, 208, 203,
            25, 79, 150, 14, 55, 72, 186, 235, 32, 69, 169, 236, 40, 170, 81, 224, 212, 32, 17,
            190, 67, 244, 169, 31, 95,
        ];
        assert_eq!(
            Digest::blake2b_hash(expected_final_hash_input),
            long_data_hash
        );

        // Another way to state this sanity check is to say that the short and the long data
        // should hash differently.
        //
        // Note: This condition is true at the time of writing this test, where chunk hashing is
        //       disabled. It should still hold true once it is enabled.
        assert_ne!(
            Digest::hash(maybe_colliding_short_data),
            Digest::hash(long_data)
        );

        // In a similar manner, the internally padded data should also not hash equal to either,
        // as it should itself be hashed using the chunking function.
        assert_ne!(
            Digest::hash(maybe_colliding_short_data),
            Digest::hash(expected_final_hash_input)
        );
        assert_ne!(
            Digest::hash(long_data),
            Digest::hash(expected_final_hash_input)
        );
    }
}
705}