zebra_chain/block/
merkle.rs

1//! The Bitcoin-inherited Merkle tree of transactions.
2
3use std::{fmt, io::Write};
4
5use hex::{FromHex, ToHex};
6
7use crate::{
8    serialization::{sha256d, BytesInDisplayOrder},
9    transaction::{self, Transaction, UnminedTx, UnminedTxId, VerifiedUnminedTx},
10};
11
12#[cfg(any(test, feature = "proptest-impl"))]
13use proptest_derive::Arbitrary;
14
15/// The root of the Bitcoin-inherited transaction Merkle tree, binding the
16/// block header to the transactions in the block.
17///
18/// Note: for V5-onward transactions it does not bind to authorizing data
19/// (signature and proofs) which makes it non-malleable [ZIP-244].
20///
21/// Note that because of a flaw in Bitcoin's design, the `merkle_root` does
22/// not always precisely bind the contents of the block (CVE-2012-2459). It
23/// is sometimes possible for an attacker to create multiple distinct sets of
24/// transactions with the same Merkle root, although only one set will be
25/// valid.
26///
27/// # Malleability
28///
29/// The Bitcoin source code contains the following note:
30///
31/// > WARNING! If you're reading this because you're learning about crypto
32/// > and/or designing a new system that will use merkle trees, keep in mind
33/// > that the following merkle tree algorithm has a serious flaw related to
34/// > duplicate txids, resulting in a vulnerability (CVE-2012-2459).
35/// > The reason is that if the number of hashes in the list at a given time
36/// > is odd, the last one is duplicated before computing the next level (which
37/// > is unusual in Merkle trees). This results in certain sequences of
38/// > transactions leading to the same merkle root. For example, these two
39/// > trees:
40/// >
41/// > ```ascii
42/// >              A               A
43/// >            /  \            /   \
44/// >          B     C         B       C
45/// >         / \    |        / \     / \
46/// >        D   E   F       D   E   F   F
47/// >       / \ / \ / \     / \ / \ / \ / \
48/// >       1 2 3 4 5 6     1 2 3 4 5 6 5 6
49/// > ```
50/// >
51/// > for transaction lists \[1,2,3,4,5,6\] and \[1,2,3,4,5,6,5,6\] (where 5 and
52/// > 6 are repeated) result in the same root hash A (because the hash of both
53/// > of (F) and (F,F) is C).
54/// >
55/// > The vulnerability results from being able to send a block with such a
56/// > transaction list, with the same merkle root, and the same block hash as
57/// > the original without duplication, resulting in failed validation. If the
58/// > receiving node proceeds to mark that block as permanently invalid
59/// > however, it will fail to accept further unmodified (and thus potentially
60/// > valid) versions of the same block. We defend against this by detecting
61/// > the case where we would hash two identical hashes at the end of the list
62/// > together, and treating that identically to the block having an invalid
63/// > merkle root. Assuming no double-SHA256 collisions, this will detect all
64/// > known ways of changing the transactions without affecting the merkle
65/// > root.
66///
67/// This vulnerability does not apply to Zebra, because it does not store invalid
68/// data on disk, and because it does not permanently fail blocks or use an
69/// aggressive anti-DoS mechanism.
70///
71/// [ZIP-244]: https://zips.z.cash/zip-0244
72#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
73#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary, Default))]
74pub struct Root(pub [u8; 32]);
75
76impl fmt::Debug for Root {
77    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
78        f.debug_tuple("Root").field(&hex::encode(self.0)).finish()
79    }
80}
81
82impl From<[u8; 32]> for Root {
83    fn from(hash: [u8; 32]) -> Self {
84        Root(hash)
85    }
86}
87
88impl From<Root> for [u8; 32] {
89    fn from(hash: Root) -> Self {
90        hash.0
91    }
92}
93
94impl BytesInDisplayOrder<true> for Root {
95    fn bytes_in_serialized_order(&self) -> [u8; 32] {
96        self.0
97    }
98
99    fn from_bytes_in_serialized_order(bytes: [u8; 32]) -> Self {
100        Root(bytes)
101    }
102}
103
104impl ToHex for &Root {
105    fn encode_hex<T: FromIterator<char>>(&self) -> T {
106        self.bytes_in_display_order().encode_hex()
107    }
108
109    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
110        self.bytes_in_display_order().encode_hex_upper()
111    }
112}
113
114impl ToHex for Root {
115    fn encode_hex<T: FromIterator<char>>(&self) -> T {
116        (&self).encode_hex()
117    }
118
119    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
120        (&self).encode_hex_upper()
121    }
122}
123
124impl FromHex for Root {
125    type Error = <[u8; 32] as FromHex>::Error;
126
127    fn from_hex<T: AsRef<[u8]>>(hex: T) -> Result<Self, Self::Error> {
128        let mut hash = <[u8; 32]>::from_hex(hex)?;
129        hash.reverse();
130
131        Ok(hash.into())
132    }
133}
134
135fn hash(h1: &[u8; 32], h2: &[u8; 32]) -> [u8; 32] {
136    let mut w = sha256d::Writer::default();
137    w.write_all(h1).unwrap();
138    w.write_all(h2).unwrap();
139    w.finish()
140}
141
142fn auth_data_hash(h1: &[u8; 32], h2: &[u8; 32]) -> [u8; 32] {
143    // > Non-leaf hashes in this tree are BLAKE2b-256 hashes personalized by
144    // > the string "ZcashAuthDatHash".
145    // https://zips.z.cash/zip-0244#block-header-changes
146    blake2b_simd::Params::new()
147        .hash_length(32)
148        .personal(b"ZcashAuthDatHash")
149        .to_state()
150        .update(h1)
151        .update(h2)
152        .finalize()
153        .as_bytes()
154        .try_into()
155        .expect("32 byte array")
156}
157
158impl<T> std::iter::FromIterator<T> for Root
159where
160    T: std::convert::AsRef<Transaction>,
161{
162    fn from_iter<I>(transactions: I) -> Self
163    where
164        I: IntoIterator<Item = T>,
165    {
166        transactions
167            .into_iter()
168            .map(|tx| tx.as_ref().hash())
169            .collect()
170    }
171}
172
173impl std::iter::FromIterator<UnminedTx> for Root {
174    fn from_iter<I>(transactions: I) -> Self
175    where
176        I: IntoIterator<Item = UnminedTx>,
177    {
178        transactions
179            .into_iter()
180            .map(|tx| tx.id.mined_id())
181            .collect()
182    }
183}
184
185impl std::iter::FromIterator<UnminedTxId> for Root {
186    fn from_iter<I>(tx_ids: I) -> Self
187    where
188        I: IntoIterator<Item = UnminedTxId>,
189    {
190        tx_ids.into_iter().map(|tx_id| tx_id.mined_id()).collect()
191    }
192}
193
194impl std::iter::FromIterator<VerifiedUnminedTx> for Root {
195    fn from_iter<I>(transactions: I) -> Self
196    where
197        I: IntoIterator<Item = VerifiedUnminedTx>,
198    {
199        transactions
200            .into_iter()
201            .map(|tx| tx.transaction.id.mined_id())
202            .collect()
203    }
204}
205
206impl std::iter::FromIterator<transaction::Hash> for Root {
207    /// # Panics
208    ///
209    /// When there are no transactions in the iterator.
210    /// This is impossible, because every block must have a coinbase transaction.
211    fn from_iter<I>(hashes: I) -> Self
212    where
213        I: IntoIterator<Item = transaction::Hash>,
214    {
215        let mut hashes = hashes.into_iter().map(|hash| hash.0).collect::<Vec<_>>();
216        while hashes.len() > 1 {
217            hashes = hashes
218                .chunks(2)
219                .map(|chunk| match chunk {
220                    [h1, h2] => hash(h1, h2),
221                    [h1] => hash(h1, h1),
222                    _ => unreachable!("chunks(2)"),
223                })
224                .collect();
225        }
226        Self(hashes[0])
227    }
228}
229
230/// The root of the authorizing data Merkle tree, binding the
231/// block header to the authorizing data of the block (signatures, proofs)
232/// as defined in [ZIP-244].
233///
234/// See [`Root`] for an important disclaimer.
235///
236/// [ZIP-244]: https://zips.z.cash/zip-0244
237#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
238#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary))]
239pub struct AuthDataRoot(pub(crate) [u8; 32]);
240
241impl fmt::Debug for AuthDataRoot {
242    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
243        f.debug_tuple("AuthRoot")
244            .field(&hex::encode(self.0))
245            .finish()
246    }
247}
248
249impl From<[u8; 32]> for AuthDataRoot {
250    fn from(hash: [u8; 32]) -> Self {
251        AuthDataRoot(hash)
252    }
253}
254
255impl From<AuthDataRoot> for [u8; 32] {
256    fn from(hash: AuthDataRoot) -> Self {
257        hash.0
258    }
259}
260
261impl BytesInDisplayOrder<true> for AuthDataRoot {
262    fn bytes_in_serialized_order(&self) -> [u8; 32] {
263        self.0
264    }
265
266    fn from_bytes_in_serialized_order(bytes: [u8; 32]) -> Self {
267        AuthDataRoot(bytes)
268    }
269}
270
271impl ToHex for &AuthDataRoot {
272    fn encode_hex<T: FromIterator<char>>(&self) -> T {
273        self.bytes_in_display_order().encode_hex()
274    }
275
276    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
277        self.bytes_in_display_order().encode_hex_upper()
278    }
279}
280
281impl ToHex for AuthDataRoot {
282    fn encode_hex<T: FromIterator<char>>(&self) -> T {
283        (&self).encode_hex()
284    }
285
286    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
287        (&self).encode_hex_upper()
288    }
289}
290
291impl FromHex for AuthDataRoot {
292    type Error = <[u8; 32] as FromHex>::Error;
293
294    fn from_hex<T: AsRef<[u8]>>(hex: T) -> Result<Self, Self::Error> {
295        let mut hash = <[u8; 32]>::from_hex(hex)?;
296        hash.reverse();
297
298        Ok(hash.into())
299    }
300}
301
302/// The placeholder used for the [`AuthDigest`](transaction::AuthDigest) of pre-v5 transactions.
303///
304/// # Consensus
305///
306/// > For transaction versions before v5, a placeholder value consisting
307/// > of 32 bytes of 0xFF is used in place of the authorizing data commitment.
308/// > This is only used in the tree committed to by hashAuthDataRoot.
309///
310/// <https://zips.z.cash/zip-0244#authorizing-data-commitment>
311pub const AUTH_DIGEST_PLACEHOLDER: transaction::AuthDigest = transaction::AuthDigest([0xFF; 32]);
312
313impl<T> std::iter::FromIterator<T> for AuthDataRoot
314where
315    T: std::convert::AsRef<Transaction>,
316{
317    fn from_iter<I>(transactions: I) -> Self
318    where
319        I: IntoIterator<Item = T>,
320    {
321        transactions
322            .into_iter()
323            .map(|tx| tx.as_ref().auth_digest().unwrap_or(AUTH_DIGEST_PLACEHOLDER))
324            .collect()
325    }
326}
327
328impl std::iter::FromIterator<UnminedTx> for AuthDataRoot {
329    fn from_iter<I>(transactions: I) -> Self
330    where
331        I: IntoIterator<Item = UnminedTx>,
332    {
333        transactions
334            .into_iter()
335            .map(|tx| tx.id.auth_digest().unwrap_or(AUTH_DIGEST_PLACEHOLDER))
336            .collect()
337    }
338}
339
340impl std::iter::FromIterator<VerifiedUnminedTx> for AuthDataRoot {
341    fn from_iter<I>(transactions: I) -> Self
342    where
343        I: IntoIterator<Item = VerifiedUnminedTx>,
344    {
345        transactions
346            .into_iter()
347            .map(|tx| {
348                tx.transaction
349                    .id
350                    .auth_digest()
351                    .unwrap_or(AUTH_DIGEST_PLACEHOLDER)
352            })
353            .collect()
354    }
355}
356
357impl std::iter::FromIterator<UnminedTxId> for AuthDataRoot {
358    fn from_iter<I>(tx_ids: I) -> Self
359    where
360        I: IntoIterator<Item = UnminedTxId>,
361    {
362        tx_ids
363            .into_iter()
364            .map(|tx_id| tx_id.auth_digest().unwrap_or(AUTH_DIGEST_PLACEHOLDER))
365            .collect()
366    }
367}
368
369impl std::iter::FromIterator<transaction::AuthDigest> for AuthDataRoot {
370    fn from_iter<I>(hashes: I) -> Self
371    where
372        I: IntoIterator<Item = transaction::AuthDigest>,
373    {
374        let mut hashes = hashes.into_iter().map(|hash| hash.0).collect::<Vec<_>>();
375        // > This new commitment is named hashAuthDataRoot and is the root of a
376        // > binary Merkle tree of transaction authorizing data commitments [...]
377        // > padded with leaves having the "null" hash value [0u8; 32].
378        // https://zips.z.cash/zip-0244#block-header-changes
379        // Pad with enough leaves to make the tree full (a power of 2).
380        let pad_count = hashes.len().next_power_of_two() - hashes.len();
381        hashes.extend(std::iter::repeat_n([0u8; 32], pad_count));
382        assert!(hashes.len().is_power_of_two());
383
384        while hashes.len() > 1 {
385            hashes = hashes
386                .chunks(2)
387                .map(|chunk| match chunk {
388                    [h1, h2] => auth_data_hash(h1, h2),
389                    _ => unreachable!("number of nodes is always even since tree is full"),
390                })
391                .collect();
392        }
393
394        Self(hashes[0])
395    }
396}
397
398#[cfg(test)]
399mod tests {
400    use super::*;
401
402    use crate::{block::Block, serialization::ZcashDeserialize, transaction::AuthDigest};
403
404    #[test]
405    fn block_test_vectors() {
406        for block_bytes in zebra_test::vectors::BLOCKS.iter() {
407            let block = Block::zcash_deserialize(&**block_bytes).unwrap();
408            let merkle_root = block.transactions.iter().collect::<Root>();
409            assert_eq!(
410                merkle_root,
411                block.header.merkle_root,
412                "block: {:?} {:?} transaction hashes: {:?}",
413                block.coinbase_height().unwrap(),
414                block.hash(),
415                block
416                    .transactions
417                    .iter()
418                    .map(|tx| tx.hash())
419                    .collect::<Vec<_>>()
420            );
421        }
422    }
423
424    #[test]
425    fn auth_digest() {
426        for block_bytes in zebra_test::vectors::BLOCKS.iter() {
427            let block = Block::zcash_deserialize(&**block_bytes).unwrap();
428            let _auth_root = block.transactions.iter().collect::<AuthDataRoot>();
429            // No test vectors for now, so just check it computes without panicking
430        }
431    }
432
433    #[test]
434    fn auth_data_padding() {
435        // Compute the root of a 3-leaf tree with arbitrary leaves
436        let mut v = vec![
437            AuthDigest([0x42; 32]),
438            AuthDigest([0xAA; 32]),
439            AuthDigest([0x77; 32]),
440        ];
441        let root_3 = v.iter().copied().collect::<AuthDataRoot>();
442
443        // Compute the root a 4-leaf tree with the same leaves as before and
444        // an additional all-zeroes leaf.
445        // Since this is the same leaf used as padding in the previous tree,
446        // then both trees must have the same root.
447        v.push(AuthDigest([0x00; 32]));
448        let root_4 = v.iter().copied().collect::<AuthDataRoot>();
449
450        assert_eq!(root_3, root_4);
451    }
452
453    #[test]
454    fn auth_data_pre_v5() {
455        // Compute the AuthDataRoot for a single transaction of an arbitrary pre-V5 block
456        let block =
457            Block::zcash_deserialize(&**zebra_test::vectors::BLOCK_MAINNET_1046400_BYTES).unwrap();
458        let auth_root = block.transactions.iter().take(1).collect::<AuthDataRoot>();
459
460        // Compute the AuthDataRoot with a single [0xFF; 32] digest.
461        // Since ZIP-244 specifies that this value must be used as the auth digest of
462        // pre-V5 transactions, then the roots must match.
463        let expect_auth_root = [AuthDigest([0xFF; 32])]
464            .iter()
465            .copied()
466            .collect::<AuthDataRoot>();
467
468        assert_eq!(auth_root, expect_auth_root);
469    }
470}