clvm_utils/
tree_hash.rs

1use chia_sha2::Sha256;
2use clvmr::allocator::{Allocator, NodePtr, NodeVisitor, ObjectType, SExp};
3use clvmr::serde::node_from_bytes_backrefs;
4use hex_literal::hex;
5use std::ops::Deref;
6use std::{fmt, io};
7
8#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
9pub struct TreeHash([u8; 32]);
10
11impl TreeHash {
12    pub const fn new(hash: [u8; 32]) -> Self {
13        Self(hash)
14    }
15
16    pub const fn to_bytes(&self) -> [u8; 32] {
17        self.0
18    }
19
20    pub fn to_vec(&self) -> Vec<u8> {
21        self.0.to_vec()
22    }
23}
24
25impl fmt::Debug for TreeHash {
26    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
27        write!(f, "TreeHash({self})")
28    }
29}
30
31impl fmt::Display for TreeHash {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        write!(f, "{}", hex::encode(self.0))
34    }
35}
36
37impl From<[u8; 32]> for TreeHash {
38    fn from(hash: [u8; 32]) -> Self {
39        Self::new(hash)
40    }
41}
42
43impl From<TreeHash> for [u8; 32] {
44    fn from(hash: TreeHash) -> [u8; 32] {
45        hash.0
46    }
47}
48
49impl AsRef<[u8]> for TreeHash {
50    fn as_ref(&self) -> &[u8] {
51        &self.0
52    }
53}
54
55impl Deref for TreeHash {
56    type Target = [u8];
57
58    fn deref(&self) -> &Self::Target {
59        &self.0
60    }
61}
62
63#[derive(Default)]
64pub struct TreeCache {
65    hashes: Vec<TreeHash>,
66    // each entry is an index into hashes, or one of 3 special values:
67    // u16::MAX if the pair has not been visited
68    // u16::MAX - 1 if the pair has been seen once
69    // u16::MAX - 2 if the pair has been seen at least twice (this makes it a
70    // candidate for memoization)
71    pairs: Vec<u16>,
72}
73
74const NOT_VISITED: u16 = u16::MAX;
75const SEEN_ONCE: u16 = u16::MAX - 1;
76const SEEN_MULTIPLE: u16 = u16::MAX - 2;
77
78impl TreeCache {
79    pub fn get(&self, n: NodePtr) -> Option<&TreeHash> {
80        // We only cache pairs (for now)
81        if !matches!(n.object_type(), ObjectType::Pair) {
82            return None;
83        }
84
85        let idx = n.index() as usize;
86        let slot = *self.pairs.get(idx)?;
87        if slot >= SEEN_MULTIPLE {
88            return None;
89        }
90        Some(&self.hashes[slot as usize])
91    }
92
93    pub fn insert(&mut self, n: NodePtr, hash: &TreeHash) {
94        // If we've reached the max size, just ignore new cache items
95        if self.hashes.len() == SEEN_MULTIPLE as usize {
96            return;
97        }
98
99        if !matches!(n.object_type(), ObjectType::Pair) {
100            return;
101        }
102
103        let idx = n.index() as usize;
104        if idx >= self.pairs.len() {
105            self.pairs.resize(idx + 1, NOT_VISITED);
106        }
107
108        let slot = self.hashes.len();
109        self.hashes.push(*hash);
110        self.pairs[idx] = slot as u16;
111    }
112
113    /// mark the node as being visited. Returns true if we need to
114    /// traverse visitation down this node.
115    fn visit(&mut self, n: NodePtr) -> bool {
116        if !matches!(n.object_type(), ObjectType::Pair) {
117            return false;
118        }
119        let idx = n.index() as usize;
120        if idx >= self.pairs.len() {
121            self.pairs.resize(idx + 1, NOT_VISITED);
122        }
123        if self.pairs[idx] > SEEN_MULTIPLE {
124            self.pairs[idx] -= 1;
125        }
126        self.pairs[idx] == SEEN_ONCE
127    }
128
129    pub fn should_memoize(&mut self, n: NodePtr) -> bool {
130        if !matches!(n.object_type(), ObjectType::Pair) {
131            return false;
132        }
133        let idx = n.index() as usize;
134        if idx >= self.pairs.len() {
135            false
136        } else {
137            self.pairs[idx] <= SEEN_MULTIPLE
138        }
139    }
140
141    pub fn visit_tree(&mut self, a: &Allocator, node: NodePtr) {
142        if !self.visit(node) {
143            return;
144        }
145        let mut nodes = vec![node];
146        while let Some(n) = nodes.pop() {
147            let SExp::Pair(left, right) = a.sexp(n) else {
148                continue;
149            };
150            if self.visit(left) {
151                nodes.push(left);
152            }
153            if self.visit(right) {
154                nodes.push(right);
155            }
156        }
157    }
158}
159
160enum TreeOp {
161    SExp(NodePtr),
162    Cons,
163    ConsAddCache(NodePtr),
164}
165
166// contains SHA256(1 .. x), where x is the index into the array and .. is
167// concatenation. This was computed by:
168// from hashlib import sha256
169// print(f"    th!(\"{sha256(bytes([1])).hexdigest()}\"),")
170// for i in range(1, 24):
171//     print(f"    th!(\"{sha256(bytes([1, i])).hexdigest()}\"),")
172
173macro_rules! th {
174    ($hash:expr) => {
175        TreeHash::new(hex!($hash))
176    };
177}
178pub const PRECOMPUTED_HASHES: [TreeHash; 24] = [
179    th!("4bf5122f344554c53bde2ebb8cd2b7e3d1600ad631c385a5d7cce23c7785459a"),
180    th!("9dcf97a184f32623d11a73124ceb99a5709b083721e878a16d78f596718ba7b2"),
181    th!("a12871fee210fb8619291eaea194581cbd2531e4b23759d225f6806923f63222"),
182    th!("c79b932e1e1da3c0e098e5ad2c422937eb904a76cf61d83975a74a68fbb04b99"),
183    th!("a8d5dd63fba471ebcb1f3e8f7c1e1879b7152a6e7298a91ce119a63400ade7c5"),
184    th!("bc5959f43bc6e47175374b6716e53c9a7d72c59424c821336995bad760d9aeb3"),
185    th!("44602a999abbebedf7de0ae1318e4f57e3cb1d67e482a65f9657f7541f3fe4bb"),
186    th!("ca6c6588fa01171b200740344d354e8548b7470061fb32a34f4feee470ec281f"),
187    th!("9e6282e4f25e370ce617e21d6fe265e88b9e7b8682cf00059b9d128d9381f09d"),
188    th!("ac9e61d54eb6967e212c06aab15408292f8558c48f06f9d705150063c68753b0"),
189    th!("c04b5bb1a5b2eb3e9cd4805420dba5a9d133da5b7adeeafb5474c4adae9faa80"),
190    th!("57bfd1cb0adda3d94315053fda723f2028320faa8338225d99f629e3d46d43a9"),
191    th!("6b6daa8334bbcc8f6b5906b6c04be041d92700b74024f73f50e0a9f0dae5f06f"),
192    th!("c7b89cfb9abf2c4cb212a4840b37d762f4c880b8517b0dadb0c310ded24dd86d"),
193    th!("653b3bb3e18ef84d5b1e8ff9884aecf1950c7a1c98715411c22b987663b86dda"),
194    th!("24255ef5d941493b9978f3aabb0ed07d084ade196d23f463ff058954cbf6e9b6"),
195    th!("af340aa58ea7d72c2f9a7405f3734167bb27dd2a520d216addef65f8362102b6"),
196    th!("26e7f98cfafee5b213726e22632923bf31bf3e988233235f8f5ca5466b3ac0ed"),
197    th!("115b498ce94335826baa16386cd1e2fde8ca408f6f50f3785964f263cdf37ebe"),
198    th!("d8c50d6282a1ba47f0a23430d177bbfbb72e2b84713745e894f575570f1f3d6e"),
199    th!("dbe726e81a7221a385e007ef9e834a975a4b528c6f55a5d2ece288bee831a3d1"),
200    th!("764c8a3561c7cf261771b4e1969b84c210836f3c034baebac5e49a394a6ee0a9"),
201    th!("dce37f3512b6337d27290436ba9289e2fd6c775494c33668dd177cf811fbd47a"),
202    th!("5809addc9f6926fc5c4e20cf87958858c4454c21cdfc6b02f377f12c06b35cca"),
203];
204
205pub fn tree_hash_atom(bytes: &[u8]) -> TreeHash {
206    let mut sha256 = Sha256::new();
207    sha256.update([1]);
208    sha256.update(bytes);
209    TreeHash::new(sha256.finalize())
210}
211
212pub fn tree_hash_pair(first: TreeHash, rest: TreeHash) -> TreeHash {
213    let mut sha256 = Sha256::new();
214    sha256.update([2]);
215    sha256.update(first);
216    sha256.update(rest);
217    TreeHash::new(sha256.finalize())
218}
219
220pub fn tree_hash(a: &Allocator, node: NodePtr) -> TreeHash {
221    let mut hashes = Vec::new();
222    let mut ops = vec![TreeOp::SExp(node)];
223
224    while let Some(op) = ops.pop() {
225        match op {
226            TreeOp::SExp(node) => match a.node(node) {
227                NodeVisitor::Buffer(bytes) => {
228                    hashes.push(tree_hash_atom(bytes));
229                }
230                NodeVisitor::U32(val) => {
231                    if (val as usize) < PRECOMPUTED_HASHES.len() {
232                        hashes.push(PRECOMPUTED_HASHES[val as usize]);
233                    } else {
234                        hashes.push(tree_hash_atom(a.atom(node).as_ref()));
235                    }
236                }
237                NodeVisitor::Pair(left, right) => {
238                    ops.push(TreeOp::Cons);
239                    ops.push(TreeOp::SExp(left));
240                    ops.push(TreeOp::SExp(right));
241                }
242            },
243            TreeOp::Cons => {
244                let first = hashes.pop().unwrap();
245                let rest = hashes.pop().unwrap();
246                hashes.push(tree_hash_pair(first, rest));
247            }
248            TreeOp::ConsAddCache(_) => unreachable!(),
249        }
250    }
251
252    assert!(hashes.len() == 1);
253    hashes[0]
254}
255
256pub fn tree_hash_cached(a: &Allocator, node: NodePtr, cache: &mut TreeCache) -> TreeHash {
257    cache.visit_tree(a, node);
258
259    let mut hashes = Vec::new();
260    let mut ops = vec![TreeOp::SExp(node)];
261
262    while let Some(op) = ops.pop() {
263        match op {
264            TreeOp::SExp(node) => match a.node(node) {
265                NodeVisitor::Buffer(bytes) => {
266                    let hash = tree_hash_atom(bytes);
267                    hashes.push(hash);
268                }
269                NodeVisitor::U32(val) => {
270                    if (val as usize) < PRECOMPUTED_HASHES.len() {
271                        hashes.push(PRECOMPUTED_HASHES[val as usize]);
272                    } else {
273                        hashes.push(tree_hash_atom(a.atom(node).as_ref()));
274                    }
275                }
276                NodeVisitor::Pair(left, right) => {
277                    if let Some(hash) = cache.get(node) {
278                        hashes.push(*hash);
279                    } else {
280                        if cache.should_memoize(node) {
281                            ops.push(TreeOp::ConsAddCache(node));
282                        } else {
283                            ops.push(TreeOp::Cons);
284                        }
285                        ops.push(TreeOp::SExp(left));
286                        ops.push(TreeOp::SExp(right));
287                    }
288                }
289            },
290            TreeOp::Cons => {
291                let first = hashes.pop().unwrap();
292                let rest = hashes.pop().unwrap();
293                hashes.push(tree_hash_pair(first, rest));
294            }
295            TreeOp::ConsAddCache(original_node) => {
296                let first = hashes.pop().unwrap();
297                let rest = hashes.pop().unwrap();
298                let hash = tree_hash_pair(first, rest);
299                hashes.push(hash);
300                cache.insert(original_node, &hash);
301            }
302        }
303    }
304
305    assert!(hashes.len() == 1);
306    hashes[0]
307}
308
309pub fn tree_hash_from_bytes(buf: &[u8]) -> io::Result<TreeHash> {
310    let mut a = Allocator::new();
311    let node = node_from_bytes_backrefs(&mut a, buf)?;
312    let mut cache = TreeCache::default();
313    Ok(tree_hash_cached(&a, node, &mut cache))
314}
315
316#[test]
317fn test_tree_hash() {
318    let mut a = Allocator::new();
319    let atom1 = a.new_atom(&[1, 2, 3]).unwrap();
320    let atom2 = a.new_atom(&[4, 5, 6]).unwrap();
321    let root = a.new_pair(atom1, atom2).unwrap();
322
323    // test atom1 hash
324    let atom1_hash = {
325        let mut sha256 = Sha256::new();
326        sha256.update([1_u8]);
327        sha256.update([1, 2, 3]);
328        let atom1_hash = sha256.finalize();
329
330        assert_eq!(tree_hash(&a, atom1).as_ref(), atom1_hash.as_slice());
331        atom1_hash
332    };
333
334    // test atom2 hash
335    let atom2_hash = {
336        let mut sha256 = Sha256::new();
337        sha256.update([1_u8]);
338        sha256.update([4, 5, 6]);
339        let atom2_hash = sha256.finalize();
340
341        assert_eq!(tree_hash(&a, atom2).as_ref(), atom2_hash.as_slice());
342        atom2_hash
343    };
344
345    // test tree hash
346    let root_hash = {
347        let mut sha256 = Sha256::new();
348        sha256.update([2_u8]);
349        sha256.update(atom1_hash.as_slice());
350        sha256.update(atom2_hash.as_slice());
351        let root_hash = sha256.finalize();
352
353        assert_eq!(tree_hash(&a, root).as_ref(), root_hash.as_slice());
354        root_hash
355    };
356
357    let atom3 = a.new_atom(&[7, 8, 9]).unwrap();
358    let root2 = a.new_pair(root, atom3).unwrap();
359
360    let atom3_hash = {
361        let mut sha256 = Sha256::new();
362        sha256.update([1_u8]);
363        sha256.update([7, 8, 9]);
364        sha256.finalize()
365    };
366
367    // test deeper tree hash
368    {
369        let mut sha256 = Sha256::new();
370        sha256.update([2_u8]);
371        sha256.update(root_hash.as_slice());
372        sha256.update(atom3_hash.as_slice());
373
374        assert_eq!(tree_hash(&a, root2).as_ref(), sha256.finalize().as_slice());
375    }
376}
377
378#[test]
379fn test_tree_hash_from_bytes() {
380    use clvmr::serde::{node_to_bytes, node_to_bytes_backrefs};
381
382    let mut a = Allocator::new();
383    let atom1 = a.new_atom(&[1, 2, 3]).unwrap();
384    let atom2 = a.new_atom(&[4, 5, 6]).unwrap();
385    let node1 = a.new_pair(atom1, atom2).unwrap();
386    let node2 = a.new_pair(atom2, atom1).unwrap();
387
388    let node1 = a.new_pair(node1, node1).unwrap();
389    let node2 = a.new_pair(node2, node2).unwrap();
390
391    let root = a.new_pair(node1, node2).unwrap();
392
393    let serialized_clvm = node_to_bytes(&a, root).expect("node_to_bytes");
394    let serialized_clvm_backrefs =
395        node_to_bytes_backrefs(&a, root).expect("node_to_bytes_backrefs");
396
397    let hash1 = tree_hash_from_bytes(&serialized_clvm).expect("tree_hash_from_bytes");
398    let hash2 = tree_hash_from_bytes(&serialized_clvm_backrefs).expect("tree_hash_from_bytes");
399    let hash3 = tree_hash(&a, root);
400
401    assert!(serialized_clvm.len() > serialized_clvm_backrefs.len());
402    assert_eq!(hash1, hash2);
403    assert_eq!(hash1, hash3);
404}
405
406#[cfg(test)]
407use rstest::rstest;
408
409#[cfg(test)]
410#[rstest]
411#[case(
412    "block-1ee588dc",
413    "1cba0b22b84b597d265d77fbabb57fada01d963f75dc3956a6166a2385997ef2"
414)]
415#[case(
416    "block-6fe59b24",
417    "540c5afac7c26728ed6b7891d8ce2f5b26009c4b0090d7035403c2425dc54e1d"
418)]
419#[case(
420    "block-b45268ac",
421    "7cc321f5554126c9f430afbc7dd9c804f5d34a248e3192f275f5d585ecf8e873"
422)]
423#[case(
424    "block-c2a8df0d",
425    "2e25efa524e420111006fee77f50fb8fbd725920a5312d5480af239d81ab5e7e"
426)]
427#[case(
428    "block-e5002df2",
429    "c179ece232dceef984ba000f7e5b67ee3092582668bf6178969df10845eb8b18"
430)]
431#[case(
432    "block-4671894",
433    "3750f0e1bde9fcb407135f974aa276a4580e1e76a47e6d8d9bb2911d0fe91db1"
434)]
435#[case(
436    "block-225758",
437    "880df94c3c9e0f7c26c42ae99723e683a4cd37e73f74c6322d1dfabaa1d64d93"
438)]
439#[case(
440    "block-834752",
441    "be755b8ef03d917b8bd37ae152792a7daa7de81bbb0eaa21c530571c2105c130"
442)]
443#[case(
444    "block-834752-compressed",
445    "be755b8ef03d917b8bd37ae152792a7daa7de81bbb0eaa21c530571c2105c130"
446)]
447#[case(
448    "block-834760",
449    "77558768f74c5f863b36232a1390843a63a397fc22da1321fea3a05eab67be2c"
450)]
451#[case(
452    "block-834761",
453    "4bac8b299c6545a37a825883c863b79ce850e7f6c8f1d2abeec2865f5450f1c5"
454)]
455#[case(
456    "block-834765",
457    "b915ec5f9f8ea723e0a99b035df206673369b802766dd76b6c8f4c15ab7bca2c"
458)]
459#[case(
460    "block-834766",
461    "409559c3395fb18a6c3390ccccd55e82162b1e68b867490a90ccbddf78147c9d"
462)]
463#[case(
464    "block-834768",
465    "905441945a9a56558337c8b7a536a6b9606ad63e11a265a938f301747ccfb7af"
466)]
467fn test_tree_hash_cached(
468    #[case] name: &str,
469    #[case] expect: &str,
470    #[values(true, false)] compressed: bool,
471) {
472    use clvmr::serde::{node_from_bytes_backrefs, node_to_bytes_backrefs};
473    use std::fs::read_to_string;
474
475    let filename = format!("../../generator-tests/{name}.txt");
476    println!("file: {filename}",);
477    let test_file = read_to_string(filename).expect("test file not found");
478    let generator = test_file.lines().next().expect("invalid test file");
479    let generator = hex::decode(generator).expect("invalid hex encoded generator");
480
481    let generator = if compressed {
482        let mut a = Allocator::new();
483        let node = node_from_bytes_backrefs(&mut a, &generator).expect("node_from_bytes_backrefs");
484        node_to_bytes_backrefs(&a, node).expect("node_to_bytes_backrefs")
485    } else {
486        generator
487    };
488
489    let mut a = Allocator::new();
490    let mut cache = TreeCache::default();
491    let node = node_from_bytes_backrefs(&mut a, &generator).expect("node_from_bytes_backrefs");
492
493    let hash1 = tree_hash(&a, node);
494    let hash2 = tree_hash_cached(&a, node, &mut cache);
495    // for (key, value) in cache.iter() {
496    //     println!("  {key:?}: {}", hex::encode(value));
497    // }
498    assert_eq!(hash1, hash2);
499    assert_eq!(hash1.as_ref(), hex::decode(expect).unwrap().as_slice());
500}
501
502#[cfg(test)]
503fn test_sha256_atom(buf: &[u8]) {
504    let hash = tree_hash_atom(buf);
505
506    let mut hasher = Sha256::new();
507    hasher.update([1_u8]);
508    if !buf.is_empty() {
509        hasher.update(buf);
510    }
511
512    assert_eq!(hash.as_ref(), hasher.finalize().as_slice());
513}
514
515#[test]
516fn test_tree_hash_atom() {
517    test_sha256_atom(&[]);
518    for val in 0..=255 {
519        test_sha256_atom(&[val]);
520    }
521
522    for val in 0..=255 {
523        test_sha256_atom(&[0, val]);
524    }
525
526    for val in 0..=255 {
527        test_sha256_atom(&[0xff, val]);
528    }
529}
530
531#[test]
532fn test_precomputed_atoms() {
533    assert_eq!(tree_hash_atom(&[]), PRECOMPUTED_HASHES[0]);
534    for val in 1..(PRECOMPUTED_HASHES.len() as u8) {
535        assert_eq!(tree_hash_atom(&[val]), PRECOMPUTED_HASHES[val as usize]);
536    }
537}
538
539#[test]
540fn test_tree_cache_get() {
541    let mut allocator = Allocator::new();
542    let mut cache = TreeCache::default();
543
544    let a = allocator.nil();
545    let b = allocator.one();
546    let c = allocator.new_pair(a, b).expect("new_pair");
547
548    assert_eq!(cache.get(a), None);
549    assert_eq!(cache.get(b), None);
550    assert_eq!(cache.get(c), None);
551
552    // We don't cache atoms
553    cache.insert(a, &tree_hash(&allocator, a));
554    assert_eq!(cache.get(a), None);
555
556    cache.insert(b, &tree_hash(&allocator, b));
557    assert_eq!(cache.get(b), None);
558
559    // but pair is OK
560    cache.insert(c, &tree_hash(&allocator, c));
561    assert_eq!(cache.get(c), Some(&tree_hash(&allocator, c)));
562}
563
564#[test]
565fn test_tree_cache_size_limit() {
566    let mut allocator = Allocator::new();
567    let mut cache = TreeCache::default();
568
569    let mut list = allocator.nil();
570    let mut hash = tree_hash(&allocator, list);
571    cache.insert(list, &hash);
572
573    // we only fit 65k items in the cache
574    for i in 0..65540 {
575        let b = allocator.one();
576        list = allocator.new_pair(b, list).expect("new_pair");
577        hash = tree_hash_pair(tree_hash_atom(b"\x01"), hash);
578        cache.insert(list, &hash);
579
580        println!("{i}");
581        if i < 65533 {
582            assert_eq!(cache.get(list), Some(&hash));
583        } else {
584            assert_eq!(cache.get(list), None);
585        }
586    }
587    assert_eq!(cache.get(list), None);
588}
589
590#[test]
591fn test_tree_cache_should_memoize() {
592    let mut allocator = Allocator::new();
593    let mut cache = TreeCache::default();
594
595    let a = allocator.nil();
596    let b = allocator.one();
597    let c = allocator.new_pair(a, b).expect("new_pair");
598
599    assert!(!cache.should_memoize(a));
600    assert!(!cache.should_memoize(b));
601    assert!(!cache.should_memoize(c));
602
603    // we need to visit a node at least twice for it to be considered a
604    // candidate for memoization
605    assert!(cache.visit(c));
606    assert!(!cache.should_memoize(c));
607    assert!(!cache.visit(c));
608
609    assert!(cache.should_memoize(c));
610}