clvm_utils/
tree_hash.rs

1use chia_sha2::Sha256;
2use clvmr::allocator::{Allocator, NodePtr, NodeVisitor, ObjectType, SExp};
3use clvmr::error::EvalErr;
4use clvmr::serde::node_from_bytes_backrefs;
5use hex_literal::hex;
6use std::fmt;
7use std::ops::Deref;
8
9#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
10pub struct TreeHash([u8; 32]);
11
12impl TreeHash {
13    pub const fn new(hash: [u8; 32]) -> Self {
14        Self(hash)
15    }
16
17    pub const fn to_bytes(&self) -> [u8; 32] {
18        self.0
19    }
20
21    pub fn to_vec(&self) -> Vec<u8> {
22        self.0.to_vec()
23    }
24}
25
26impl fmt::Debug for TreeHash {
27    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
28        write!(f, "TreeHash({self})")
29    }
30}
31
32impl fmt::Display for TreeHash {
33    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34        write!(f, "{}", hex::encode(self.0))
35    }
36}
37
38impl From<[u8; 32]> for TreeHash {
39    fn from(hash: [u8; 32]) -> Self {
40        Self::new(hash)
41    }
42}
43
44impl From<TreeHash> for [u8; 32] {
45    fn from(hash: TreeHash) -> [u8; 32] {
46        hash.0
47    }
48}
49
50impl AsRef<[u8]> for TreeHash {
51    fn as_ref(&self) -> &[u8] {
52        &self.0
53    }
54}
55
56impl Deref for TreeHash {
57    type Target = [u8];
58
59    fn deref(&self) -> &Self::Target {
60        &self.0
61    }
62}
63
64#[derive(Default)]
65pub struct TreeCache {
66    hashes: Vec<TreeHash>,
67    // each entry is an index into hashes, or one of 3 special values:
68    // u16::MAX if the pair has not been visited
69    // u16::MAX - 1 if the pair has been seen once
70    // u16::MAX - 2 if the pair has been seen at least twice (this makes it a
71    // candidate for memoization)
72    pairs: Vec<u16>,
73}
74
75const NOT_VISITED: u16 = u16::MAX;
76const SEEN_ONCE: u16 = u16::MAX - 1;
77const SEEN_MULTIPLE: u16 = u16::MAX - 2;
78
79impl TreeCache {
80    pub fn get(&self, n: NodePtr) -> Option<&TreeHash> {
81        // We only cache pairs (for now)
82        if !matches!(n.object_type(), ObjectType::Pair) {
83            return None;
84        }
85
86        let idx = n.index() as usize;
87        let slot = *self.pairs.get(idx)?;
88        if slot >= SEEN_MULTIPLE {
89            return None;
90        }
91        Some(&self.hashes[slot as usize])
92    }
93
94    pub fn insert(&mut self, n: NodePtr, hash: &TreeHash) {
95        // If we've reached the max size, just ignore new cache items
96        if self.hashes.len() == SEEN_MULTIPLE as usize {
97            return;
98        }
99
100        if !matches!(n.object_type(), ObjectType::Pair) {
101            return;
102        }
103
104        let idx = n.index() as usize;
105        if idx >= self.pairs.len() {
106            self.pairs.resize(idx + 1, NOT_VISITED);
107        }
108
109        let slot = self.hashes.len();
110        self.hashes.push(*hash);
111        self.pairs[idx] = slot as u16;
112    }
113
114    /// mark the node as being visited. Returns true if we need to
115    /// traverse visitation down this node.
116    fn visit(&mut self, n: NodePtr) -> bool {
117        if !matches!(n.object_type(), ObjectType::Pair) {
118            return false;
119        }
120        let idx = n.index() as usize;
121        if idx >= self.pairs.len() {
122            self.pairs.resize(idx + 1, NOT_VISITED);
123        }
124        if self.pairs[idx] > SEEN_MULTIPLE {
125            self.pairs[idx] -= 1;
126        }
127        self.pairs[idx] == SEEN_ONCE
128    }
129
130    pub fn should_memoize(&mut self, n: NodePtr) -> bool {
131        if !matches!(n.object_type(), ObjectType::Pair) {
132            return false;
133        }
134        let idx = n.index() as usize;
135        if idx >= self.pairs.len() {
136            false
137        } else {
138            self.pairs[idx] <= SEEN_MULTIPLE
139        }
140    }
141
142    pub fn visit_tree(&mut self, a: &Allocator, node: NodePtr) {
143        if !self.visit(node) {
144            return;
145        }
146        let mut nodes = vec![node];
147        while let Some(n) = nodes.pop() {
148            let SExp::Pair(left, right) = a.sexp(n) else {
149                continue;
150            };
151            if self.visit(left) {
152                nodes.push(left);
153            }
154            if self.visit(right) {
155                nodes.push(right);
156            }
157        }
158    }
159}
160
161enum TreeOp {
162    SExp(NodePtr),
163    Cons,
164    ConsAddCache(NodePtr),
165}
166
167// contains SHA256(1 .. x), where x is the index into the array and .. is
168// concatenation. This was computed by:
169// from hashlib import sha256
170// print(f"    th!(\"{sha256(bytes([1])).hexdigest()}\"),")
171// for i in range(1, 24):
172//     print(f"    th!(\"{sha256(bytes([1, i])).hexdigest()}\"),")
173
174macro_rules! th {
175    ($hash:expr) => {
176        TreeHash::new(hex!($hash))
177    };
178}
179pub const PRECOMPUTED_HASHES: [TreeHash; 24] = [
180    th!("4bf5122f344554c53bde2ebb8cd2b7e3d1600ad631c385a5d7cce23c7785459a"),
181    th!("9dcf97a184f32623d11a73124ceb99a5709b083721e878a16d78f596718ba7b2"),
182    th!("a12871fee210fb8619291eaea194581cbd2531e4b23759d225f6806923f63222"),
183    th!("c79b932e1e1da3c0e098e5ad2c422937eb904a76cf61d83975a74a68fbb04b99"),
184    th!("a8d5dd63fba471ebcb1f3e8f7c1e1879b7152a6e7298a91ce119a63400ade7c5"),
185    th!("bc5959f43bc6e47175374b6716e53c9a7d72c59424c821336995bad760d9aeb3"),
186    th!("44602a999abbebedf7de0ae1318e4f57e3cb1d67e482a65f9657f7541f3fe4bb"),
187    th!("ca6c6588fa01171b200740344d354e8548b7470061fb32a34f4feee470ec281f"),
188    th!("9e6282e4f25e370ce617e21d6fe265e88b9e7b8682cf00059b9d128d9381f09d"),
189    th!("ac9e61d54eb6967e212c06aab15408292f8558c48f06f9d705150063c68753b0"),
190    th!("c04b5bb1a5b2eb3e9cd4805420dba5a9d133da5b7adeeafb5474c4adae9faa80"),
191    th!("57bfd1cb0adda3d94315053fda723f2028320faa8338225d99f629e3d46d43a9"),
192    th!("6b6daa8334bbcc8f6b5906b6c04be041d92700b74024f73f50e0a9f0dae5f06f"),
193    th!("c7b89cfb9abf2c4cb212a4840b37d762f4c880b8517b0dadb0c310ded24dd86d"),
194    th!("653b3bb3e18ef84d5b1e8ff9884aecf1950c7a1c98715411c22b987663b86dda"),
195    th!("24255ef5d941493b9978f3aabb0ed07d084ade196d23f463ff058954cbf6e9b6"),
196    th!("af340aa58ea7d72c2f9a7405f3734167bb27dd2a520d216addef65f8362102b6"),
197    th!("26e7f98cfafee5b213726e22632923bf31bf3e988233235f8f5ca5466b3ac0ed"),
198    th!("115b498ce94335826baa16386cd1e2fde8ca408f6f50f3785964f263cdf37ebe"),
199    th!("d8c50d6282a1ba47f0a23430d177bbfbb72e2b84713745e894f575570f1f3d6e"),
200    th!("dbe726e81a7221a385e007ef9e834a975a4b528c6f55a5d2ece288bee831a3d1"),
201    th!("764c8a3561c7cf261771b4e1969b84c210836f3c034baebac5e49a394a6ee0a9"),
202    th!("dce37f3512b6337d27290436ba9289e2fd6c775494c33668dd177cf811fbd47a"),
203    th!("5809addc9f6926fc5c4e20cf87958858c4454c21cdfc6b02f377f12c06b35cca"),
204];
205
206pub fn tree_hash_atom(bytes: &[u8]) -> TreeHash {
207    let mut sha256 = Sha256::new();
208    sha256.update([1]);
209    sha256.update(bytes);
210    TreeHash::new(sha256.finalize())
211}
212
213pub fn tree_hash_pair(first: TreeHash, rest: TreeHash) -> TreeHash {
214    let mut sha256 = Sha256::new();
215    sha256.update([2]);
216    sha256.update(first);
217    sha256.update(rest);
218    TreeHash::new(sha256.finalize())
219}
220
221pub fn tree_hash(a: &Allocator, node: NodePtr) -> TreeHash {
222    let mut hashes = Vec::new();
223    let mut ops = vec![TreeOp::SExp(node)];
224
225    while let Some(op) = ops.pop() {
226        match op {
227            TreeOp::SExp(node) => match a.node(node) {
228                NodeVisitor::Buffer(bytes) => {
229                    hashes.push(tree_hash_atom(bytes));
230                }
231                NodeVisitor::U32(val) => {
232                    if (val as usize) < PRECOMPUTED_HASHES.len() {
233                        hashes.push(PRECOMPUTED_HASHES[val as usize]);
234                    } else {
235                        hashes.push(tree_hash_atom(a.atom(node).as_ref()));
236                    }
237                }
238                NodeVisitor::Pair(left, right) => {
239                    ops.push(TreeOp::Cons);
240                    ops.push(TreeOp::SExp(left));
241                    ops.push(TreeOp::SExp(right));
242                }
243            },
244            TreeOp::Cons => {
245                let first = hashes.pop().unwrap();
246                let rest = hashes.pop().unwrap();
247                hashes.push(tree_hash_pair(first, rest));
248            }
249            TreeOp::ConsAddCache(_) => unreachable!(),
250        }
251    }
252
253    assert!(hashes.len() == 1);
254    hashes[0]
255}
256
257pub fn tree_hash_cached(a: &Allocator, node: NodePtr, cache: &mut TreeCache) -> TreeHash {
258    cache.visit_tree(a, node);
259
260    let mut hashes = Vec::new();
261    let mut ops = vec![TreeOp::SExp(node)];
262
263    while let Some(op) = ops.pop() {
264        match op {
265            TreeOp::SExp(node) => match a.node(node) {
266                NodeVisitor::Buffer(bytes) => {
267                    let hash = tree_hash_atom(bytes);
268                    hashes.push(hash);
269                }
270                NodeVisitor::U32(val) => {
271                    if (val as usize) < PRECOMPUTED_HASHES.len() {
272                        hashes.push(PRECOMPUTED_HASHES[val as usize]);
273                    } else {
274                        hashes.push(tree_hash_atom(a.atom(node).as_ref()));
275                    }
276                }
277                NodeVisitor::Pair(left, right) => {
278                    if let Some(hash) = cache.get(node) {
279                        hashes.push(*hash);
280                    } else {
281                        if cache.should_memoize(node) {
282                            ops.push(TreeOp::ConsAddCache(node));
283                        } else {
284                            ops.push(TreeOp::Cons);
285                        }
286                        ops.push(TreeOp::SExp(left));
287                        ops.push(TreeOp::SExp(right));
288                    }
289                }
290            },
291            TreeOp::Cons => {
292                let first = hashes.pop().unwrap();
293                let rest = hashes.pop().unwrap();
294                hashes.push(tree_hash_pair(first, rest));
295            }
296            TreeOp::ConsAddCache(original_node) => {
297                let first = hashes.pop().unwrap();
298                let rest = hashes.pop().unwrap();
299                let hash = tree_hash_pair(first, rest);
300                hashes.push(hash);
301                cache.insert(original_node, &hash);
302            }
303        }
304    }
305
306    assert!(hashes.len() == 1);
307    hashes[0]
308}
309
310pub fn tree_hash_from_bytes(buf: &[u8]) -> Result<TreeHash, EvalErr> {
311    let mut a = Allocator::new();
312    let node = node_from_bytes_backrefs(&mut a, buf)?;
313    let mut cache = TreeCache::default();
314    Ok(tree_hash_cached(&a, node, &mut cache))
315}
316
317#[test]
318fn test_tree_hash() {
319    let mut a = Allocator::new();
320    let atom1 = a.new_atom(&[1, 2, 3]).unwrap();
321    let atom2 = a.new_atom(&[4, 5, 6]).unwrap();
322    let root = a.new_pair(atom1, atom2).unwrap();
323
324    // test atom1 hash
325    let atom1_hash = {
326        let mut sha256 = Sha256::new();
327        sha256.update([1_u8]);
328        sha256.update([1, 2, 3]);
329        let atom1_hash = sha256.finalize();
330
331        assert_eq!(tree_hash(&a, atom1).as_ref(), atom1_hash.as_slice());
332        atom1_hash
333    };
334
335    // test atom2 hash
336    let atom2_hash = {
337        let mut sha256 = Sha256::new();
338        sha256.update([1_u8]);
339        sha256.update([4, 5, 6]);
340        let atom2_hash = sha256.finalize();
341
342        assert_eq!(tree_hash(&a, atom2).as_ref(), atom2_hash.as_slice());
343        atom2_hash
344    };
345
346    // test tree hash
347    let root_hash = {
348        let mut sha256 = Sha256::new();
349        sha256.update([2_u8]);
350        sha256.update(atom1_hash.as_slice());
351        sha256.update(atom2_hash.as_slice());
352        let root_hash = sha256.finalize();
353
354        assert_eq!(tree_hash(&a, root).as_ref(), root_hash.as_slice());
355        root_hash
356    };
357
358    let atom3 = a.new_atom(&[7, 8, 9]).unwrap();
359    let root2 = a.new_pair(root, atom3).unwrap();
360
361    let atom3_hash = {
362        let mut sha256 = Sha256::new();
363        sha256.update([1_u8]);
364        sha256.update([7, 8, 9]);
365        sha256.finalize()
366    };
367
368    // test deeper tree hash
369    {
370        let mut sha256 = Sha256::new();
371        sha256.update([2_u8]);
372        sha256.update(root_hash.as_slice());
373        sha256.update(atom3_hash.as_slice());
374
375        assert_eq!(tree_hash(&a, root2).as_ref(), sha256.finalize().as_slice());
376    }
377}
378
379#[test]
380fn test_tree_hash_from_bytes() {
381    use clvmr::serde::{node_to_bytes, node_to_bytes_backrefs};
382
383    let mut a = Allocator::new();
384    let atom1 = a.new_atom(&[1, 2, 3]).unwrap();
385    let atom2 = a.new_atom(&[4, 5, 6]).unwrap();
386    let node1 = a.new_pair(atom1, atom2).unwrap();
387    let node2 = a.new_pair(atom2, atom1).unwrap();
388
389    let node1 = a.new_pair(node1, node1).unwrap();
390    let node2 = a.new_pair(node2, node2).unwrap();
391
392    let root = a.new_pair(node1, node2).unwrap();
393
394    let serialized_clvm = node_to_bytes(&a, root).expect("node_to_bytes");
395    let serialized_clvm_backrefs =
396        node_to_bytes_backrefs(&a, root).expect("node_to_bytes_backrefs");
397
398    let hash1 = tree_hash_from_bytes(&serialized_clvm).expect("tree_hash_from_bytes");
399    let hash2 = tree_hash_from_bytes(&serialized_clvm_backrefs).expect("tree_hash_from_bytes");
400    let hash3 = tree_hash(&a, root);
401
402    assert!(serialized_clvm.len() > serialized_clvm_backrefs.len());
403    assert_eq!(hash1, hash2);
404    assert_eq!(hash1, hash3);
405}
406
407#[cfg(test)]
408use rstest::rstest;
409
410#[cfg(test)]
411#[rstest]
412#[case(
413    "block-1ee588dc",
414    "1cba0b22b84b597d265d77fbabb57fada01d963f75dc3956a6166a2385997ef2"
415)]
416#[case(
417    "block-6fe59b24",
418    "540c5afac7c26728ed6b7891d8ce2f5b26009c4b0090d7035403c2425dc54e1d"
419)]
420#[case(
421    "block-b45268ac",
422    "7cc321f5554126c9f430afbc7dd9c804f5d34a248e3192f275f5d585ecf8e873"
423)]
424#[case(
425    "block-c2a8df0d",
426    "2e25efa524e420111006fee77f50fb8fbd725920a5312d5480af239d81ab5e7e"
427)]
428#[case(
429    "block-e5002df2",
430    "c179ece232dceef984ba000f7e5b67ee3092582668bf6178969df10845eb8b18"
431)]
432#[case(
433    "block-4671894",
434    "3750f0e1bde9fcb407135f974aa276a4580e1e76a47e6d8d9bb2911d0fe91db1"
435)]
436#[case(
437    "block-225758",
438    "880df94c3c9e0f7c26c42ae99723e683a4cd37e73f74c6322d1dfabaa1d64d93"
439)]
440#[case(
441    "block-834752",
442    "be755b8ef03d917b8bd37ae152792a7daa7de81bbb0eaa21c530571c2105c130"
443)]
444#[case(
445    "block-834752-compressed",
446    "be755b8ef03d917b8bd37ae152792a7daa7de81bbb0eaa21c530571c2105c130"
447)]
448#[case(
449    "block-834760",
450    "77558768f74c5f863b36232a1390843a63a397fc22da1321fea3a05eab67be2c"
451)]
452#[case(
453    "block-834761",
454    "4bac8b299c6545a37a825883c863b79ce850e7f6c8f1d2abeec2865f5450f1c5"
455)]
456#[case(
457    "block-834765",
458    "b915ec5f9f8ea723e0a99b035df206673369b802766dd76b6c8f4c15ab7bca2c"
459)]
460#[case(
461    "block-834766",
462    "409559c3395fb18a6c3390ccccd55e82162b1e68b867490a90ccbddf78147c9d"
463)]
464#[case(
465    "block-834768",
466    "905441945a9a56558337c8b7a536a6b9606ad63e11a265a938f301747ccfb7af"
467)]
468fn test_tree_hash_cached(
469    #[case] name: &str,
470    #[case] expect: &str,
471    #[values(true, false)] compressed: bool,
472) {
473    use clvmr::serde::{node_from_bytes_backrefs, node_to_bytes_backrefs};
474    use std::fs::read_to_string;
475
476    let filename = format!("../../generator-tests/{name}.txt");
477    println!("file: {filename}",);
478    let test_file = read_to_string(filename).expect("test file not found");
479    let generator = test_file.lines().next().expect("invalid test file");
480    let generator = hex::decode(generator).expect("invalid hex encoded generator");
481
482    let generator = if compressed {
483        let mut a = Allocator::new();
484        let node = node_from_bytes_backrefs(&mut a, &generator).expect("node_from_bytes_backrefs");
485        node_to_bytes_backrefs(&a, node).expect("node_to_bytes_backrefs")
486    } else {
487        generator
488    };
489
490    let mut a = Allocator::new();
491    let mut cache = TreeCache::default();
492    let node = node_from_bytes_backrefs(&mut a, &generator).expect("node_from_bytes_backrefs");
493
494    let hash1 = tree_hash(&a, node);
495    let hash2 = tree_hash_cached(&a, node, &mut cache);
496    // for (key, value) in cache.iter() {
497    //     println!("  {key:?}: {}", hex::encode(value));
498    // }
499    assert_eq!(hash1, hash2);
500    assert_eq!(hash1.as_ref(), hex::decode(expect).unwrap().as_slice());
501}
502
503#[cfg(test)]
504fn test_sha256_atom(buf: &[u8]) {
505    let hash = tree_hash_atom(buf);
506
507    let mut hasher = Sha256::new();
508    hasher.update([1_u8]);
509    if !buf.is_empty() {
510        hasher.update(buf);
511    }
512
513    assert_eq!(hash.as_ref(), hasher.finalize().as_slice());
514}
515
516#[test]
517fn test_tree_hash_atom() {
518    test_sha256_atom(&[]);
519    for val in 0..=255 {
520        test_sha256_atom(&[val]);
521    }
522
523    for val in 0..=255 {
524        test_sha256_atom(&[0, val]);
525    }
526
527    for val in 0..=255 {
528        test_sha256_atom(&[0xff, val]);
529    }
530}
531
532#[test]
533fn test_precomputed_atoms() {
534    assert_eq!(tree_hash_atom(&[]), PRECOMPUTED_HASHES[0]);
535    for val in 1..(PRECOMPUTED_HASHES.len() as u8) {
536        assert_eq!(tree_hash_atom(&[val]), PRECOMPUTED_HASHES[val as usize]);
537    }
538}
539
540#[test]
541fn test_tree_cache_get() {
542    let mut allocator = Allocator::new();
543    let mut cache = TreeCache::default();
544
545    let a = allocator.nil();
546    let b = allocator.one();
547    let c = allocator.new_pair(a, b).expect("new_pair");
548
549    assert_eq!(cache.get(a), None);
550    assert_eq!(cache.get(b), None);
551    assert_eq!(cache.get(c), None);
552
553    // We don't cache atoms
554    cache.insert(a, &tree_hash(&allocator, a));
555    assert_eq!(cache.get(a), None);
556
557    cache.insert(b, &tree_hash(&allocator, b));
558    assert_eq!(cache.get(b), None);
559
560    // but pair is OK
561    cache.insert(c, &tree_hash(&allocator, c));
562    assert_eq!(cache.get(c), Some(&tree_hash(&allocator, c)));
563}
564
565#[test]
566fn test_tree_cache_size_limit() {
567    let mut allocator = Allocator::new();
568    let mut cache = TreeCache::default();
569
570    let mut list = allocator.nil();
571    let mut hash = tree_hash(&allocator, list);
572    cache.insert(list, &hash);
573
574    // we only fit 65k items in the cache
575    for i in 0..65540 {
576        let b = allocator.one();
577        list = allocator.new_pair(b, list).expect("new_pair");
578        hash = tree_hash_pair(tree_hash_atom(b"\x01"), hash);
579        cache.insert(list, &hash);
580
581        println!("{i}");
582        if i < 65533 {
583            assert_eq!(cache.get(list), Some(&hash));
584        } else {
585            assert_eq!(cache.get(list), None);
586        }
587    }
588    assert_eq!(cache.get(list), None);
589}
590
591#[test]
592fn test_tree_cache_should_memoize() {
593    let mut allocator = Allocator::new();
594    let mut cache = TreeCache::default();
595
596    let a = allocator.nil();
597    let b = allocator.one();
598    let c = allocator.new_pair(a, b).expect("new_pair");
599
600    assert!(!cache.should_memoize(a));
601    assert!(!cache.should_memoize(b));
602    assert!(!cache.should_memoize(c));
603
604    // we need to visit a node at least twice for it to be considered a
605    // candidate for memoization
606    assert!(cache.visit(c));
607    assert!(!cache.should_memoize(c));
608    assert!(!cache.visit(c));
609
610    assert!(cache.should_memoize(c));
611}