wnfs_hamt/
diff.rs

1use super::HashNibbles;
2use crate::{Hasher, Node, Pair, Pointer, HAMT_BITMASK_BIT_SIZE};
3use anyhow::{Ok, Result};
4use async_recursion::async_recursion;
5use serde::{de::DeserializeOwned, Serialize};
6use std::{collections::HashMap, hash::Hash, mem};
7use wnfs_common::{
8    utils::{Arc, CondSync},
9    BlockStore, Link, Storable,
10};
11
12//--------------------------------------------------------------------------------------------------
13// Type Definitions
14//--------------------------------------------------------------------------------------------------
15
16/// This type represents the different kinds of changes to a node.
17#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
18pub enum ChangeType {
19    Add,
20    Remove,
21    Modify,
22}
23
24/// Represents a change to some key-value pair of a HAMT node.
25#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
26pub struct KeyValueChange<K, V> {
27    pub r#type: ChangeType,
28    pub key: K,
29    pub value1: Option<V>,
30    pub value2: Option<V>,
31}
32
33//--------------------------------------------------------------------------------------------------
34// Implementations
35//--------------------------------------------------------------------------------------------------
36
37impl<K, V> KeyValueChange<K, V> {
38    pub fn map<W>(self, f: &impl Fn(V) -> W) -> KeyValueChange<K, W> {
39        let Self {
40            r#type,
41            key,
42            value1,
43            value2,
44        } = self;
45        KeyValueChange {
46            r#type,
47            key,
48            value1: value1.map(f),
49            value2: value2.map(f),
50        }
51    }
52}
53
54//--------------------------------------------------------------------------------------------------
55// Functions
56//--------------------------------------------------------------------------------------------------
57
58/// Compare two nodes and get the key-value changes made to the main node.
59///
60/// This implementation gets all the changes to main node at the leaf node level.
61///
62/// This is a more expensive operation because it gathers the key value pairs under a node has
63/// been added or removed even though we can simply return a reference to the node itself.
64///
65/// # Examples
66///
67/// ```
68/// use std::sync::Arc;
69/// use wnfs_hamt::{Node, Pair, diff};
70/// use wnfs_common::{Link, MemoryBlockStore};
71///
72/// #[async_std::main]
73/// async fn main() {
74///     let store = &MemoryBlockStore::new();
75///     let main_node = &mut Arc::new(Node::<[u8; 4], String>::default());
76///     for i in 0u32..3 {
77///         main_node
78///             .set(i.to_le_bytes(), i.to_string(), store)
79///             .await
80///             .unwrap();
81///     }
82///
83///     let other_node = &mut Arc::new(Node::<[u8; 4], String>::default());
84///     other_node
85///         .set(0_u32.to_le_bytes(), 0_u32.to_string(), store)
86///         .await
87///         .unwrap();
88///
89///     let changes = diff(
90///         Link::from(Arc::clone(main_node)),
91///         Link::from(Arc::clone(other_node)),
92///         store,
93///     )
94///     .await
95///     .unwrap();
96///
97///
98///    println!("Changes {:#?}", changes);
99/// }
100/// ```
101pub async fn diff<K, V, H>(
102    main_link: Link<Arc<Node<K, V, H>>>,
103    other_link: Link<Arc<Node<K, V, H>>>,
104    store: &impl BlockStore,
105) -> Result<Vec<KeyValueChange<K, V>>>
106where
107    K: Storable + Clone + Eq + Hash + AsRef<[u8]> + CondSync,
108    V: Storable + Clone + Eq + CondSync,
109    K::Serializable: Serialize + DeserializeOwned,
110    V::Serializable: Serialize + DeserializeOwned,
111    H: Hasher + CondSync,
112{
113    diff_helper(main_link, other_link, 1, store).await
114}
115
116#[cfg_attr(not(target_arch = "wasm32"), async_recursion)]
117#[cfg_attr(target_arch = "wasm32", async_recursion(?Send))]
118pub async fn diff_helper<K, V, H>(
119    main_link: Link<Arc<Node<K, V, H>>>,
120    other_link: Link<Arc<Node<K, V, H>>>,
121    depth: usize,
122    store: &impl BlockStore,
123) -> Result<Vec<KeyValueChange<K, V>>>
124where
125    K: Storable + Clone + Eq + Hash + AsRef<[u8]> + CondSync,
126    V: Storable + Clone + Eq + CondSync,
127    K::Serializable: Serialize + DeserializeOwned,
128    V::Serializable: Serialize + DeserializeOwned,
129    H: Hasher + CondSync,
130{
131    // If Cids are available, check to see if they are equal so we can skip further comparisons.
132    if let (Some(cid), Some(cid2)) = (main_link.get_cid(), other_link.get_cid()) {
133        if cid == cid2 {
134            return Ok(vec![]);
135        }
136    }
137
138    // Otherwise, get nodes from store.
139    let mut main_node = main_link.resolve_owned_value(store).await?;
140
141    let mut other_node = other_link.resolve_owned_value(store).await?;
142
143    let mut changes = vec![];
144    for index in 0..HAMT_BITMASK_BIT_SIZE {
145        match (main_node.bitmask[index], other_node.bitmask[index]) {
146            (true, false) => {
147                // Main has a value, other doesn't.
148                changes.extend(
149                    generate_add_or_remove_changes(
150                        &main_node.pointers[main_node.get_value_index(index)],
151                        ChangeType::Add,
152                        store,
153                    )
154                    .await?,
155                );
156            }
157            (false, true) => {
158                // Main doesn't have a value, other does.
159                changes.extend(
160                    generate_add_or_remove_changes(
161                        &other_node.pointers[other_node.get_value_index(index)],
162                        ChangeType::Remove,
163                        store,
164                    )
165                    .await?,
166                );
167            }
168            (true, true) => {
169                // Main and other have a value. They may be the same or different so we check.
170                let main_index = main_node.get_value_index(index);
171                let main_pointer = mem::take(
172                    Arc::make_mut(&mut main_node)
173                        .pointers
174                        .get_mut(main_index)
175                        .unwrap(),
176                );
177
178                let other_index = other_node.get_value_index(index);
179                let other_pointer = mem::take(
180                    Arc::make_mut(&mut other_node)
181                        .pointers
182                        .get_mut(other_index)
183                        .unwrap(),
184                );
185
186                changes.extend(pointers_diff(main_pointer, other_pointer, depth, store).await?);
187            }
188            (false, false) => { /* No change */ }
189        }
190    }
191
192    Ok(changes)
193}
194
195async fn generate_add_or_remove_changes<K, V, H>(
196    node_pointer: &Pointer<K, V, H>,
197    r#type: ChangeType,
198    store: &impl BlockStore,
199) -> Result<Vec<KeyValueChange<K, V>>>
200where
201    K: Storable + Clone + Eq + Hash + AsRef<[u8]> + CondSync,
202    V: Storable + Clone + Eq + CondSync,
203    K::Serializable: Serialize + DeserializeOwned,
204    V::Serializable: Serialize + DeserializeOwned,
205    H: Hasher + CondSync,
206{
207    match node_pointer {
208        Pointer::Values(values) => Ok(values
209            .iter()
210            .map(|Pair { key, value }| KeyValueChange {
211                r#type,
212                key: key.clone(),
213                value1: Some(value.clone()),
214                value2: None,
215            })
216            .collect()),
217        Pointer::Link(link) => {
218            let node = link.resolve_value(store).await?;
219            node.as_ref()
220                .flat_map(
221                    &|Pair { key, value }| {
222                        Ok(KeyValueChange {
223                            r#type,
224                            key: key.clone(),
225                            value1: Some(value.clone()),
226                            value2: None,
227                        })
228                    },
229                    store,
230                )
231                .await
232        }
233    }
234}
235
236async fn pointers_diff<K, V, H>(
237    main_pointer: Pointer<K, V, H>,
238    other_pointer: Pointer<K, V, H>,
239    depth: usize,
240    store: &impl BlockStore,
241) -> Result<Vec<KeyValueChange<K, V>>>
242where
243    K: Storable + Clone + Eq + Hash + AsRef<[u8]> + CondSync,
244    V: Storable + Clone + Eq + CondSync,
245    K::Serializable: Serialize + DeserializeOwned,
246    V::Serializable: Serialize + DeserializeOwned,
247    H: Hasher + CondSync,
248{
249    match (main_pointer, other_pointer) {
250        (Pointer::Link(main_link), Pointer::Link(other_link)) => {
251            diff_helper(main_link, other_link, depth + 1, store).await
252        }
253        (Pointer::Values(main_values), Pointer::Values(other_values)) => {
254            let mut changes = vec![];
255            let mut main_map = HashMap::<&K, &V>::default();
256            let other_map = HashMap::<&K, &V>::from_iter(
257                other_values.iter().map(|Pair { key, value }| (key, value)),
258            );
259
260            for Pair { key, value } in &main_values {
261                match other_map.get(&key) {
262                    Some(v) => {
263                        if *v != value {
264                            changes.push(KeyValueChange {
265                                r#type: ChangeType::Modify,
266                                key: key.clone(),
267                                value1: Some(value.clone()),
268                                value2: Some((*v).clone()),
269                            });
270                        }
271                    }
272                    None => {
273                        changes.push(KeyValueChange {
274                            r#type: ChangeType::Add,
275                            key: key.clone(),
276                            value1: Some(value.clone()),
277                            value2: None,
278                        });
279                    }
280                }
281
282                main_map.insert(key, value);
283            }
284
285            for Pair { key, value } in &other_values {
286                if main_map.get(key).is_none() {
287                    changes.push(KeyValueChange {
288                        r#type: ChangeType::Remove,
289                        key: key.clone(),
290                        value1: Some(value.clone()),
291                        value2: None,
292                    });
293                }
294            }
295
296            Ok(changes)
297        }
298        (Pointer::Values(main_values), Pointer::Link(other_link)) => {
299            let main_link = Link::from(create_node_from_pairs(main_values, depth, store).await?);
300            diff_helper(main_link, other_link, depth + 1, store).await
301        }
302        (Pointer::Link(main_link), Pointer::Values(other_values)) => {
303            let other_link = Link::from(create_node_from_pairs(other_values, depth, store).await?);
304            diff_helper(main_link, other_link, depth + 1, store).await
305        }
306    }
307}
308
309async fn create_node_from_pairs<K, V, H>(
310    values: Vec<Pair<K, V>>,
311    depth: usize,
312    store: &impl BlockStore,
313) -> Result<Arc<Node<K, V, H>>>
314where
315    K: Storable + Clone + Eq + Hash + AsRef<[u8]> + CondSync,
316    V: Storable + Clone + Eq + CondSync,
317    K::Serializable: Serialize + DeserializeOwned,
318    V::Serializable: Serialize + DeserializeOwned,
319    H: Hasher + CondSync,
320{
321    let mut node = Arc::new(Node::<_, _, H>::default());
322    for Pair { key, value } in values {
323        let digest = &H::hash(&key);
324        let hashnibbles = &mut HashNibbles::with_cursor(digest, depth);
325        node.set_value(hashnibbles, key, value, store).await?;
326    }
327    Ok(node)
328}
329
330//--------------------------------------------------------------------------------------------------
331// Tests
332//--------------------------------------------------------------------------------------------------
333
334#[cfg(test)]
335mod tests {
336    use super::{ChangeType::*, *};
337    use helper::*;
338    use std::collections::BTreeSet;
339    use wnfs_common::MemoryBlockStore;
340
341    mod helper {
342        use crate::Hasher;
343        use once_cell::sync::Lazy;
344        use wnfs_common::{utils, HashOutput};
345
346        pub(super) static HASH_KV_PAIRS: Lazy<Vec<(HashOutput, &'static str)>> = Lazy::new(|| {
347            vec![
348                (utils::to_hash_output(&[0xA0]), "first"),
349                (utils::to_hash_output(&[0xA3]), "second"),
350                (utils::to_hash_output(&[0xA7]), "third"),
351                (utils::to_hash_output(&[0xAC]), "fourth"),
352                (utils::to_hash_output(&[0xAE]), "fifth"),
353            ]
354        });
355
356        #[derive(Debug, Clone)]
357        pub(crate) struct MockHasher;
358        impl Hasher for MockHasher {
359            fn hash<K: AsRef<[u8]>>(key: &K) -> HashOutput {
360                HASH_KV_PAIRS
361                    .iter()
362                    .find(|(_, v)| key.as_ref() == <dyn AsRef<[u8]>>::as_ref(v))
363                    .unwrap()
364                    .0
365            }
366        }
367    }
368
369    #[async_std::test]
370    async fn can_diff_main_node_with_added_removed_pairs() {
371        let store = &MemoryBlockStore::default();
372
373        let main_node = &mut Arc::new(Node::<[u8; 4], String>::default());
374        for i in 0u32..3 {
375            main_node
376                .set(i.to_le_bytes(), i.to_string(), store)
377                .await
378                .unwrap();
379        }
380
381        let other_node = &mut Arc::new(Node::<[u8; 4], String>::default());
382        other_node
383            .set(0_u32.to_le_bytes(), 0_u32.to_string(), store)
384            .await
385            .unwrap();
386
387        let changes = diff(
388            Link::from(Arc::clone(main_node)),
389            Link::from(Arc::clone(other_node)),
390            store,
391        )
392        .await
393        .unwrap();
394
395        assert_eq!(
396            changes.into_iter().collect::<BTreeSet<_>>(),
397            BTreeSet::from([
398                KeyValueChange {
399                    r#type: Add,
400                    key: [2, 0, 0, 0,],
401                    value1: Some(String::from("2")),
402                    value2: None,
403                },
404                KeyValueChange {
405                    r#type: Add,
406                    key: [1, 0, 0, 0,],
407                    value1: Some(String::from("1")),
408                    value2: None,
409                },
410            ])
411        );
412
413        let changes = diff(
414            Link::from(Arc::clone(other_node)),
415            Link::from(Arc::clone(main_node)),
416            store,
417        )
418        .await
419        .unwrap();
420
421        assert_eq!(
422            changes.into_iter().collect::<BTreeSet<_>>(),
423            BTreeSet::from([
424                KeyValueChange {
425                    r#type: Remove,
426                    key: [2, 0, 0, 0,],
427                    value1: Some(String::from("2")),
428                    value2: None,
429                },
430                KeyValueChange {
431                    r#type: Remove,
432                    key: [1, 0, 0, 0,],
433                    value1: Some(String::from("1")),
434                    value2: None,
435                },
436            ])
437        );
438    }
439
440    #[async_std::test]
441    async fn can_diff_main_node_with_no_changes() {
442        let store = &MemoryBlockStore::default();
443
444        let main_node = &mut Arc::new(Node::<_, _>::default());
445        for i in 0_u32..3 {
446            main_node
447                .set(i.to_le_bytes(), i.to_string(), store)
448                .await
449                .unwrap();
450        }
451
452        let other_node = &mut Arc::new(Node::<_, _>::default());
453        for i in 0_u32..3 {
454            other_node
455                .set(i.to_le_bytes(), i.to_string(), store)
456                .await
457                .unwrap();
458        }
459
460        let changes = diff(
461            Link::from(Arc::clone(main_node)),
462            Link::from(Arc::clone(other_node)),
463            store,
464        )
465        .await
466        .unwrap();
467
468        assert!(changes.is_empty());
469    }
470
471    #[async_std::test]
472    async fn can_diff_nodes_with_different_structure_and_modified_changes() {
473        let store = &MemoryBlockStore::default();
474
475        // A node that adds the first 3 pairs of HASH_KV_PAIRS.
476        let other_node = &mut Arc::new(Node::<_, _, MockHasher>::default());
477        for (digest, kv) in HASH_KV_PAIRS.iter().take(3) {
478            other_node
479                .set_value(
480                    &mut HashNibbles::new(digest),
481                    kv.to_string(),
482                    kv.to_string(),
483                    store,
484                )
485                .await
486                .unwrap();
487        }
488
489        // Another node that keeps the first pair, modify the second pair, removes the third pair, and adds the fourth and fifth pair.
490        let main_node = &mut Arc::new(Node::<_, _, MockHasher>::default());
491        main_node
492            .set_value(
493                &mut HashNibbles::new(&HASH_KV_PAIRS[0].0),
494                HASH_KV_PAIRS[0].1.to_string(),
495                HASH_KV_PAIRS[0].1.to_string(),
496                store,
497            )
498            .await
499            .unwrap();
500
501        main_node
502            .set_value(
503                &mut HashNibbles::new(&HASH_KV_PAIRS[1].0),
504                HASH_KV_PAIRS[1].1.to_string(),
505                String::from("second_modified"),
506                store,
507            )
508            .await
509            .unwrap();
510
511        for (digest, kv) in HASH_KV_PAIRS.iter().skip(3).take(2) {
512            main_node
513                .set_value(
514                    &mut HashNibbles::new(digest),
515                    kv.to_string(),
516                    kv.to_string(),
517                    store,
518                )
519                .await
520                .unwrap();
521        }
522
523        let changes = diff(
524            Link::from(Arc::clone(main_node)),
525            Link::from(Arc::clone(other_node)),
526            store,
527        )
528        .await
529        .unwrap();
530
531        assert_eq!(
532            changes,
533            vec![
534                KeyValueChange {
535                    r#type: Modify,
536                    key: "second".to_string(),
537                    value1: Some("second_modified".to_string()),
538                    value2: Some("second".to_string()),
539                },
540                KeyValueChange {
541                    r#type: Remove,
542                    key: "third".to_string(),
543                    value1: Some("third".to_string()),
544                    value2: None,
545                },
546                KeyValueChange {
547                    r#type: Add,
548                    key: "fourth".to_string(),
549                    value1: Some("fourth".to_string()),
550                    value2: None,
551                },
552                KeyValueChange {
553                    r#type: Add,
554                    key: "fifth".to_string(),
555                    value1: Some("fifth".to_string()),
556                    value2: None,
557                },
558            ]
559        );
560
561        let changes = diff(
562            Link::from(Arc::clone(other_node)),
563            Link::from(Arc::clone(main_node)),
564            store,
565        )
566        .await
567        .unwrap();
568
569        assert_eq!(
570            changes,
571            vec![
572                KeyValueChange {
573                    r#type: Modify,
574                    key: "second".to_string(),
575                    value1: Some("second".to_string()),
576                    value2: Some("second_modified".to_string()),
577                },
578                KeyValueChange {
579                    r#type: Add,
580                    key: "third".to_string(),
581                    value1: Some("third".to_string()),
582                    value2: None,
583                },
584                KeyValueChange {
585                    r#type: Remove,
586                    key: "fourth".to_string(),
587                    value1: Some("fourth".to_string()),
588                    value2: None,
589                },
590                KeyValueChange {
591                    r#type: Remove,
592                    key: "fifth".to_string(),
593                    value1: Some("fifth".to_string()),
594                    value2: None,
595                },
596            ]
597        );
598    }
599}
600
601#[cfg(test)]
602mod proptests {
603    use crate::{
604        strategies::{self, generate_kvs, generate_ops_and_changes, Change, Operations},
605        ChangeType,
606    };
607    use async_std::task;
608    use std::collections::HashSet;
609    use test_strategy::proptest;
610    use wnfs_common::{utils::Arc, Link, MemoryBlockStore};
611
612    #[proptest(cases = 100, max_shrink_iters = 4000)]
613    fn diff_correspondence(
614        #[strategy(generate_ops_and_changes())] ops_changes: (
615            Operations<String, u64>,
616            Vec<Change<String, u64>>,
617        ),
618    ) {
619        task::block_on(async {
620            let store = &MemoryBlockStore::default();
621            let (ops, strategy_changes) = ops_changes;
622
623            let other_node = &mut strategies::node_from_operations(&ops, store).await.unwrap();
624            strategies::prepare_node(other_node, &strategy_changes, store)
625                .await
626                .unwrap();
627
628            let main_node = &mut Arc::clone(other_node);
629            strategies::apply_changes(main_node, &strategy_changes, store)
630                .await
631                .unwrap();
632
633            let changes = super::diff(
634                Link::from(Arc::clone(main_node)),
635                Link::from(Arc::clone(other_node)),
636                store,
637            )
638            .await
639            .unwrap();
640
641            assert_eq!(strategy_changes.len(), changes.len());
642            for strategy_change in strategy_changes {
643                assert!(changes.iter().any(|c| match &strategy_change {
644                    Change::Add(k, _) => c.r#type == ChangeType::Add && &c.key == k,
645                    Change::Modify(k, _) => {
646                        c.r#type == ChangeType::Modify && &c.key == k
647                    }
648                    Change::Remove(k) => {
649                        c.r#type == ChangeType::Remove && &c.key == k
650                    }
651                }));
652            }
653        });
654    }
655
656    #[proptest(cases = 1000, max_shrink_iters = 40000)]
657    fn diff_unique_keys(
658        #[strategy(generate_kvs("[a-z0-9]{1,3}", 0u64..1000, 0..100))] kvs1: Vec<(String, u64)>,
659        #[strategy(generate_kvs("[a-z0-9]{1,3}", 0u64..1000, 0..100))] kvs2: Vec<(String, u64)>,
660    ) {
661        task::block_on(async {
662            let store = &MemoryBlockStore::default();
663
664            let node1 = strategies::node_from_kvs(kvs1, store).await.unwrap();
665            let node2 = strategies::node_from_kvs(kvs2, store).await.unwrap();
666
667            let changes = super::diff(Link::from(node1), Link::from(node2), store)
668                .await
669                .unwrap();
670
671            let change_set = changes
672                .iter()
673                .map(|c| c.key.clone())
674                .collect::<HashSet<_>>();
675
676            assert_eq!(change_set.len(), changes.len());
677        });
678    }
679
680    #[proptest(cases = 100)]
681    fn add_remove_flip(
682        #[strategy(generate_kvs("[a-z0-9]{1,3}", 0u64..1000, 0..100))] kvs1: Vec<(String, u64)>,
683        #[strategy(generate_kvs("[a-z0-9]{1,3}", 0u64..1000, 0..100))] kvs2: Vec<(String, u64)>,
684    ) {
685        task::block_on(async {
686            let store = &MemoryBlockStore::default();
687
688            let node1 = strategies::node_from_kvs(kvs1, store).await.unwrap();
689            let node2 = strategies::node_from_kvs(kvs2, store).await.unwrap();
690
691            let changes = super::diff(
692                Link::from(Arc::clone(&node1)),
693                Link::from(Arc::clone(&node2)),
694                store,
695            )
696            .await
697            .unwrap();
698
699            let flipped_changes = super::diff(Link::from(node2), Link::from(node1), store)
700                .await
701                .unwrap();
702
703            assert_eq!(changes.len(), flipped_changes.len());
704            for change in changes {
705                assert!(flipped_changes.iter().any(|c| match change.r#type {
706                    ChangeType::Add => c.r#type == ChangeType::Remove && c.key == change.key,
707                    ChangeType::Remove => c.r#type == ChangeType::Add && c.key == change.key,
708                    ChangeType::Modify => c.r#type == ChangeType::Modify && c.key == change.key,
709                }));
710            }
711        });
712    }
713}