pollen-crdt 0.1.0

CRDT synchronization for Pollen
Documentation
//! Merkle tree for anti-entropy synchronization.

use bytes::Bytes;
use std::collections::BTreeMap;

/// Simple Merkle tree for detecting data differences.
pub struct MerkleTree {
    /// Leaf nodes (key -> hash).
    leaves: BTreeMap<String, [u8; 32]>,
    /// Number of levels in the tree.
    levels: usize,
}

impl MerkleTree {
    /// Create a new empty Merkle tree.
    pub fn new() -> Self {
        Self {
            leaves: BTreeMap::new(),
            levels: 16, // 16 levels = 65536 buckets
        }
    }

    /// Insert or update a key-value pair.
    pub fn insert(&mut self, key: &str, value: &[u8]) {
        let hash = Self::hash(value);
        self.leaves.insert(key.to_string(), hash);
    }

    /// Remove a key.
    pub fn remove(&mut self, key: &str) {
        self.leaves.remove(key);
    }

    /// Get the root hash.
    pub fn root_hash(&self) -> Bytes {
        if self.leaves.is_empty() {
            return Bytes::from_static(&[0u8; 32]);
        }

        let hashes: Vec<_> = self.leaves.values().cloned().collect();
        let combined = Self::combine_hashes(&hashes);
        Bytes::copy_from_slice(&combined)
    }

    /// Get hashes for a specific level (for incremental sync).
    pub fn level_hashes(&self, level: usize) -> Vec<(String, Bytes)> {
        if level >= self.levels {
            return vec![];
        }

        let bucket_count = 1 << level;
        let mut buckets: Vec<Vec<[u8; 32]>> = vec![vec![]; bucket_count];

        for (key, hash) in &self.leaves {
            let bucket = Self::key_to_bucket(key, bucket_count);
            buckets[bucket].push(*hash);
        }

        buckets
            .into_iter()
            .enumerate()
            .map(|(i, hashes)| {
                let combined = if hashes.is_empty() {
                    [0u8; 32]
                } else {
                    Self::combine_hashes(&hashes)
                };
                (format!("{:x}", i), Bytes::copy_from_slice(&combined))
            })
            .collect()
    }

    /// Find keys in a specific range.
    pub fn keys_in_range(&self, start: &str, end: &str) -> Vec<String> {
        self.leaves
            .range(start.to_string()..end.to_string())
            .map(|(k, _)| k.clone())
            .collect()
    }

    /// Get all keys.
    pub fn keys(&self) -> Vec<String> {
        self.leaves.keys().cloned().collect()
    }

    /// Hash a value using a simple hash function.
    fn hash(data: &[u8]) -> [u8; 32] {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        data.hash(&mut hasher);
        let h1 = hasher.finish();

        let mut hasher2 = DefaultHasher::new();
        h1.hash(&mut hasher2);
        let h2 = hasher2.finish();

        let mut hasher3 = DefaultHasher::new();
        h2.hash(&mut hasher3);
        let h3 = hasher3.finish();

        let mut hasher4 = DefaultHasher::new();
        h3.hash(&mut hasher4);
        let h4 = hasher4.finish();

        let mut result = [0u8; 32];
        result[0..8].copy_from_slice(&h1.to_le_bytes());
        result[8..16].copy_from_slice(&h2.to_le_bytes());
        result[16..24].copy_from_slice(&h3.to_le_bytes());
        result[24..32].copy_from_slice(&h4.to_le_bytes());
        result
    }

    /// Combine multiple hashes into one.
    fn combine_hashes(hashes: &[[u8; 32]]) -> [u8; 32] {
        if hashes.is_empty() {
            return [0u8; 32];
        }

        let mut combined = hashes[0];
        for hash in &hashes[1..] {
            for i in 0..32 {
                combined[i] ^= hash[i];
            }
        }
        combined
    }

    /// Map a key to a bucket index.
    fn key_to_bucket(key: &str, bucket_count: usize) -> usize {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        (hasher.finish() as usize) % bucket_count
    }
}

impl Default for MerkleTree {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_empty_tree() {
        let tree = MerkleTree::new();
        let root = tree.root_hash();
        assert_eq!(root.len(), 32);
    }

    #[test]
    fn test_insert_and_hash() {
        let mut tree = MerkleTree::new();
        tree.insert("key1", b"value1");

        let root1 = tree.root_hash();

        tree.insert("key2", b"value2");
        let root2 = tree.root_hash();

        assert_ne!(root1, root2);
    }

    #[test]
    fn test_same_content_same_hash() {
        let mut tree1 = MerkleTree::new();
        tree1.insert("key1", b"value1");
        tree1.insert("key2", b"value2");

        let mut tree2 = MerkleTree::new();
        tree2.insert("key2", b"value2");
        tree2.insert("key1", b"value1");

        assert_eq!(tree1.root_hash(), tree2.root_hash());
    }

    #[test]
    fn test_remove() {
        let mut tree = MerkleTree::new();
        tree.insert("key1", b"value1");
        let root1 = tree.root_hash();

        tree.insert("key2", b"value2");
        tree.remove("key2");
        let root2 = tree.root_hash();

        assert_eq!(root1, root2);
    }
}