sync_engine/merkle/
path_tree.rs

1// Copyright (c) 2025-2026 Adrian Robinson. Licensed under the AGPL-3.0.
2// See LICENSE file in the project root for full license text.
3
4//! Path-based Merkle tree for hierarchical sync verification.
5//!
6//! Uses the natural tree structure from reverse DNS object IDs:
7//! `uk.nhs.patient.record.123` becomes a path in the tree.
8//!
9//! Stored in Redis as a "shadow" alongside data - data can be evicted,
10//! but Merkle nodes persist (they're tiny: 32 bytes per node).
11
12use sha2::{Digest, Sha256};
13use std::collections::BTreeMap;
14use tracing::instrument;
15
16/// A node in the path-based Merkle tree.
17#[derive(Debug, Clone)]
18pub struct MerkleNode {
19    /// Hash of this node (computed from children or leaf value)
20    pub hash: [u8; 32],
21    /// Child segment -> child hash (empty for leaves)
22    pub children: BTreeMap<String, [u8; 32]>,
23    /// True if this is a leaf (actual item), false if interior node
24    pub is_leaf: bool,
25}
26
27impl MerkleNode {
28    /// Create a leaf node from a version hash.
29    pub fn leaf(version_hash: [u8; 32]) -> Self {
30        Self {
31            hash: version_hash,
32            children: BTreeMap::new(),
33            is_leaf: true,
34        }
35    }
36
37    /// Create an interior node, computing hash from children.
38    pub fn interior(children: BTreeMap<String, [u8; 32]>) -> Self {
39        let hash = Self::compute_hash(&children);
40        Self {
41            hash,
42            children,
43            is_leaf: false,
44        }
45    }
46
47    /// Compute hash from sorted children (deterministic).
48    fn compute_hash(children: &BTreeMap<String, [u8; 32]>) -> [u8; 32] {
49        let mut hasher = Sha256::new();
50        // BTreeMap is already sorted by key
51        for (segment, child_hash) in children {
52            hasher.update(segment.as_bytes());
53            hasher.update(b":");
54            hasher.update(child_hash);
55            hasher.update(b";");
56        }
57        hasher.finalize().into()
58    }
59
60    /// Recompute hash after children changed.
61    pub fn recompute_hash(&mut self) {
62        if !self.is_leaf {
63            self.hash = Self::compute_hash(&self.children);
64        }
65    }
66
67    /// Add or update a child, returns true if hash changed.
68    pub fn set_child(&mut self, segment: String, child_hash: [u8; 32]) -> bool {
69        let old_hash = self.hash;
70        self.children.insert(segment, child_hash);
71        self.recompute_hash();
72        self.hash != old_hash
73    }
74
75    /// Remove a child, returns true if hash changed.
76    pub fn remove_child(&mut self, segment: &str) -> bool {
77        let old_hash = self.hash;
78        self.children.remove(segment);
79        self.recompute_hash();
80        self.hash != old_hash
81    }
82}
83
84/// Manages path-based Merkle tree operations.
85///
86/// This is the local computation layer - storage is handled by RedisMerkleStore.
87#[derive(Debug)]
88pub struct PathMerkle;
89
90impl PathMerkle {
91    /// Split an object_id into path segments.
92    ///
93    /// `uk.nhs.patient.record.123` -> `["uk", "nhs", "patient", "record", "123"]`
94    #[inline]
95    pub fn split_path(object_id: &str) -> Vec<&str> {
96        object_id.split('.').collect()
97    }
98
99    /// Get the parent prefix for a path.
100    ///
101    /// `uk.nhs.patient.record.123` -> `uk.nhs.patient.record`
102    /// `uk` -> `""` (root)
103    pub fn parent_prefix(path: &str) -> &str {
104        match path.rfind('.') {
105            Some(idx) => &path[..idx],
106            None => "",
107        }
108    }
109
110    /// Get all ancestor prefixes for a path (excluding root, including self).
111    ///
112    /// `uk.nhs.patient` -> `["uk", "uk.nhs", "uk.nhs.patient"]`
113    pub fn ancestor_prefixes(path: &str) -> Vec<String> {
114        let segments: Vec<&str> = path.split('.').collect();
115        let mut prefixes = Vec::with_capacity(segments.len());
116        let mut current = String::new();
117        
118        for (i, segment) in segments.iter().enumerate() {
119            if i > 0 {
120                current.push('.');
121            }
122            current.push_str(segment);
123            prefixes.push(current.clone());
124        }
125        
126        prefixes
127    }
128
129    /// Compute the leaf hash for a sync item.
130    ///
131    /// Hash = SHA256(object_id || version || payload_hash)
132    /// 
133    /// Note: We intentionally exclude timestamp to ensure idempotent updates.
134    /// Two writes with identical content should produce the same Merkle root,
135    /// regardless of when they occurred. This prevents false sync conflicts.
136    #[instrument(skip(payload_hash))]
137    pub fn leaf_hash(
138        object_id: &str,
139        version: u64,
140        payload_hash: &[u8; 32],
141    ) -> [u8; 32] {
142        let mut hasher = Sha256::new();
143        hasher.update(object_id.as_bytes());
144        hasher.update(b"|");
145        hasher.update(version.to_le_bytes());
146        hasher.update(b"|");
147        hasher.update(payload_hash);
148        hasher.finalize().into()
149    }
150
151    /// Compute hash of a payload (for leaf_hash input).
152    pub fn payload_hash(payload: &[u8]) -> [u8; 32] {
153        Sha256::digest(payload).into()
154    }
155}
156
157/// Batch of Merkle updates to apply atomically.
158#[derive(Debug, Default)]
159pub struct MerkleBatch {
160    /// Leaf updates: object_id -> new hash (or None to delete)
161    pub leaves: BTreeMap<String, Option<[u8; 32]>>,
162}
163
164impl MerkleBatch {
165    pub fn new() -> Self {
166        Self::default()
167    }
168
169    /// Add a leaf update.
170    pub fn insert(&mut self, object_id: String, hash: [u8; 32]) {
171        self.leaves.insert(object_id, Some(hash));
172    }
173
174    /// Mark a leaf for deletion.
175    pub fn delete(&mut self, object_id: String) {
176        self.leaves.insert(object_id, None);
177    }
178
179    pub fn is_empty(&self) -> bool {
180        self.leaves.is_empty()
181    }
182
183    pub fn len(&self) -> usize {
184        self.leaves.len()
185    }
186
187    /// Get all affected prefixes (for bubble-up updates).
188    ///
189    /// Returns prefixes in bottom-up order (leaves first, root last).
190    pub fn affected_prefixes(&self) -> Vec<String> {
191        use std::collections::BTreeSet;
192        
193        let mut all_prefixes = BTreeSet::new();
194        
195        for object_id in self.leaves.keys() {
196            for prefix in PathMerkle::ancestor_prefixes(object_id) {
197                all_prefixes.insert(prefix);
198            }
199        }
200        
201        // Convert to vec and reverse (we want bottom-up for processing)
202        let mut prefixes: Vec<_> = all_prefixes.into_iter().collect();
203        // Sort by depth (more dots = deeper = process first)
204        prefixes.sort_by(|a, b| {
205            let depth_a = a.matches('.').count();
206            let depth_b = b.matches('.').count();
207            depth_b.cmp(&depth_a) // Reverse: deeper first
208        });
209        
210        prefixes
211    }
212}
213
214#[cfg(test)]
215mod tests {
216    use super::*;
217
218    #[test]
219    fn test_split_path() {
220        assert_eq!(
221            PathMerkle::split_path("uk.nhs.patient.record.123"),
222            vec!["uk", "nhs", "patient", "record", "123"]
223        );
224    }
225
226    #[test]
227    fn test_parent_prefix() {
228        assert_eq!(PathMerkle::parent_prefix("uk.nhs.patient"), "uk.nhs");
229        assert_eq!(PathMerkle::parent_prefix("uk"), "");
230    }
231
232    #[test]
233    fn test_ancestor_prefixes() {
234        assert_eq!(
235            PathMerkle::ancestor_prefixes("uk.nhs.patient"),
236            vec!["uk", "uk.nhs", "uk.nhs.patient"]
237        );
238    }
239
240    #[test]
241    fn test_merkle_node_hash_deterministic() {
242        let mut children = BTreeMap::new();
243        children.insert("a".to_string(), [1u8; 32]);
244        children.insert("b".to_string(), [2u8; 32]);
245        
246        let node1 = MerkleNode::interior(children.clone());
247        let node2 = MerkleNode::interior(children);
248        
249        assert_eq!(node1.hash, node2.hash);
250    }
251
252    #[test]
253    fn test_merkle_batch_affected_prefixes() {
254        let mut batch = MerkleBatch::new();
255        batch.insert("uk.nhs.patient.123".to_string(), [1u8; 32]);
256        batch.insert("uk.nhs.doctor.456".to_string(), [2u8; 32]);
257        
258        let prefixes = batch.affected_prefixes();
259        
260        // Should be sorted deepest first
261        assert!(prefixes.iter().position(|p| p == "uk.nhs.patient")
262            < prefixes.iter().position(|p| p == "uk.nhs"));
263        assert!(prefixes.iter().position(|p| p == "uk.nhs")
264            < prefixes.iter().position(|p| p == "uk"));
265    }
266}