sync_engine/merkle/
path_tree.rs

1// Copyright (c) 2025-2026 Adrian Robinson. Licensed under the AGPL-3.0.
2// See LICENSE file in the project root for full license text.
3
4//! Path-based Merkle tree for hierarchical sync verification.
5//!
6//! Uses the natural tree structure from reverse DNS object IDs:
7//! `uk.nhs.patient.record.123` becomes a path in the tree.
8//!
9//! Stored in Redis as a "shadow" alongside data - data can be evicted,
10//! but Merkle nodes persist (they're tiny: 32 bytes per node).
11
12use sha2::{Digest, Sha256};
13use std::collections::BTreeMap;
14use tracing::instrument;
15
16/// A node in the path-based Merkle tree.
17#[derive(Debug, Clone)]
18pub struct MerkleNode {
19    /// Hash of this node (computed from children or leaf value)
20    pub hash: [u8; 32],
21    /// Child segment -> child hash (empty for leaves)
22    pub children: BTreeMap<String, [u8; 32]>,
23    /// True if this is a leaf (actual item), false if interior node
24    pub is_leaf: bool,
25}
26
27impl MerkleNode {
28    /// Create a leaf node from a version hash.
29    pub fn leaf(version_hash: [u8; 32]) -> Self {
30        Self {
31            hash: version_hash,
32            children: BTreeMap::new(),
33            is_leaf: true,
34        }
35    }
36
37    /// Create an interior node, computing hash from children.
38    pub fn interior(children: BTreeMap<String, [u8; 32]>) -> Self {
39        let hash = Self::compute_hash(&children);
40        Self {
41            hash,
42            children,
43            is_leaf: false,
44        }
45    }
46
47    /// Compute hash from sorted children (deterministic).
48    fn compute_hash(children: &BTreeMap<String, [u8; 32]>) -> [u8; 32] {
49        let mut hasher = Sha256::new();
50        // BTreeMap is already sorted by key
51        for (segment, child_hash) in children {
52            hasher.update(segment.as_bytes());
53            hasher.update(b":");
54            hasher.update(child_hash);
55            hasher.update(b";");
56        }
57        hasher.finalize().into()
58    }
59
60    /// Recompute hash after children changed.
61    pub fn recompute_hash(&mut self) {
62        if !self.is_leaf {
63            self.hash = Self::compute_hash(&self.children);
64        }
65    }
66
67    /// Add or update a child, returns true if hash changed.
68    pub fn set_child(&mut self, segment: String, child_hash: [u8; 32]) -> bool {
69        let old_hash = self.hash;
70        self.children.insert(segment, child_hash);
71        self.recompute_hash();
72        self.hash != old_hash
73    }
74
75    /// Remove a child, returns true if hash changed.
76    pub fn remove_child(&mut self, segment: &str) -> bool {
77        let old_hash = self.hash;
78        self.children.remove(segment);
79        self.recompute_hash();
80        self.hash != old_hash
81    }
82}
83
84/// Manages path-based Merkle tree operations.
85///
86/// This is the local computation layer - storage is handled by RedisMerkleStore.
87#[derive(Debug)]
88pub struct PathMerkle;
89
90impl PathMerkle {
91    /// Split an object_id into path segments.
92    ///
93    /// `uk.nhs.patient.record.123` -> `["uk", "nhs", "patient", "record", "123"]`
94    #[inline]
95    pub fn split_path(object_id: &str) -> Vec<&str> {
96        object_id.split('.').collect()
97    }
98
99    /// Get the parent prefix for a path.
100    ///
101    /// `uk.nhs.patient.record.123` -> `uk.nhs.patient.record`
102    /// `uk` -> `""` (root)
103    pub fn parent_prefix(path: &str) -> &str {
104        match path.rfind('.') {
105            Some(idx) => &path[..idx],
106            None => "",
107        }
108    }
109
110    /// Get all ancestor prefixes for a path (excluding root, including self).
111    ///
112    /// `uk.nhs.patient` -> `["uk", "uk.nhs", "uk.nhs.patient"]`
113    pub fn ancestor_prefixes(path: &str) -> Vec<String> {
114        let segments: Vec<&str> = path.split('.').collect();
115        let mut prefixes = Vec::with_capacity(segments.len());
116        let mut current = String::new();
117        
118        for (i, segment) in segments.iter().enumerate() {
119            if i > 0 {
120                current.push('.');
121            }
122            current.push_str(segment);
123            prefixes.push(current.clone());
124        }
125        
126        prefixes
127    }
128
129    /// Compute the leaf hash for a sync item.
130    ///
131    /// Hash = SHA256(object_id || version || timestamp || payload_hash)
132    #[instrument(skip(payload_hash))]
133    pub fn leaf_hash(
134        object_id: &str,
135        version: u64,
136        timestamp: i64,
137        payload_hash: &[u8; 32],
138    ) -> [u8; 32] {
139        let mut hasher = Sha256::new();
140        hasher.update(object_id.as_bytes());
141        hasher.update(b"|");
142        hasher.update(version.to_le_bytes());
143        hasher.update(b"|");
144        hasher.update(timestamp.to_le_bytes());
145        hasher.update(b"|");
146        hasher.update(payload_hash);
147        hasher.finalize().into()
148    }
149
150    /// Compute hash of a payload (for leaf_hash input).
151    pub fn payload_hash(payload: &[u8]) -> [u8; 32] {
152        Sha256::digest(payload).into()
153    }
154}
155
156/// Batch of Merkle updates to apply atomically.
157#[derive(Debug, Default)]
158pub struct MerkleBatch {
159    /// Leaf updates: object_id -> new hash (or None to delete)
160    pub leaves: BTreeMap<String, Option<[u8; 32]>>,
161}
162
163impl MerkleBatch {
164    pub fn new() -> Self {
165        Self::default()
166    }
167
168    /// Add a leaf update.
169    pub fn insert(&mut self, object_id: String, hash: [u8; 32]) {
170        self.leaves.insert(object_id, Some(hash));
171    }
172
173    /// Mark a leaf for deletion.
174    pub fn delete(&mut self, object_id: String) {
175        self.leaves.insert(object_id, None);
176    }
177
178    pub fn is_empty(&self) -> bool {
179        self.leaves.is_empty()
180    }
181
182    pub fn len(&self) -> usize {
183        self.leaves.len()
184    }
185
186    /// Get all affected prefixes (for bubble-up updates).
187    ///
188    /// Returns prefixes in bottom-up order (leaves first, root last).
189    pub fn affected_prefixes(&self) -> Vec<String> {
190        use std::collections::BTreeSet;
191        
192        let mut all_prefixes = BTreeSet::new();
193        
194        for object_id in self.leaves.keys() {
195            for prefix in PathMerkle::ancestor_prefixes(object_id) {
196                all_prefixes.insert(prefix);
197            }
198        }
199        
200        // Convert to vec and reverse (we want bottom-up for processing)
201        let mut prefixes: Vec<_> = all_prefixes.into_iter().collect();
202        // Sort by depth (more dots = deeper = process first)
203        prefixes.sort_by(|a, b| {
204            let depth_a = a.matches('.').count();
205            let depth_b = b.matches('.').count();
206            depth_b.cmp(&depth_a) // Reverse: deeper first
207        });
208        
209        prefixes
210    }
211}
212
213#[cfg(test)]
214mod tests {
215    use super::*;
216
217    #[test]
218    fn test_split_path() {
219        assert_eq!(
220            PathMerkle::split_path("uk.nhs.patient.record.123"),
221            vec!["uk", "nhs", "patient", "record", "123"]
222        );
223    }
224
225    #[test]
226    fn test_parent_prefix() {
227        assert_eq!(PathMerkle::parent_prefix("uk.nhs.patient"), "uk.nhs");
228        assert_eq!(PathMerkle::parent_prefix("uk"), "");
229    }
230
231    #[test]
232    fn test_ancestor_prefixes() {
233        assert_eq!(
234            PathMerkle::ancestor_prefixes("uk.nhs.patient"),
235            vec!["uk", "uk.nhs", "uk.nhs.patient"]
236        );
237    }
238
239    #[test]
240    fn test_merkle_node_hash_deterministic() {
241        let mut children = BTreeMap::new();
242        children.insert("a".to_string(), [1u8; 32]);
243        children.insert("b".to_string(), [2u8; 32]);
244        
245        let node1 = MerkleNode::interior(children.clone());
246        let node2 = MerkleNode::interior(children);
247        
248        assert_eq!(node1.hash, node2.hash);
249    }
250
251    #[test]
252    fn test_merkle_batch_affected_prefixes() {
253        let mut batch = MerkleBatch::new();
254        batch.insert("uk.nhs.patient.123".to_string(), [1u8; 32]);
255        batch.insert("uk.nhs.doctor.456".to_string(), [2u8; 32]);
256        
257        let prefixes = batch.affected_prefixes();
258        
259        // Should be sorted deepest first
260        assert!(prefixes.iter().position(|p| p == "uk.nhs.patient")
261            < prefixes.iter().position(|p| p == "uk.nhs"));
262        assert!(prefixes.iter().position(|p| p == "uk.nhs")
263            < prefixes.iter().position(|p| p == "uk"));
264    }
265}