Skip to main content

leann_core/
sync.rs

1use sha2::{Digest, Sha256};
2use std::collections::HashMap;
3use std::path::{Path, PathBuf};
4
5/// Hash data using SHA-256.
6pub fn hash_data(data: &[u8]) -> String {
7    let mut hasher = Sha256::new();
8    hasher.update(data);
9    format!("{:x}", hasher.finalize())
10}
11
12/// A node in the Merkle tree.
13#[derive(Debug, Clone)]
14pub struct MerkleTreeNode {
15    pub hash: String,
16    pub data: String,
17    pub children: HashMap<String, MerkleTreeNode>,
18    pub parent_id: Option<String>,
19}
20
21/// A flat Merkle tree for detecting file changes.
22pub struct MerkleTree {
23    pub nodes: HashMap<String, MerkleTreeNode>,
24    pub root: Option<String>, // hash of root node
25}
26
27impl MerkleTree {
28    pub fn new() -> Self {
29        Self {
30            nodes: HashMap::new(),
31            root: None,
32        }
33    }
34
35    pub fn add_node(&mut self, data: &str, parent_id: Option<&str>, hash: Option<&str>) -> String {
36        let hash = hash
37            .map(String::from)
38            .unwrap_or_else(|| hash_data(data.as_bytes()));
39
40        let node = MerkleTreeNode {
41            hash: hash.clone(),
42            data: data.to_string(),
43            children: HashMap::new(),
44            parent_id: parent_id.map(String::from),
45        };
46
47        self.nodes.insert(hash.clone(), node);
48
49        if parent_id.is_none() {
50            self.root = Some(hash.clone());
51        } else if let Some(pid) = parent_id {
52            // Clone the child node first to avoid borrow conflict
53            let child_node = self.nodes.get(&hash).unwrap().clone();
54            if let Some(parent) = self.nodes.get_mut(pid) {
55                parent.children.insert(hash.clone(), child_node);
56            }
57        }
58
59        hash
60    }
61
62    /// Compare two trees and return (added, removed, modified) file paths.
63    pub fn compare_with(&self, other: &MerkleTree) -> (Vec<String>, Vec<String>, Vec<String>) {
64        let self_root = match &self.root {
65            Some(r) => r,
66            None => return (Vec::new(), Vec::new(), Vec::new()),
67        };
68        let other_root = match &other.root {
69            Some(r) => r,
70            None => return (Vec::new(), Vec::new(), Vec::new()),
71        };
72
73        let self_root_node = &self.nodes[self_root];
74        let other_root_node = &other.nodes[other_root];
75
76        if self_root_node.hash == other_root_node.hash {
77            return (Vec::new(), Vec::new(), Vec::new());
78        }
79
80        let old_files = &self_root_node.children;
81        let new_files = &other_root_node.children;
82
83        let mut all_paths: std::collections::HashSet<&str> = std::collections::HashSet::new();
84        for node in old_files.values() {
85            all_paths.insert(&node.data);
86        }
87        for node in new_files.values() {
88            all_paths.insert(&node.data);
89        }
90
91        let old_by_data: HashMap<&str, &str> = old_files
92            .iter()
93            .map(|(hash, node)| (node.data.as_str(), hash.as_str()))
94            .collect();
95        let new_by_data: HashMap<&str, &str> = new_files
96            .iter()
97            .map(|(hash, node)| (node.data.as_str(), hash.as_str()))
98            .collect();
99
100        let mut added = Vec::new();
101        let mut removed = Vec::new();
102        let mut modified = Vec::new();
103
104        for path in all_paths {
105            match (old_by_data.get(path), new_by_data.get(path)) {
106                (Some(_), Some(_)) => {
107                    // Both exist - check if data changed by comparing child hashes
108                    // In the flat tree, the hash IS the content hash, so if both
109                    // exist with different hashes, it's modified.
110                    // Note: In this simple tree, hash == path for children
111                    modified.push(path.to_string());
112                }
113                (None, Some(_)) => added.push(path.to_string()),
114                (Some(_), None) => removed.push(path.to_string()),
115                (None, None) => unreachable!(),
116            }
117        }
118
119        (added, removed, modified)
120    }
121}
122
123impl Default for MerkleTree {
124    fn default() -> Self {
125        Self::new()
126    }
127}
128
129/// File synchronizer that detects changes using Merkle trees.
130pub struct FileSynchronizer {
131    root_dir: PathBuf,
132    tree: MerkleTree,
133    ignore_patterns: Vec<String>,
134    include_extensions: Vec<String>,
135}
136
137impl FileSynchronizer {
138    pub fn new(
139        root_dir: &Path,
140        ignore_patterns: Vec<String>,
141        include_extensions: Vec<String>,
142    ) -> anyhow::Result<Self> {
143        if !root_dir.is_dir() {
144            anyhow::bail!("Not a valid directory: {}", root_dir.display());
145        }
146
147        let mut sync = Self {
148            root_dir: root_dir.to_path_buf(),
149            tree: MerkleTree::new(),
150            ignore_patterns,
151            include_extensions,
152        };
153
154        sync.build_initial_tree()?;
155        Ok(sync)
156    }
157
158    /// Check for file changes and return (added, removed, modified).
159    pub fn check_for_changes(&mut self) -> anyhow::Result<(Vec<String>, Vec<String>, Vec<String>)> {
160        let file_hashes = self.generate_file_hashes()?;
161        let new_tree = self.build_merkle_tree(&file_hashes);
162        let changes = self.tree.compare_with(&new_tree);
163
164        if changes.0.is_empty() && changes.1.is_empty() && changes.2.is_empty() {
165            // No changes
166        } else {
167            self.tree = new_tree;
168        }
169
170        Ok(changes)
171    }
172
173    fn build_initial_tree(&mut self) -> anyhow::Result<()> {
174        let file_hashes = self.generate_file_hashes()?;
175        self.tree = self.build_merkle_tree(&file_hashes);
176        Ok(())
177    }
178
179    fn generate_file_hashes(&self) -> anyhow::Result<HashMap<String, String>> {
180        let mut hashes = HashMap::new();
181
182        for entry in walkdir_recursive(&self.root_dir) {
183            let path = entry.as_path();
184
185            if !path.is_file() {
186                continue;
187            }
188
189            // Check extensions
190            if !self.include_extensions.is_empty() {
191                let ext = path
192                    .extension()
193                    .map(|e| format!(".{}", e.to_string_lossy()))
194                    .unwrap_or_default();
195                if !self.include_extensions.iter().any(|e| e == &ext) {
196                    continue;
197                }
198            }
199
200            // Check ignore patterns
201            let path_str = path.to_string_lossy();
202            if self.ignore_patterns.iter().any(|p| path_str.contains(p)) {
203                continue;
204            }
205
206            match std::fs::read(path) {
207                Ok(data) => {
208                    hashes.insert(path_str.to_string(), hash_data(&data));
209                }
210                Err(_) => continue,
211            }
212        }
213
214        Ok(hashes)
215    }
216
217    fn build_merkle_tree(&self, file_hashes: &HashMap<String, String>) -> MerkleTree {
218        let mut tree = MerkleTree::new();
219
220        let mut sorted_paths: Vec<&String> = file_hashes.keys().collect();
221        sorted_paths.sort();
222
223        let root_data: String = sorted_paths
224            .iter()
225            .map(|p| format!("{}{}", p, file_hashes[*p]))
226            .collect();
227
228        let root_id = tree.add_node(&root_data, None, None);
229
230        for path in sorted_paths {
231            tree.add_node(path, Some(&root_id), Some(path));
232        }
233
234        tree
235    }
236}
237
238fn walkdir_recursive(dir: &Path) -> Vec<PathBuf> {
239    let mut result = Vec::new();
240    if let Ok(entries) = std::fs::read_dir(dir) {
241        for entry in entries.flatten() {
242            let path = entry.path();
243            if path.is_dir() {
244                result.extend(walkdir_recursive(&path));
245            } else {
246                result.push(path);
247            }
248        }
249    }
250    result
251}
252
253#[cfg(test)]
254mod tests {
255    use super::*;
256
257    #[test]
258    fn test_hash_data() {
259        let h = hash_data(b"hello world");
260        assert!(!h.is_empty());
261        assert_eq!(h.len(), 64); // SHA-256 hex is 64 chars
262    }
263
264    #[test]
265    fn test_merkle_tree_no_changes() {
266        let mut tree1 = MerkleTree::new();
267        let root = tree1.add_node("root_data", None, None);
268        tree1.add_node("file1", Some(&root), Some("file1.txt"));
269
270        let mut tree2 = MerkleTree::new();
271        let root2 = tree2.add_node("root_data", None, None);
272        tree2.add_node("file1", Some(&root2), Some("file1.txt"));
273
274        let (added, removed, modified) = tree1.compare_with(&tree2);
275        assert!(added.is_empty());
276        assert!(removed.is_empty());
277        assert!(modified.is_empty());
278    }
279}