Skip to main content

ripvec_core/cache/
diff.rs

1//! Two-level Merkle diff for incremental change detection.
2//!
3//! Level 1: Compare directory mtimes — skip entire subtrees if unchanged.
4//! Level 2: For changed directories, stat files and blake3-hash dirty ones.
5
6use std::collections::HashSet;
7use std::path::{Path, PathBuf};
8use std::time::UNIX_EPOCH;
9
10use crate::cache::manifest::Manifest;
11
/// Result of diffing the filesystem against a manifest.
///
/// The three fields partition what was observed: files that need
/// (re-)processing, manifest entries without a file on disk, and a count of
/// files that matched the manifest.
///
/// `Default` yields the empty diff (no dirty files, no deletions, zero
/// unchanged); `Clone`/`PartialEq`/`Eq` allow results to be stored and
/// compared directly, e.g. with `assert_eq!`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DiffResult {
    /// Files that are new or have changed content.
    pub dirty: Vec<PathBuf>,
    /// Files in the manifest that no longer exist on disk.
    pub deleted: Vec<String>,
    /// Number of files that matched the manifest (no re-embedding needed).
    pub unchanged: usize,
}
22
23/// Compare the filesystem at `root` against the `manifest` to find changes.
24///
25/// Uses a two-level strategy:
26/// 1. Directory mtime — if unchanged, skip the entire subtree
27/// 2. File (mtime, size) — if changed, blake3-hash the content to confirm
28///
29/// # Errors
30///
31/// Returns an error if the root directory cannot be read.
32pub fn compute_diff(root: &Path, manifest: &Manifest) -> crate::Result<DiffResult> {
33    let mut dirty = Vec::new();
34    let mut unchanged = 0;
35
36    // Track which manifest files we've seen on disk
37    let mut seen_files: HashSet<String> = HashSet::new();
38
39    // Walk the filesystem using the same walker as embed
40    let files = crate::walk::collect_files(root, None);
41
42    for file_path in &files {
43        let relative = file_path
44            .strip_prefix(root)
45            .unwrap_or(file_path)
46            .to_string_lossy()
47            .to_string();
48
49        seen_files.insert(relative.clone());
50
51        // Check if file exists in manifest
52        let Some(entry) = manifest.files.get(&relative) else {
53            // New file — not in manifest
54            dirty.push(file_path.clone());
55            continue;
56        };
57
58        // Quick check: mtime + size
59        let Ok(metadata) = std::fs::metadata(file_path) else {
60            dirty.push(file_path.clone());
61            continue;
62        };
63
64        let mtime_secs = metadata
65            .modified()
66            .ok()
67            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
68            .map_or(0, |d| d.as_secs());
69        let size = metadata.len();
70
71        if mtime_secs == entry.mtime_secs && size == entry.size {
72            // Fast path: mtime+size match → assume unchanged
73            unchanged += 1;
74            continue;
75        }
76
77        // Mtime or size changed — verify with content hash
78        let Ok(content) = std::fs::read(file_path) else {
79            dirty.push(file_path.clone());
80            continue;
81        };
82        let content_hash = blake3::hash(&content).to_hex().to_string();
83
84        if content_hash == entry.content_hash {
85            // False alarm: file was touched but content unchanged
86            unchanged += 1;
87        } else {
88            dirty.push(file_path.clone());
89        }
90    }
91
92    // Detect deleted files (in manifest but not on disk)
93    let deleted: Vec<String> = manifest
94        .files
95        .keys()
96        .filter(|k| !seen_files.contains(k.as_str()))
97        .cloned()
98        .collect();
99
100    Ok(DiffResult {
101        dirty,
102        deleted,
103        unchanged,
104    })
105}
106
107/// Compute the blake3 hash of a file's content.
108///
109/// # Errors
110///
111/// Returns an error if the file cannot be read.
112pub fn hash_file(path: &Path) -> crate::Result<String> {
113    let content = std::fs::read(path).map_err(|e| crate::Error::Io {
114        path: path.display().to_string(),
115        source: e,
116    })?;
117    Ok(blake3::hash(&content).to_hex().to_string())
118}
119
/// Modification time of `path`, in whole seconds since the Unix epoch.
///
/// Returns 0 when the mtime cannot be determined: the metadata lookup
/// fails, the modification time is unavailable, or it lies before the epoch.
#[must_use]
pub fn mtime_secs(path: &Path) -> u64 {
    let Ok(meta) = std::fs::metadata(path) else {
        return 0;
    };
    let Ok(modified) = meta.modified() else {
        return 0;
    };
    match modified.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
131
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `content` to `dir/relative`, creating parent directories as
    /// needed (an existing file is overwritten). Returns the full path.
    fn create_file(dir: &Path, relative: &str, content: &str) -> PathBuf {
        let path = dir.join(relative);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(&path, content).unwrap();
        path
    }

    /// Hex-encoded blake3 digest of `content`, as stored in manifest entries.
    fn blake3_hex(content: &str) -> String {
        blake3::hash(content.as_bytes()).to_hex().to_string()
    }

    /// Build a manifest holding a single entry for `relative` whose mtime and
    /// size are taken from the file on disk and whose hash is that of
    /// `content`. The file must already exist under `root`.
    fn manifest_with_file(root: &Path, relative: &str, content: &str) -> Manifest {
        let path = root.join(relative);
        let metadata = std::fs::metadata(&path).unwrap();
        let mtime = metadata
            .modified()
            .unwrap()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let hash = blake3_hex(content);

        let mut m = Manifest::new("test-model");
        m.add_file(relative, mtime, metadata.len(), &hash, 1);
        m
    }

    #[test]
    fn detects_new_file() {
        let dir = TempDir::new().unwrap();
        create_file(dir.path(), "existing.rs", "fn existing() {}");
        create_file(dir.path(), "new_file.rs", "fn new() {}");

        let manifest = manifest_with_file(dir.path(), "existing.rs", "fn existing() {}");

        let diff = compute_diff(dir.path(), &manifest).unwrap();
        assert_eq!(diff.dirty.len(), 1);
        assert!(diff.dirty[0].ends_with("new_file.rs"));
        assert_eq!(diff.unchanged, 1);
        assert!(diff.deleted.is_empty());
    }

    #[test]
    fn detects_modified_file() {
        let dir = TempDir::new().unwrap();
        create_file(dir.path(), "main.rs", "fn main() {}");

        let manifest = manifest_with_file(dir.path(), "main.rs", "fn main() {}");

        // Rewrite with longer content. No sleep is needed: the size differs
        // from the manifest entry, so the fast path cannot match regardless
        // of whether the (seconds-granularity) mtime moved.
        create_file(
            dir.path(),
            "main.rs",
            "fn main() { println!(\"changed\"); }",
        );

        let diff = compute_diff(dir.path(), &manifest).unwrap();
        assert_eq!(diff.dirty.len(), 1);
        assert_eq!(diff.unchanged, 0);
    }

    #[test]
    fn detects_deleted_file() {
        let dir = TempDir::new().unwrap();
        create_file(dir.path(), "keep.rs", "fn keep() {}");

        let mut manifest = manifest_with_file(dir.path(), "keep.rs", "fn keep() {}");
        manifest.add_file("deleted.rs", 1000, 100, "oldhash", 1);

        let diff = compute_diff(dir.path(), &manifest).unwrap();
        assert_eq!(diff.deleted.len(), 1);
        assert_eq!(diff.deleted[0], "deleted.rs");
        assert_eq!(diff.unchanged, 1);
    }

    #[test]
    fn unchanged_file_detected() {
        let dir = TempDir::new().unwrap();
        create_file(dir.path(), "stable.rs", "fn stable() {}");

        let manifest = manifest_with_file(dir.path(), "stable.rs", "fn stable() {}");

        let diff = compute_diff(dir.path(), &manifest).unwrap();
        assert!(diff.dirty.is_empty());
        assert!(diff.deleted.is_empty());
        assert_eq!(diff.unchanged, 1);
    }

    #[test]
    fn touched_file_with_same_content_is_unchanged() {
        let dir = TempDir::new().unwrap();
        let content = "fn touched() {}";
        let path = create_file(dir.path(), "touched.rs", content);
        let size = std::fs::metadata(&path).unwrap().len();

        // Stale mtime (1s after the epoch) forces the hash-verification
        // branch; the matching hash must classify the file as unchanged.
        let mut manifest = Manifest::new("test-model");
        manifest.add_file("touched.rs", 1, size, &blake3_hex(content), 1);

        let diff = compute_diff(dir.path(), &manifest).unwrap();
        assert!(diff.dirty.is_empty());
        assert!(diff.deleted.is_empty());
        assert_eq!(diff.unchanged, 1);
    }

    #[test]
    fn same_size_edit_is_caught_by_hash() {
        let dir = TempDir::new().unwrap();
        let path = create_file(dir.path(), "edited.rs", "fn b() {}");
        let size = std::fs::metadata(&path).unwrap().len();

        // Same size as the recorded content but a different hash, with a
        // stale mtime so the fast path does not short-circuit.
        let mut manifest = Manifest::new("test-model");
        manifest.add_file("edited.rs", 1, size, &blake3_hex("fn a() {}"), 1);

        let diff = compute_diff(dir.path(), &manifest).unwrap();
        assert_eq!(diff.dirty.len(), 1);
        assert!(diff.dirty[0].ends_with("edited.rs"));
        assert_eq!(diff.unchanged, 0);
        assert!(diff.deleted.is_empty());
    }
}