Skip to main content

void_core/diff/
working.rs

1//! Working tree diff operations.
2//!
3//! Computes differences between commits/index and the working tree.
4
5use std::collections::HashMap;
6use std::fs;
7use std::path::Path;
8
9use rayon::prelude::*;
10
11use super::collect::{collect_commit_files, load_commit_and_reader, FileEntry};
12use super::rename::apply_rename_detection;
13use super::types::{DiffKind, FileDiff, TreeDiff};
14use crate::cid::VoidCid;
15use crate::crypto::KeyVault;
16use crate::index::WorkspaceIndex;
17use crate::store::ObjectStoreExt;
18use crate::support::configure_walker;
19use crate::{ContentHash, Result, VoidError};
20
21/// Walks a workspace directory and collects file entries with their hashes.
22///
23/// Uses rayon for parallel hashing.
24fn collect_workspace_files(workspace: &Path) -> Result<Vec<FileEntry>> {
25    // Use ignore crate to respect repo ignore rules (match staging behavior)
26    let mut builder = ignore::WalkBuilder::new(workspace);
27    let walker = configure_walker(&mut builder)
28        .filter_entry(|entry| {
29            let name = entry.file_name().to_string_lossy();
30            name != ".void" && name != ".git" && name != "node_modules" && name != ".DS_Store"
31        })
32        .build();
33
34    let paths: Vec<_> = walker
35        .filter_map(|e| e.ok())
36        .filter(|e| e.file_type().map(|ft| ft.is_file()).unwrap_or(false))
37        .filter_map(|e| {
38            let path = e.path();
39            path.strip_prefix(workspace)
40                .ok()
41                .map(|rel| rel.to_path_buf())
42        })
43        .collect();
44
45    // Parallel hash computation
46    let entries: Result<Vec<FileEntry>> = paths
47        .par_iter()
48        .map(|rel_path| {
49            let full_path = workspace.join(rel_path);
50            let content = fs::read(&full_path)?;
51            let hash = ContentHash::digest(&content);
52
53            let path_str = rel_path.to_str().ok_or_else(|| {
54                VoidError::Io(std::io::Error::new(
55                    std::io::ErrorKind::InvalidData,
56                    "path not valid UTF-8",
57                ))
58            })?;
59
60            // Normalize path separators to forward slashes
61            let normalized = path_str.replace('\\', "/");
62
63            Ok(FileEntry {
64                path: normalized,
65                content_hash: hash,
66            })
67        })
68        .collect();
69
70    entries
71}
72
73/// Computes the diff between a commit and the working tree.
74///
75/// # Arguments
76/// * `store` - Object store for fetching encrypted objects
77/// * `vault` - Key vault for decryption
78/// * `commit` - CID of the commit to compare against
79/// * `workspace` - Path to the workspace directory
80///
81/// # Returns
82/// A `TreeDiff` showing changes in the working tree relative to the commit.
83pub fn diff_working<S: ObjectStoreExt>(
84    store: &S,
85    vault: &KeyVault,
86    commit: &VoidCid,
87    workspace: &Path,
88) -> Result<TreeDiff> {
89    // Load commit files via manifest
90    let (commit_obj, reader) = load_commit_and_reader(store, vault, commit)?;
91    let commit_files = collect_commit_files(store, &commit_obj, &reader)?;
92
93    // Build map of commit files: path -> hash
94    let commit_map: HashMap<&str, ContentHash> = commit_files
95        .iter()
96        .map(|e| (e.path.as_str(), e.content_hash))
97        .collect();
98
99    // Collect workspace files with parallel hashing
100    let workspace_files = collect_workspace_files(workspace)?;
101
102    // Build map of workspace files: path -> hash
103    let workspace_map: HashMap<&str, ContentHash> = workspace_files
104        .iter()
105        .map(|e| (e.path.as_str(), e.content_hash))
106        .collect();
107
108    let mut diffs = Vec::new();
109
110    // Find deleted and modified files
111    for entry in &commit_files {
112        match workspace_map.get(entry.path.as_str()) {
113            None => {
114                // Deleted from workspace
115                diffs.push(FileDiff {
116                    path: entry.path.clone(),
117                    kind: DiffKind::Deleted,
118                    old_hash: Some(entry.content_hash),
119                    new_hash: None,
120                });
121            }
122            Some(&ws_hash) => {
123                if ws_hash != entry.content_hash {
124                    // Modified in workspace
125                    diffs.push(FileDiff {
126                        path: entry.path.clone(),
127                        kind: DiffKind::Modified,
128                        old_hash: Some(entry.content_hash),
129                        new_hash: Some(ws_hash),
130                    });
131                }
132            }
133        }
134    }
135
136    // Find added files
137    for entry in &workspace_files {
138        if !commit_map.contains_key(entry.path.as_str()) {
139            diffs.push(FileDiff {
140                path: entry.path.clone(),
141                kind: DiffKind::Added,
142                old_hash: None,
143                new_hash: Some(entry.content_hash),
144            });
145        }
146    }
147
148    let files = apply_rename_detection(diffs);
149
150    Ok(TreeDiff { files })
151}
152
153/// Computes the diff between the index and the working tree.
154///
155/// # Arguments
156/// * `index` - The workspace index (staged files)
157/// * `workspace` - Path to the workspace directory
158///
159/// # Returns
160/// A `TreeDiff` showing unstaged changes (working tree vs index).
161pub fn diff_index(index: &WorkspaceIndex, workspace: &Path) -> Result<TreeDiff> {
162    // Build map of index entries: path -> hash
163    let index_map: HashMap<&str, ContentHash> = index
164        .iter()
165        .map(|e| (e.path.as_str(), e.content_hash))
166        .collect();
167
168    // Collect workspace files with parallel hashing
169    let workspace_files = collect_workspace_files(workspace)?;
170
171    // Build map of workspace files: path -> hash
172    let workspace_map: HashMap<&str, ContentHash> = workspace_files
173        .iter()
174        .map(|e| (e.path.as_str(), e.content_hash))
175        .collect();
176
177    let mut diffs = Vec::new();
178
179    // Find deleted and modified files (relative to index)
180    for entry in index.iter() {
181        match workspace_map.get(entry.path.as_str()) {
182            None => {
183                // Deleted from workspace
184                diffs.push(FileDiff {
185                    path: entry.path.clone(),
186                    kind: DiffKind::Deleted,
187                    old_hash: Some(entry.content_hash),
188                    new_hash: None,
189                });
190            }
191            Some(&ws_hash) => {
192                if ws_hash != entry.content_hash {
193                    // Modified in workspace
194                    diffs.push(FileDiff {
195                        path: entry.path.clone(),
196                        kind: DiffKind::Modified,
197                        old_hash: Some(entry.content_hash),
198                        new_hash: Some(ws_hash),
199                    });
200                }
201            }
202        }
203    }
204
205    // Find untracked files (in workspace but not in index)
206    for entry in &workspace_files {
207        if !index_map.contains_key(entry.path.as_str()) {
208            diffs.push(FileDiff {
209                path: entry.path.clone(),
210                kind: DiffKind::Added,
211                old_hash: None,
212                new_hash: Some(entry.content_hash),
213            });
214        }
215    }
216
217    let files = apply_rename_detection(diffs);
218
219    Ok(TreeDiff { files })
220}
221
#[cfg(test)]
mod tests {
    //! Tests for `diff_index` and `diff_working` against temporary workspaces.

    use super::*;
    use std::sync::Arc;
    use crate::cid::ToVoidCid;
    use crate::crypto::{self, KeyVault};
    use crate::index::IndexEntry;
    use crate::metadata::ShardMap;
    use crate::pipeline::{commit_workspace, CommitOptions, SealOptions};
    use crate::VoidContext;
    use camino::Utf8PathBuf;
    use tempfile::TempDir;

    /// Creates a temporary workspace containing a `.void/objects` directory
    /// and generates two fresh keys.
    ///
    /// Returns `(temp dir guard, workspace root, .void dir, key, repo secret)`.
    /// The guard must be kept alive for as long as the directory is needed.
    fn setup_test_workspace() -> (
        TempDir,
        std::path::PathBuf,
        std::path::PathBuf,
        [u8; 32],
        [u8; 32],
    ) {
        let dir = TempDir::new().unwrap();
        let root = dir.path().to_path_buf();
        let void_dir = root.join(".void");
        fs::create_dir_all(void_dir.join("objects")).unwrap();

        let key = crypto::generate_key();
        let repo_secret = crypto::generate_key();

        (dir, root, void_dir, key, repo_secret)
    }

    /// Builds an index entry for `path` whose content hash is 32 copies of
    /// `hash_byte`; the remaining fields are fixed placeholder values.
    fn make_test_entry(path: &str, hash_byte: u8) -> IndexEntry {
        IndexEntry {
            path: path.to_string(),
            content_hash: ContentHash([hash_byte; 32]),
            mtime_secs: 1700000000,
            mtime_nanos: 123456789,
            size: 100,
            materialized: true,
        }
    }

    /// With an empty index, every workspace file is reported as `Added`.
    #[test]
    fn diff_empty_index_with_workspace() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();

        // Create some files in workspace
        fs::write(workspace.join("file1.txt"), "hello").unwrap();
        fs::write(workspace.join("file2.txt"), "world").unwrap();

        let index = WorkspaceIndex::empty();
        let diff = diff_index(&index, workspace).unwrap();

        // Both files should be Added
        assert_eq!(diff.len(), 2);
        assert!(diff.files.iter().all(|f| matches!(f.kind, DiffKind::Added)));

        let paths: Vec<_> = diff.files.iter().map(|f| f.path.as_str()).collect();
        assert!(paths.contains(&"file1.txt"));
        assert!(paths.contains(&"file2.txt"));
    }

    /// A file whose on-disk hash differs from its index entry is `Modified`.
    #[test]
    fn diff_index_with_modified_file() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();

        // Create file
        fs::write(workspace.join("file.txt"), "original").unwrap();

        // Create index with different hash
        let mut index = WorkspaceIndex::empty();
        index.entries.push(make_test_entry("file.txt", 0xAA));

        let diff = diff_index(&index, workspace).unwrap();

        assert_eq!(diff.len(), 1);
        assert_eq!(diff.files[0].path, "file.txt");
        assert!(matches!(diff.files[0].kind, DiffKind::Modified));
    }

    /// An index entry with no matching file on disk is `Deleted`.
    #[test]
    fn diff_index_with_deleted_file() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();

        // Create index with a file that doesn't exist in workspace
        let mut index = WorkspaceIndex::empty();
        index.entries.push(make_test_entry("deleted.txt", 0xBB));

        let diff = diff_index(&index, workspace).unwrap();

        assert_eq!(diff.len(), 1);
        assert_eq!(diff.files[0].path, "deleted.txt");
        assert!(matches!(diff.files[0].kind, DiffKind::Deleted));
    }

    /// Identical content under a new path collapses into a single rename
    /// with similarity 100.
    #[test]
    fn diff_index_detects_rename() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();

        let content = b"same content";
        fs::write(workspace.join("new.txt"), content).unwrap();
        let hash = ContentHash::digest(content);

        let mut index = WorkspaceIndex::empty();
        index.entries.push(IndexEntry {
            path: "old.txt".to_string(),
            content_hash: hash,
            mtime_secs: 1700000000,
            mtime_nanos: 0,
            size: content.len() as u64,
            materialized: true,
        });

        let diff = diff_index(&index, workspace).unwrap();

        assert_eq!(diff.len(), 1);
        assert_eq!(diff.files[0].path, "new.txt");
        match &diff.files[0].kind {
            DiffKind::Renamed { from, similarity } => {
                assert_eq!(from, "old.txt");
                assert_eq!(*similarity, 100);
            }
            other => panic!("expected renamed diff, got {other:?}"),
        }
    }

    /// A file whose hash matches its index entry produces an empty diff.
    #[test]
    fn diff_index_with_no_changes() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();

        // Create file
        let content = b"hello world";
        fs::write(workspace.join("file.txt"), content).unwrap();

        // Create index with matching hash
        let hash = ContentHash::digest(content);
        let mut index = WorkspaceIndex::empty();
        index.entries.push(IndexEntry {
            path: "file.txt".to_string(),
            content_hash: hash,
            mtime_secs: 1700000000,
            mtime_nanos: 0,
            size: content.len() as u64,
            materialized: true,
        });

        let diff = diff_index(&index, workspace).unwrap();

        assert!(diff.is_empty());
    }

    /// Commits two files, then modifies one and adds another; the diff
    /// against the commit must show exactly one modified and one added file.
    #[test]
    fn diff_working_detects_changes() {
        let (_dir, root, void_dir, key, repo_secret) = setup_test_workspace();

        // Create initial files
        fs::write(root.join("file1.txt"), "original").unwrap();
        fs::write(root.join("file2.txt"), "unchanged").unwrap();

        // Commit. A single vault is shared between the commit context and
        // the later diff call.
        let vault = Arc::new(KeyVault::new(key).expect("key derivation should not fail"));
        let mut ctx = VoidContext::headless(&void_dir, Arc::clone(&vault), 0).unwrap();
        ctx.paths.root = Utf8PathBuf::try_from(root.clone()).unwrap();
        ctx.repo.secret = void_crypto::RepoSecret::new(repo_secret);

        let seal_opts = SealOptions {
            ctx,
            shard_map: ShardMap::new(64),
            ..Default::default()
        };

        let result = commit_workspace(CommitOptions {
            seal: seal_opts,
            message: "initial".into(),
            parent_cid: None,
            allow_data_loss: false,
            foreign_parent: false,
        })
        .unwrap();

        // Modify workspace
        fs::write(root.join("file1.txt"), "modified").unwrap();
        fs::write(root.join("new.txt"), "new file").unwrap();

        // Diff working tree against commit
        let objects_dir = Utf8PathBuf::try_from(void_dir.join("objects")).unwrap();
        let store = crate::store::FsStore::new(objects_dir).unwrap();

        let commit_cid = result.commit_cid.to_void_cid().unwrap();
        let diff = diff_working(&store, &vault, &commit_cid, &root).unwrap();

        // Should have: 1 added (new.txt), 1 modified (file1.txt)
        let stats = diff.stats();
        assert_eq!(stats.added, 1);
        assert_eq!(stats.modified, 1);
        assert_eq!(stats.deleted, 0);
    }
}