Skip to main content

hivemind/core/
diff.rs

1//! Baseline and diff computation for change detection.
2//!
3//! Captures filesystem state before execution and computes changes
4//! after execution for verification and attribution.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::io::{self, Read};
10use std::path::{Path, PathBuf};
11use uuid::Uuid;
12
/// File content hash, stored as a lowercase hexadecimal string.
///
/// The hashes in this module are produced by `compute_hash`, which formats the
/// 64-bit output of `std::collections::hash_map::DefaultHasher` as 16 hex
/// digits. It is a change-detection fingerprint, not a SHA-256 (or any other
/// cryptographic) digest.
pub type FileHash = String;
15
16/// A snapshot of a file at a point in time.
17#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
18pub struct FileSnapshot {
19    /// Relative path from root.
20    pub path: PathBuf,
21    /// File hash (None if directory or unreadable).
22    pub hash: Option<FileHash>,
23    /// File size in bytes.
24    pub size: u64,
25    /// Whether this is a directory.
26    pub is_dir: bool,
27}
28
29/// A baseline snapshot of a directory tree.
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct Baseline {
32    /// Unique baseline ID.
33    pub id: Uuid,
34    /// Root path this baseline was taken from.
35    pub root: PathBuf,
36    /// Git HEAD commit at baseline time.
37    pub git_head: Option<String>,
38    #[serde(default)]
39    pub git_branches: Vec<String>,
40    /// File snapshots by relative path.
41    pub files: HashMap<PathBuf, FileSnapshot>,
42    /// Timestamp when baseline was captured.
43    pub captured_at: chrono::DateTime<chrono::Utc>,
44}
45
46impl Baseline {
47    /// Captures a baseline from a directory.
48    pub fn capture(root: &Path) -> io::Result<Self> {
49        let mut files = HashMap::new();
50        capture_recursive(root, root, &mut files)?;
51
52        let git_head = get_git_head(root).ok();
53        let git_branches = get_git_branches(root).ok().unwrap_or_default();
54
55        Ok(Self {
56            id: Uuid::new_v4(),
57            root: root.to_path_buf(),
58            git_head,
59            git_branches,
60            files,
61            captured_at: chrono::Utc::now(),
62        })
63    }
64
65    /// Gets a file snapshot by path.
66    pub fn get(&self, path: &Path) -> Option<&FileSnapshot> {
67        self.files.get(path)
68    }
69
70    /// Returns the number of files in the baseline.
71    pub fn file_count(&self) -> usize {
72        self.files.len()
73    }
74}
75
/// Lists the local branches (`refs/heads`) of the git repository at `path`.
///
/// The returned short names are sorted and free of duplicates.
///
/// # Errors
///
/// Fails when `git` cannot be started or when it exits unsuccessfully.
fn get_git_branches(path: &Path) -> io::Result<Vec<String>> {
    use std::process::Command;

    let listing = Command::new("git")
        .current_dir(path)
        .args(["for-each-ref", "refs/heads", "--format=%(refname:short)"])
        .output()?;

    if !listing.status.success() {
        return Err(io::Error::other("Failed to list git branches"));
    }

    // One branch name per output line; blank lines carry no information.
    let stdout = String::from_utf8_lossy(&listing.stdout);
    let mut names = Vec::new();
    for line in stdout.lines() {
        let name = line.trim();
        if !name.is_empty() {
            names.push(name.to_string());
        }
    }

    names.sort();
    names.dedup();
    Ok(names)
}
97
98fn capture_recursive(
99    root: &Path,
100    current: &Path,
101    files: &mut HashMap<PathBuf, FileSnapshot>,
102) -> io::Result<()> {
103    for entry in fs::read_dir(current)? {
104        let entry = entry?;
105        let path = entry.path();
106        let relative = path.strip_prefix(root).unwrap_or(&path).to_path_buf();
107
108        // Skip .git directory
109        if relative.starts_with(".git") {
110            continue;
111        }
112
113        let metadata = entry.metadata()?;
114        let is_dir = metadata.is_dir();
115
116        let hash = if is_dir {
117            None
118        } else {
119            compute_hash(&path).ok()
120        };
121
122        files.insert(
123            relative.clone(),
124            FileSnapshot {
125                path: relative,
126                hash,
127                size: metadata.len(),
128                is_dir,
129            },
130        );
131
132        if is_dir {
133            capture_recursive(root, &path, files)?;
134        }
135    }
136
137    Ok(())
138}
139
140fn compute_hash(path: &Path) -> io::Result<FileHash> {
141    use std::collections::hash_map::DefaultHasher;
142    use std::hash::{Hash, Hasher};
143
144    let mut file = fs::File::open(path)?;
145    let mut contents = Vec::new();
146    file.read_to_end(&mut contents)?;
147
148    let mut hasher = DefaultHasher::new();
149    contents.hash(&mut hasher);
150    Ok(format!("{:016x}", hasher.finish()))
151}
152
/// Resolves `HEAD` of the git repository at `path` by running `git rev-parse HEAD`.
///
/// # Errors
///
/// Fails when `git` cannot be started or when it exits unsuccessfully.
fn get_git_head(path: &Path) -> io::Result<String> {
    use std::process::Command;

    let rev_parse = Command::new("git")
        .current_dir(path)
        .args(["rev-parse", "HEAD"])
        .output()?;

    if !rev_parse.status.success() {
        return Err(io::Error::other("Failed to get git HEAD"));
    }

    // Strip the trailing newline git prints after the commit id.
    let stdout = String::from_utf8_lossy(&rev_parse.stdout);
    Ok(stdout.trim().to_string())
}
167
168/// Type of change detected.
169#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
170#[serde(rename_all = "lowercase")]
171pub enum ChangeType {
172    /// File was created.
173    Created,
174    /// File was modified.
175    Modified,
176    /// File was deleted.
177    Deleted,
178}
179
180/// A detected file change.
181#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
182pub struct FileChange {
183    /// Relative path.
184    pub path: PathBuf,
185    /// Type of change.
186    pub change_type: ChangeType,
187    /// Old hash (for modified/deleted).
188    pub old_hash: Option<FileHash>,
189    /// New hash (for created/modified).
190    pub new_hash: Option<FileHash>,
191}
192
193/// A diff between two states.
194#[derive(Debug, Clone, Serialize, Deserialize)]
195pub struct Diff {
196    /// Unique diff ID.
197    pub id: Uuid,
198    /// Task this diff is attributed to.
199    pub task_id: Option<Uuid>,
200    /// Attempt this diff is attributed to.
201    pub attempt_id: Option<Uuid>,
202    /// Base state (before).
203    pub baseline_id: Uuid,
204    /// File changes.
205    pub changes: Vec<FileChange>,
206    /// Computed timestamp.
207    pub computed_at: chrono::DateTime<chrono::Utc>,
208}
209
210impl Diff {
211    /// Computes a diff between a baseline and current state.
212    pub fn compute(baseline: &Baseline, current_root: &Path) -> io::Result<Self> {
213        let mut current_files = HashMap::new();
214        capture_recursive(current_root, current_root, &mut current_files)?;
215
216        let mut changes = Vec::new();
217
218        // Check for modified and deleted files
219        for (path, old_snapshot) in &baseline.files {
220            match current_files.get(path) {
221                Some(new_snapshot) => {
222                    if old_snapshot.hash != new_snapshot.hash {
223                        changes.push(FileChange {
224                            path: path.clone(),
225                            change_type: ChangeType::Modified,
226                            old_hash: old_snapshot.hash.clone(),
227                            new_hash: new_snapshot.hash.clone(),
228                        });
229                    }
230                }
231                None => {
232                    changes.push(FileChange {
233                        path: path.clone(),
234                        change_type: ChangeType::Deleted,
235                        old_hash: old_snapshot.hash.clone(),
236                        new_hash: None,
237                    });
238                }
239            }
240        }
241
242        // Check for created files
243        for (path, new_snapshot) in &current_files {
244            if !baseline.files.contains_key(path) {
245                changes.push(FileChange {
246                    path: path.clone(),
247                    change_type: ChangeType::Created,
248                    old_hash: None,
249                    new_hash: new_snapshot.hash.clone(),
250                });
251            }
252        }
253
254        // Sort changes by path for deterministic output
255        changes.sort_by(|a, b| a.path.cmp(&b.path));
256
257        Ok(Self {
258            id: Uuid::new_v4(),
259            task_id: None,
260            attempt_id: None,
261            baseline_id: baseline.id,
262            changes,
263            computed_at: chrono::Utc::now(),
264        })
265    }
266
267    /// Attributes this diff to a task.
268    #[must_use]
269    pub fn for_task(mut self, task_id: Uuid) -> Self {
270        self.task_id = Some(task_id);
271        self
272    }
273
274    /// Attributes this diff to an attempt.
275    #[must_use]
276    pub fn for_attempt(mut self, attempt_id: Uuid) -> Self {
277        self.attempt_id = Some(attempt_id);
278        self
279    }
280
281    /// Returns true if there are no changes.
282    pub fn is_empty(&self) -> bool {
283        self.changes.is_empty()
284    }
285
286    /// Returns the number of changes.
287    pub fn change_count(&self) -> usize {
288        self.changes.len()
289    }
290
291    /// Returns changes of a specific type.
292    pub fn changes_of_type(&self, change_type: ChangeType) -> Vec<&FileChange> {
293        self.changes
294            .iter()
295            .filter(|c| c.change_type == change_type)
296            .collect()
297    }
298
299    /// Returns all modified paths.
300    pub fn modified_paths(&self) -> Vec<&Path> {
301        self.changes.iter().map(|c| c.path.as_path()).collect()
302    }
303}
304
305/// Unified diff format for a single file.
306#[derive(Debug, Clone, Serialize, Deserialize)]
307pub struct UnifiedDiff {
308    /// File path.
309    pub path: PathBuf,
310    /// Diff content in unified format.
311    pub content: String,
312}
313
/// Computes a unified diff between two files by running the external `diff -u`.
///
/// A side given as `None` is compared as `/dev/null`, which makes the other
/// file show up as entirely added or entirely removed. When both sides are
/// `None` the result is an empty string and no process is started.
///
/// Paths are handed to `diff` as raw OS strings, so file names that are not
/// valid UTF-8 are diffed correctly, and they follow a `--` separator so a
/// path beginning with `-` is never mistaken for an option.
///
/// The exit status of `diff` is not inspected: status 1 simply means the
/// files differ, and whatever `diff` wrote to standard output is returned
/// (lossily decoded as UTF-8).
///
/// # Errors
///
/// Returns an error only when the `diff` process cannot be started.
pub fn unified_diff(old_path: Option<&Path>, new_path: Option<&Path>) -> io::Result<String> {
    use std::process::Command;

    if old_path.is_none() && new_path.is_none() {
        return Ok(String::new());
    }

    // Stand-in for the side that does not exist.
    let absent = Path::new("/dev/null");
    let old = old_path.unwrap_or(absent);
    let new = new_path.unwrap_or(absent);

    let output = Command::new("diff")
        .arg("-u")
        .arg("--")
        .arg(old)
        .arg(new)
        .output()?;

    // diff returns 1 if files differ, which is expected
    Ok(String::from_utf8_lossy(&output.stdout).to_string())
}
330
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a scratch tree holding `file1.txt`, `file2.txt` and `subdir/file3.txt`.
    fn create_test_dir() -> TempDir {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        fs::create_dir(root.join("subdir")).unwrap();
        let fixtures = [
            ("file1.txt", "content1"),
            ("file2.txt", "content2"),
            ("subdir/file3.txt", "content3"),
        ];
        for (name, body) in fixtures {
            fs::write(root.join(name), body).unwrap();
        }

        dir
    }

    #[test]
    fn capture_baseline() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        assert!(baseline.file_count() >= 3);
        assert!(baseline.get(Path::new("file1.txt")).is_some());
    }

    #[test]
    fn detect_created_file() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        // Add an entry the baseline has never seen.
        fs::write(tree.path().join("new_file.txt"), "new content").unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();
        let created = diff.changes_of_type(ChangeType::Created);

        assert_eq!(created.len(), 1);
        assert_eq!(created[0].path, Path::new("new_file.txt"));
    }

    #[test]
    fn detect_modified_file() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        // Overwrite an existing file with different contents.
        fs::write(tree.path().join("file1.txt"), "modified content").unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();
        let modified = diff.changes_of_type(ChangeType::Modified);

        assert_eq!(modified.len(), 1);
        assert_eq!(modified[0].path, Path::new("file1.txt"));
    }

    #[test]
    fn detect_deleted_file() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        // Remove a file that the baseline recorded.
        fs::remove_file(tree.path().join("file1.txt")).unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();
        let deleted = diff.changes_of_type(ChangeType::Deleted);

        assert_eq!(deleted.len(), 1);
        assert_eq!(deleted[0].path, Path::new("file1.txt"));
    }

    #[test]
    fn no_changes_empty_diff() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();

        assert!(diff.is_empty());
    }

    #[test]
    fn diff_attribution() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        let task = Uuid::new_v4();
        let attempt = Uuid::new_v4();

        let diff = Diff::compute(&baseline, tree.path())
            .unwrap()
            .for_task(task)
            .for_attempt(attempt);

        assert_eq!(diff.task_id, Some(task));
        assert_eq!(diff.attempt_id, Some(attempt));
    }

    #[test]
    fn file_change_serialization() {
        let original = FileChange {
            path: PathBuf::from("test.txt"),
            change_type: ChangeType::Modified,
            old_hash: Some("abc".to_string()),
            new_hash: Some("def".to_string()),
        };

        // A JSON round trip must reproduce the value exactly.
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: FileChange = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }
}