Skip to main content

git_atomic/git/
walk.rs

1use crate::core::GitError;
2use gix::ObjectId;
3use std::collections::HashSet;
4use std::path::PathBuf;
5
6/// Walk commits reachable from `end` but not from `start`, returned oldest-first.
7///
8/// This is equivalent to `git log --reverse start..end`.
9pub fn walk_range(
10    repo: &gix::Repository,
11    start: ObjectId,
12    end: ObjectId,
13) -> Result<Vec<ObjectId>, GitError> {
14    // Collect ancestors of start (the exclusion set)
15    let mut excluded = HashSet::new();
16    let mut queue = vec![start];
17    while let Some(id) = queue.pop() {
18        if excluded.insert(id)
19            && let Ok(commit) = repo.find_commit(id)
20        {
21            for parent in commit.parent_ids() {
22                queue.push(parent.detach());
23            }
24        }
25    }
26
27    // Walk from end, collecting commits not in the exclusion set
28    let mut result = Vec::new();
29    let mut walk_queue = vec![end];
30    let mut visited = HashSet::new();
31    while let Some(id) = walk_queue.pop() {
32        if excluded.contains(&id) || !visited.insert(id) {
33            continue;
34        }
35        result.push(id);
36        if let Ok(commit) = repo.find_commit(id) {
37            for parent in commit.parent_ids() {
38                walk_queue.push(parent.detach());
39            }
40        }
41    }
42
43    // Reverse to get oldest-first (topological)
44    result.reverse();
45    Ok(result)
46}
47
48/// Compute the set of files that differ between two commits (effective files).
49///
50/// Files that are identical (or absent) at both endpoints are net-zero and
51/// excluded from the result. Only files that actually changed between the
52/// two trees are returned.
53pub fn effective_files(
54    repo: &gix::Repository,
55    start: ObjectId,
56    end: ObjectId,
57) -> Result<HashSet<PathBuf>, GitError> {
58    let start_commit = repo
59        .find_commit(start)
60        .map_err(|e| GitError::Operation(format!("find start commit: {e}")))?;
61    let start_tree = start_commit
62        .tree()
63        .map_err(|e| GitError::Operation(format!("start tree: {e}")))?;
64
65    let end_commit = repo
66        .find_commit(end)
67        .map_err(|e| GitError::Operation(format!("find end commit: {e}")))?;
68    let end_tree = end_commit
69        .tree()
70        .map_err(|e| GitError::Operation(format!("end tree: {e}")))?;
71
72    let changes = repo
73        .diff_tree_to_tree(Some(&start_tree), Some(&end_tree), None)
74        .map_err(|e| GitError::Operation(format!("diff start..end: {e}")))?;
75
76    let paths: HashSet<PathBuf> = changes
77        .iter()
78        .map(|change| PathBuf::from(change.location().to_string()))
79        .collect();
80
81    Ok(paths)
82}
83
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    use std::process::Command;

    /// Run `git <args>` inside `dir` and return its trimmed stdout.
    ///
    /// Panics when git cannot be spawned or exits unsuccessfully, so a broken
    /// fixture step is reported where it happens (with git's stderr) instead
    /// of surfacing later as an unrelated assertion or unwrap failure.
    fn git(dir: &Path, args: &[&str]) -> String {
        let out = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap_or_else(|e| panic!("failed to spawn `git {}`: {e}", args.join(" ")));
        assert!(
            out.status.success(),
            "`git {}` failed ({}): {}",
            args.join(" "),
            out.status,
            String::from_utf8_lossy(&out.stderr).trim()
        );
        String::from_utf8_lossy(&out.stdout).trim().to_string()
    }

    /// Create a repository on branch `main` with a single empty commit.
    fn init_repo(dir: &Path) {
        git(dir, &["init", "-b", "main"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
        // Keep the fixture independent of a globally enforced signing setup.
        git(dir, &["config", "commit.gpgsign", "false"]);
        git(dir, &["commit", "--allow-empty", "-m", "initial"]);
    }

    /// Write `contents` to `name` inside `dir`, stage everything and commit.
    fn commit_file(dir: &Path, name: &str, contents: &str, message: &str) {
        std::fs::write(dir.join(name), contents).unwrap();
        git(dir, &["add", "."]);
        git(dir, &["commit", "-m", message]);
    }

    #[test]
    fn walk_range_returns_commits_oldest_first() {
        let dir = tempfile::tempdir().unwrap();
        init_repo(dir.path());

        let base = git(dir.path(), &["rev-parse", "HEAD"]);

        commit_file(dir.path(), "a.txt", "a", "commit 1");
        commit_file(dir.path(), "b.txt", "b", "commit 2");
        commit_file(dir.path(), "c.txt", "c", "commit 3");

        let repo = crate::git::open_repo(dir.path()).unwrap();
        let start = crate::git::resolve_commit(&repo, &base).unwrap();
        let end = crate::git::resolve_commit(&repo, "HEAD").unwrap();

        let commits = walk_range(&repo, start, end).unwrap();
        assert_eq!(commits.len(), 3);

        // Verify oldest-first: first commit should be "commit 1"
        let first = repo.find_commit(commits[0]).unwrap();
        let msg = first.message_raw_sloppy().to_string();
        assert!(msg.contains("commit 1"), "expected 'commit 1', got: {msg}");

        let last = repo.find_commit(commits[2]).unwrap();
        let msg = last.message_raw_sloppy().to_string();
        assert!(msg.contains("commit 3"), "expected 'commit 3', got: {msg}");
    }

    #[test]
    fn walk_range_empty_when_same_commit() {
        let dir = tempfile::tempdir().unwrap();
        init_repo(dir.path());

        let repo = crate::git::open_repo(dir.path()).unwrap();
        let head = crate::git::resolve_commit(&repo, "HEAD").unwrap();

        let commits = walk_range(&repo, head, head).unwrap();
        assert!(commits.is_empty());
    }

    #[test]
    fn effective_files_detects_net_zero() {
        let dir = tempfile::tempdir().unwrap();
        init_repo(dir.path());

        let base = git(dir.path(), &["rev-parse", "HEAD"]);

        // Add foo and bar in a single commit
        std::fs::write(dir.path().join("foo.txt"), "foo").unwrap();
        std::fs::write(dir.path().join("bar.txt"), "bar").unwrap();
        git(dir.path(), &["add", "."]);
        git(dir.path(), &["commit", "-m", "add files"]);

        // Delete foo (net-zero: added then deleted)
        std::fs::remove_file(dir.path().join("foo.txt")).unwrap();
        git(dir.path(), &["add", "."]);
        git(dir.path(), &["commit", "-m", "remove foo"]);

        let repo = crate::git::open_repo(dir.path()).unwrap();
        let start = crate::git::resolve_commit(&repo, &base).unwrap();
        let end = crate::git::resolve_commit(&repo, "HEAD").unwrap();

        let effective = effective_files(&repo, start, end).unwrap();

        // bar.txt is effective (added and still present)
        assert!(effective.contains(&PathBuf::from("bar.txt")));
        // foo.txt is net-zero (added then deleted)
        assert!(!effective.contains(&PathBuf::from("foo.txt")));
    }

    #[test]
    fn effective_files_empty_when_same() {
        let dir = tempfile::tempdir().unwrap();
        init_repo(dir.path());

        let repo = crate::git::open_repo(dir.path()).unwrap();
        let head = crate::git::resolve_commit(&repo, "HEAD").unwrap();

        let effective = effective_files(&repo, head, head).unwrap();
        assert!(effective.is_empty());
    }
}
195}