seshat_scanner/
git_dates.rs1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9
10use crate::error::ScanError;
11
12#[tracing::instrument(skip_all, fields(repo_root = %repo_root.display()))]
29pub fn collect_git_file_dates(repo_root: &Path) -> Result<HashMap<PathBuf, i64>, ScanError> {
30 let repo = match gix::discover(repo_root) {
33 Ok(r) => r,
34 Err(_) => {
35 tracing::debug!("Not a git repository, skipping file date collection");
36 return Ok(HashMap::new());
37 }
38 };
39
40 let head_commit = match repo.head_commit() {
42 Ok(c) => c,
43 Err(_) => {
44 tracing::debug!("No HEAD commit found (empty repo), skipping file date collection");
45 return Ok(HashMap::new());
46 }
47 };
48
49 let mut file_dates: HashMap<PathBuf, i64> = HashMap::new();
50
51 let walk = head_commit
56 .ancestors()
57 .all()
58 .map_err(|e| ScanError::GitError(format!("Failed to walk commit ancestors: {e}")))?;
59
60 for info in walk {
61 let info = info
62 .map_err(|e| ScanError::GitError(format!("Failed to read commit during walk: {e}")))?;
63
64 let commit = info
65 .id()
66 .object()
67 .map_err(|e| ScanError::GitError(format!("Failed to read commit object: {e}")))?
68 .into_commit();
69
70 let commit_time = commit
71 .time()
72 .map_err(|e| ScanError::GitError(format!("Failed to read commit time: {e}")))?;
73 let timestamp = commit_time.seconds;
74
75 let tree = commit
76 .tree()
77 .map_err(|e| ScanError::GitError(format!("Failed to read commit tree: {e}")))?;
78
79 let parent_tree = commit
81 .parent_ids()
82 .next()
83 .and_then(|parent_id| parent_id.object().ok()?.into_commit().tree().ok());
84
85 let changes = match &parent_tree {
87 Some(parent) => {
88 let mut changes = Vec::new();
89 let mut platform = parent.changes().map_err(|e| {
90 ScanError::GitError(format!("Failed to create tree changes tracker: {e}"))
91 })?;
92 platform.options(|opts| {
93 opts.track_path();
94 });
95 platform
96 .for_each_to_obtain_tree(&tree, |change| {
97 let path = PathBuf::from(change.location().to_string());
98 changes.push(path);
99 Ok::<_, std::convert::Infallible>(
100 gix::object::tree::diff::Action::Continue(()),
101 )
102 })
103 .map_err(|e| ScanError::GitError(format!("Failed to diff trees: {e}")))?;
104 changes
105 }
106 None => {
107 let mut changes = Vec::new();
109 tree_paths(&tree, &mut changes)?;
110 changes
111 }
112 };
113
114 for path in changes {
115 file_dates.entry(path).or_insert(timestamp);
117 }
118 }
119
120 tracing::info!(
121 files_with_dates = file_dates.len(),
122 "Collected git file dates"
123 );
124
125 if file_dates.is_empty() {
126 tracing::warn!(
127 repo_root = %repo_root.display(),
128 "No file dates collected — git history may be shallow, the repo may be a bare \
129 clone, or the worktree walk encountered an unexpected layout"
130 );
131 }
132
133 Ok(file_dates)
134}
135
136fn tree_paths(tree: &gix::Tree<'_>, paths: &mut Vec<PathBuf>) -> Result<(), ScanError> {
138 let mut recorder = gix::traverse::tree::Recorder::default();
139 tree.traverse()
140 .breadthfirst(&mut recorder)
141 .map_err(|e| ScanError::GitError(format!("Failed to traverse tree: {e}")))?;
142
143 for entry in recorder.records {
144 if entry.mode.is_blob() {
145 paths.push(PathBuf::from(entry.filepath.to_string()));
146 }
147 }
148
149 Ok(())
150}
151
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::process::Command;
    use tempfile::tempdir;

    /// Runs `git <args>` inside `dir` and returns its trimmed stdout.
    ///
    /// Panics if git cannot be spawned *or* exits unsuccessfully, so a broken
    /// fixture fails loudly instead of surfacing later as a confusing
    /// assertion failure.
    fn run_git(dir: &Path, args: &[&str]) -> String {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap_or_else(|e| panic!("failed to spawn git {args:?}: {e}"));
        assert!(
            output.status.success(),
            "git {args:?} failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        String::from_utf8_lossy(&output.stdout).trim().to_owned()
    }

    /// Initializes a repository on branch `main` with a local test identity.
    fn init_git_repo(dir: &Path) {
        run_git(dir, &["init", "-b", "main"]);
        run_git(dir, &["config", "user.email", "test@test.com"]);
        run_git(dir, &["config", "user.name", "Test User"]);
    }

    /// Stages everything in `dir` and commits it with `message`.
    fn git_add_and_commit(dir: &Path, message: &str) {
        run_git(dir, &["add", "."]);
        run_git(dir, &["commit", "-m", message, "--allow-empty-message"]);
    }

    /// Returns the committer timestamp (`%ct`) of the current `HEAD` commit.
    fn head_commit_timestamp(dir: &Path) -> i64 {
        run_git(dir, &["log", "-1", "--format=%ct"])
            .parse()
            .expect("git log should print an integer timestamp")
    }

    #[test]
    fn non_git_directory_returns_empty() {
        let dir = tempdir().expect("tempdir");
        let result = collect_git_file_dates(dir.path()).expect("should not error");
        assert!(result.is_empty(), "non-git dir should return empty map");
    }

    #[test]
    fn empty_repo_returns_empty() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());

        let result = collect_git_file_dates(dir.path()).expect("should not error");
        assert!(result.is_empty(), "empty repo should return empty map");
    }

    #[test]
    fn collects_dates_for_committed_files() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());

        fs::write(dir.path().join("hello.txt"), "hello").expect("write file");
        git_add_and_commit(dir.path(), "first commit");

        fs::write(dir.path().join("world.txt"), "world").expect("write file");
        git_add_and_commit(dir.path(), "second commit");

        let dates = collect_git_file_dates(dir.path()).expect("collect dates");
        assert!(
            dates.contains_key(&PathBuf::from("hello.txt")),
            "should have hello.txt"
        );
        assert!(
            dates.contains_key(&PathBuf::from("world.txt")),
            "should have world.txt"
        );

        for (path, ts) in &dates {
            assert!(
                *ts > 0,
                "timestamp for {} should be positive, got {}",
                path.display(),
                ts
            );
        }
    }

    #[test]
    fn most_recent_date_wins() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());

        fs::write(dir.path().join("file.txt"), "v1").expect("write");
        git_add_and_commit(dir.path(), "first");
        let first_ts = head_commit_timestamp(dir.path());

        // Commit timestamps have one-second resolution; wait so the second
        // commit is strictly newer than the first.
        std::thread::sleep(std::time::Duration::from_secs(1));

        fs::write(dir.path().join("file.txt"), "v2").expect("write");
        git_add_and_commit(dir.path(), "second");
        let second_ts = head_commit_timestamp(dir.path());
        assert!(
            second_ts > first_ts,
            "fixture error: second commit ({second_ts}) is not newer than first ({first_ts})"
        );

        let dates = collect_git_file_dates(dir.path()).expect("collect dates");
        let file_date = dates
            .get(&PathBuf::from("file.txt"))
            .expect("should have file.txt");

        assert_eq!(
            *file_date, second_ts,
            "file.txt should carry the timestamp of the most recent commit touching it"
        );
    }

    #[test]
    fn handles_subdirectories() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());

        let sub = dir.path().join("src");
        fs::create_dir_all(&sub).expect("mkdir");
        fs::write(sub.join("main.rs"), "fn main() {}").expect("write");
        git_add_and_commit(dir.path(), "with subdirectory");

        let dates = collect_git_file_dates(dir.path()).expect("collect dates");
        assert!(
            dates.contains_key(&PathBuf::from("src/main.rs")),
            "should have src/main.rs, got keys: {:?}",
            dates.keys().collect::<Vec<_>>()
        );
    }

    #[test]
    fn keys_are_relative_not_absolute() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());

        fs::write(dir.path().join("config.toml"), "[package]").expect("write");
        git_add_and_commit(dir.path(), "add config");

        let dates = collect_git_file_dates(dir.path()).expect("collect dates");

        assert!(
            dates.contains_key(&PathBuf::from("config.toml")),
            "relative path must be a key"
        );

        let abs = dir.path().join("config.toml");
        assert!(
            !dates.contains_key(abs.as_path()),
            "absolute path must NOT be a key — callers must strip the root prefix"
        );
    }
}