use std::collections::HashMap;
use std::path::{Path, PathBuf};
use crate::error::ScanError;
/// Collects, for every path changed in the history reachable from `HEAD`, the
/// newest timestamp among the commits that changed it.
///
/// The repository is located with `gix::discover` starting at `repo_root`.
/// Every ancestor of the `HEAD` commit is visited; each commit's tree is diffed
/// against the tree of its *first* parent, and every changed location is
/// recorded with that commit's time (`commit.time()?.seconds`). A commit with
/// no parent (or whose first parent's tree cannot be loaded) contributes every
/// blob path in its tree instead.
///
/// Keys are the paths as reported by git, not absolute filesystem paths.
/// NOTE(review): `gix::discover` may resolve a repository above `repo_root`;
/// in that case keys are presumably relative to the repository's root rather
/// than to `repo_root` — confirm against callers.
/// NOTE(review): the diff callback records every change location it is handed,
/// which may include directories and deleted paths — confirm whether callers
/// care.
///
/// Returns an empty map (not an error) when `repo_root` is not inside a git
/// repository or when there is no `HEAD` commit.
///
/// # Errors
///
/// Returns [`ScanError::GitError`] when the ancestor walk cannot be started,
/// or when a commit, its time, its tree, or a tree diff/traversal cannot be
/// read.
#[tracing::instrument(skip_all, fields(repo_root = %repo_root.display()))]
pub fn collect_git_file_dates(repo_root: &Path) -> Result<HashMap<PathBuf, i64>, ScanError> {
    // Both "not a repository" and "no commits yet" are expected situations for
    // callers, so they yield an empty map rather than an error.
    let repo = match gix::discover(repo_root) {
        Ok(r) => r,
        Err(_) => {
            tracing::debug!("Not a git repository, skipping file date collection");
            return Ok(HashMap::new());
        }
    };
    let head_commit = match repo.head_commit() {
        Ok(c) => c,
        Err(_) => {
            tracing::debug!("No HEAD commit found (empty repo), skipping file date collection");
            return Ok(HashMap::new());
        }
    };

    let mut file_dates: HashMap<PathBuf, i64> = HashMap::new();
    let walk = head_commit
        .ancestors()
        .all()
        .map_err(|e| ScanError::GitError(format!("Failed to walk commit ancestors: {e}")))?;

    for info in walk {
        let info = info
            .map_err(|e| ScanError::GitError(format!("Failed to read commit during walk: {e}")))?;
        let commit = info
            .id()
            .object()
            .map_err(|e| ScanError::GitError(format!("Failed to read commit object: {e}")))?
            .into_commit();
        let commit_time = commit
            .time()
            .map_err(|e| ScanError::GitError(format!("Failed to read commit time: {e}")))?;
        let timestamp = commit_time.seconds;
        let tree = commit
            .tree()
            .map_err(|e| ScanError::GitError(format!("Failed to read commit tree: {e}")))?;

        // Only the first parent is considered. Any failure to load it is
        // deliberately treated like "no parent", which makes the commit count
        // as having introduced every file in its tree.
        let parent_tree = commit
            .parent_ids()
            .next()
            .and_then(|parent_id| parent_id.object().ok()?.into_commit().tree().ok());

        let mut changed_paths = Vec::new();
        match &parent_tree {
            Some(parent) => {
                let mut platform = parent.changes().map_err(|e| {
                    ScanError::GitError(format!("Failed to create tree changes tracker: {e}"))
                })?;
                platform.options(|opts| {
                    opts.track_path();
                });
                platform
                    .for_each_to_obtain_tree(&tree, |change| {
                        changed_paths.push(PathBuf::from(change.location().to_string()));
                        Ok::<_, std::convert::Infallible>(
                            gix::object::tree::diff::Action::Continue(()),
                        )
                    })
                    .map_err(|e| ScanError::GitError(format!("Failed to diff trees: {e}")))?;
            }
            None => tree_paths(&tree, &mut changed_paths)?,
        }

        // Keep the newest timestamp instead of the first one seen: the walk is
        // not guaranteed to yield commits in chronological order (e.g. across
        // merged branches), so "first seen" could leave a file with a stale,
        // older date.
        for path in changed_paths {
            let latest = file_dates.entry(path).or_insert(timestamp);
            if timestamp > *latest {
                *latest = timestamp;
            }
        }
    }

    tracing::info!(
        files_with_dates = file_dates.len(),
        "Collected git file dates"
    );
    if file_dates.is_empty() {
        tracing::warn!(
            repo_root = %repo_root.display(),
            "No file dates collected — git history may be shallow, the repo may be a bare \
             clone, or the worktree walk encountered an unexpected layout"
        );
    }
    Ok(file_dates)
}
/// Appends the path of every blob entry found under `tree` to `paths`.
///
/// The tree is walked with gix's breadth-first traversal into a `Recorder`;
/// entries whose mode is not a blob are skipped. Existing contents of `paths`
/// are left untouched.
///
/// # Errors
///
/// Returns [`ScanError::GitError`] if the traversal fails.
fn tree_paths(tree: &gix::Tree<'_>, paths: &mut Vec<PathBuf>) -> Result<(), ScanError> {
    let mut visitor = gix::traverse::tree::Recorder::default();
    if let Err(e) = tree.traverse().breadthfirst(&mut visitor) {
        return Err(ScanError::GitError(format!(
            "Failed to traverse tree: {e}"
        )));
    }

    let blob_paths = visitor
        .records
        .into_iter()
        .filter(|record| record.mode.is_blob())
        .map(|record| PathBuf::from(record.filepath.to_string()));
    paths.extend(blob_paths);

    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::process::Command;
    use tempfile::tempdir;

    /// Runs `git <args>` inside `dir` with the extra environment variables in
    /// `envs`, panicking if the process cannot be spawned or exits with a
    /// non-zero status. Checking the status matters: a silently failed
    /// `git commit` would otherwise surface later as a confusing assertion.
    fn run_git(dir: &Path, args: &[&str], envs: &[(&str, &str)]) {
        let output = Command::new("git")
            .args(args)
            .envs(envs.iter().copied())
            .current_dir(dir)
            .output()
            .unwrap_or_else(|e| panic!("failed to spawn `git {}`: {e}", args.join(" ")));
        assert!(
            output.status.success(),
            "`git {}` failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Initialises a repository on branch `main` with a test identity.
    fn init_git_repo(dir: &Path) {
        run_git(dir, &["init", "-b", "main"], &[]);
        run_git(dir, &["config", "user.email", "test@test.com"], &[]);
        run_git(dir, &["config", "user.name", "Test User"], &[]);
    }

    /// Stages everything in `dir` and commits it with `message`.
    fn git_add_and_commit(dir: &Path, message: &str) {
        run_git(dir, &["add", "."], &[]);
        run_git(dir, &["commit", "-m", message, "--allow-empty-message"], &[]);
    }

    /// Like [`git_add_and_commit`], but pins both the author and committer
    /// date to `unix_seconds` (UTC) so tests can assert exact timestamps
    /// without sleeping.
    fn git_add_and_commit_at(dir: &Path, message: &str, unix_seconds: i64) {
        let date = format!("{unix_seconds} +0000");
        run_git(dir, &["add", "."], &[]);
        run_git(
            dir,
            &["commit", "-m", message, "--allow-empty-message"],
            &[
                ("GIT_AUTHOR_DATE", date.as_str()),
                ("GIT_COMMITTER_DATE", date.as_str()),
            ],
        );
    }

    #[test]
    fn non_git_directory_returns_empty() {
        let dir = tempdir().expect("tempdir");
        let result = collect_git_file_dates(dir.path()).expect("should not error");
        assert!(result.is_empty(), "non-git dir should return empty map");
    }

    #[test]
    fn empty_repo_returns_empty() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());
        let result = collect_git_file_dates(dir.path()).expect("should not error");
        assert!(result.is_empty(), "empty repo should return empty map");
    }

    #[test]
    fn collects_dates_for_committed_files() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());
        fs::write(dir.path().join("hello.txt"), "hello").expect("write file");
        git_add_and_commit(dir.path(), "first commit");
        fs::write(dir.path().join("world.txt"), "world").expect("write file");
        git_add_and_commit(dir.path(), "second commit");
        let dates = collect_git_file_dates(dir.path()).expect("collect dates");
        assert!(
            dates.contains_key(&PathBuf::from("hello.txt")),
            "should have hello.txt"
        );
        assert!(
            dates.contains_key(&PathBuf::from("world.txt")),
            "should have world.txt"
        );
        for (path, ts) in &dates {
            assert!(
                *ts > 0,
                "timestamp for {} should be positive, got {}",
                path.display(),
                ts
            );
        }
    }

    #[test]
    fn most_recent_date_wins() {
        const FIRST_COMMIT: i64 = 1_500_000_000;
        const SECOND_COMMIT: i64 = 1_600_000_000;

        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());
        fs::write(dir.path().join("file.txt"), "v1").expect("write");
        git_add_and_commit_at(dir.path(), "first", FIRST_COMMIT);
        // Different length than "v1" so the change is detected even though both
        // writes may land within the same filesystem timestamp tick.
        fs::write(dir.path().join("file.txt"), "version two").expect("write");
        git_add_and_commit_at(dir.path(), "second", SECOND_COMMIT);

        let dates = collect_git_file_dates(dir.path()).expect("collect dates");
        let file_date = dates
            .get(&PathBuf::from("file.txt"))
            .expect("should have file.txt");
        assert_eq!(
            *file_date, SECOND_COMMIT,
            "file.txt should carry the date of the newest commit that touched it"
        );
    }

    #[test]
    fn handles_subdirectories() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());
        let sub = dir.path().join("src");
        fs::create_dir_all(&sub).expect("mkdir");
        fs::write(sub.join("main.rs"), "fn main() {}").expect("write");
        git_add_and_commit(dir.path(), "with subdirectory");
        let dates = collect_git_file_dates(dir.path()).expect("collect dates");
        assert!(
            dates.contains_key(&PathBuf::from("src/main.rs")),
            "should have src/main.rs, got keys: {:?}",
            dates.keys().collect::<Vec<_>>()
        );
    }

    #[test]
    fn keys_are_relative_not_absolute() {
        let dir = tempdir().expect("tempdir");
        init_git_repo(dir.path());
        fs::write(dir.path().join("config.toml"), "[package]").expect("write");
        git_add_and_commit(dir.path(), "add config");
        let dates = collect_git_file_dates(dir.path()).expect("collect dates");
        assert!(
            dates.contains_key(&PathBuf::from("config.toml")),
            "relative path must be a key"
        );
        let abs = dir.path().join("config.toml");
        assert!(
            !dates.contains_key(abs.as_path()),
            "absolute path must NOT be a key — callers must strip the root prefix"
        );
    }
}