use super::{GitBackend, GitError, Result, SubprocessGit};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
/// Per-file timestamp index used to derive a normalized recency score.
///
/// Scores are computed by `score_for_file`, which maps a file's timestamp
/// linearly onto `[0.0, 1.0]` between `min_ts` and `max_ts`.
#[derive(Debug, Clone)]
pub struct RecencyIndex {
/// Timestamp recorded for each file path. When built by `from_repo`, keys are
/// the paths printed by `git ls-files` and values come from
/// `git log -1 --format=%ct` for that path.
by_file: HashMap<PathBuf, i64>,
/// Lower end of the timestamp range, as computed by the constructor.
min_ts: i64,
/// Upper end of the timestamp range, as computed by the constructor.
max_ts: i64,
/// Repository root used to translate between absolute and relative lookup paths.
repo_root: PathBuf,
}
impl RecencyIndex {
    /// Converts an integer to `f32`, accepting the rounding this implies.
    ///
    /// Takes `i128` so that callers can pass the difference of two `i64`
    /// values without risking overflow in the subtraction.
    #[inline]
    #[allow(clippy::cast_precision_loss)] // scores only need `f32` resolution
    fn to_f32_lossy(value: i128) -> f32 {
        value as f32
    }

    /// Assembles an index from collected timestamps, deriving the range from the map.
    ///
    /// An empty map yields the range `(0, 0)`.
    fn from_parts(by_file: HashMap<PathBuf, i64>, repo_root: PathBuf) -> Self {
        let min_ts = by_file.values().copied().min().unwrap_or(0);
        let max_ts = by_file.values().copied().max().unwrap_or(0);
        Self {
            by_file,
            min_ts,
            max_ts,
            repo_root,
        }
    }

    /// Builds an index by querying git for every tracked file under `root`.
    ///
    /// The repository root is resolved through [`SubprocessGit`], tracked files
    /// are listed with `git ls-files`, and each file's timestamp is taken from
    /// `git log -1 --format=%ct`. Tracked files for which git prints no
    /// timestamp are left out of the index.
    ///
    /// # Errors
    ///
    /// Propagates any error from resolving the repository root, from running
    /// git, or from parsing a timestamp git printed.
    pub fn from_repo(root: &Path) -> Result<Self> {
        let backend = SubprocessGit::new();
        let repo_root = backend.repo_root(root)?;
        let tracked_files = Self::get_tracked_files(&repo_root)?;

        // NOTE: this spawns one `git log` per tracked file.
        let mut by_file = HashMap::with_capacity(tracked_files.len());
        for file_path in tracked_files {
            if let Some(timestamp) = Self::get_file_timestamp(&repo_root, &file_path)? {
                by_file.insert(file_path, timestamp);
            }
        }

        // Going through `from_parts` keeps the range at (0, 0) when no tracked
        // file has a timestamp, instead of leaking the MAX/MIN fold sentinels.
        let index = Self::from_parts(by_file, repo_root);
        if !index.by_file.is_empty() && index.min_ts == index.max_ts {
            log::debug!(
                "RecencyIndex: All files have identical timestamps ({}), scores will be neutral 0.5",
                index.min_ts
            );
        }
        Ok(index)
    }

    /// Builds an index directly from a path-to-timestamp map.
    ///
    /// `repo_root` is only used by [`Self::score_for_file`] to convert between
    /// absolute and relative lookup paths. An empty map produces an index whose
    /// scores are all `0.5`.
    #[must_use]
    pub fn from_timestamps(by_file: HashMap<PathBuf, i64>, repo_root: &Path) -> Self {
        Self::from_parts(by_file, repo_root.to_path_buf())
    }

    /// Returns the recency score of `path` in `[0.0, 1.0]`.
    ///
    /// The file with the smallest timestamp scores `0.0`, the one with the
    /// largest scores `1.0`, and others are placed linearly in between. The
    /// neutral value `0.5` is returned when the index is empty, when the path
    /// is unknown, or when all timestamps are identical.
    ///
    /// The path is looked up as given first. If that misses, an absolute path
    /// is retried relative to the repository root, and a relative path is
    /// retried joined onto the repository root.
    #[must_use]
    pub fn score_for_file(&self, path: &Path) -> f32 {
        if self.by_file.is_empty() {
            return 0.5;
        }

        let timestamp = self.by_file.get(path).or_else(|| {
            if path.is_absolute() {
                path.strip_prefix(&self.repo_root)
                    .ok()
                    .and_then(|rel| self.by_file.get(rel))
            } else {
                self.by_file.get(&self.repo_root.join(path))
            }
        });
        let Some(&ts) = timestamp else {
            return 0.5;
        };

        if self.max_ts == self.min_ts {
            return 0.5;
        }

        // Widen before subtracting: the difference of two `i64` values can
        // exceed `i64::MAX`, which would panic in debug builds and wrap in
        // release builds.
        let offset = i128::from(ts) - i128::from(self.min_ts);
        let span = i128::from(self.max_ts) - i128::from(self.min_ts);
        (Self::to_f32_lossy(offset) / Self::to_f32_lossy(span)).clamp(0.0, 1.0)
    }

    /// Returns the number of files that have a timestamp in the index.
    #[must_use]
    pub fn file_count(&self) -> usize {
        self.by_file.len()
    }

    /// Returns the repository root this index was built for.
    #[must_use]
    pub fn repo_root(&self) -> &Path {
        &self.repo_root
    }

    /// Returns `(min, max)` of the indexed timestamps, or `None` for an empty index.
    #[must_use]
    pub fn timestamp_range(&self) -> Option<(i64, i64)> {
        (!self.by_file.is_empty()).then_some((self.min_ts, self.max_ts))
    }

    /// Lists the files tracked by git in `repo_root`, as printed by `git ls-files -z`.
    fn get_tracked_files(repo_root: &Path) -> Result<Vec<PathBuf>> {
        let repo_root_str = repo_root.display().to_string();
        let stdout =
            SubprocessGit::execute_git(&["-C", &repo_root_str, "ls-files", "-z"], None)?;

        // `-z` separates entries with NUL; the trailing separator leaves an
        // empty final piece that must be dropped.
        let files = stdout
            .split('\0')
            .filter(|entry| !entry.is_empty())
            .map(PathBuf::from)
            .collect();
        Ok(files)
    }

    /// Returns the `%ct` timestamp of the latest commit touching `file_path`.
    ///
    /// Yields `Ok(None)` when git prints nothing for the path.
    ///
    /// # Errors
    ///
    /// Propagates git execution failures, and reports
    /// [`GitError::InvalidOutput`] when the printed value is not an integer.
    fn get_file_timestamp(repo_root: &Path, file_path: &Path) -> Result<Option<i64>> {
        let repo_root_str = repo_root.display().to_string();
        let file_path_str = file_path.display().to_string();
        let args = [
            "-C",
            repo_root_str.as_str(),
            // File names are exact paths, not patterns: without this flag a
            // name containing `*`, `?` or a leading `:` would be interpreted
            // as a glob / pathspec magic and could match other files.
            "--literal-pathspecs",
            "log",
            "-1",
            "--format=%ct",
            "--",
            file_path_str.as_str(),
        ];
        let stdout = SubprocessGit::execute_git(&args, None)?;

        let trimmed = stdout.trim();
        if trimmed.is_empty() {
            return Ok(None);
        }

        let timestamp = trimmed.parse::<i64>().map_err(|e| {
            GitError::InvalidOutput(format!(
                "Failed to parse timestamp '{}' for {}: {e}",
                trimmed,
                file_path.display()
            ))
        })?;
        Ok(Some(timestamp))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::process::Command;
    use tempfile::TempDir;

    /// Tolerance used when comparing floating-point scores.
    const SCORE_EPSILON: f32 = 1.0e-6;

    /// Asserts that `actual` is within `SCORE_EPSILON` of `expected`.
    fn assert_score_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < SCORE_EPSILON,
            "expected {expected}, got {actual}"
        );
    }

    /// Runs `git <args>` inside `dir` with extra environment variables and
    /// panics (showing the captured output) if the command cannot be spawned
    /// or exits unsuccessfully.
    fn run_git(dir: &Path, args: &[&str], envs: &[(&str, &str)]) {
        let output = Command::new("git")
            .args(args)
            .envs(envs.iter().copied())
            .current_dir(dir)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {args:?} failed: {output:?}"
        );
    }

    /// Creates a temporary repository with three files committed at the
    /// returned epoch-second timestamps (oldest first).
    fn create_test_repo_with_timestamps() -> (TempDir, Vec<(&'static str, i64)>) {
        let tmpdir = tempfile::tempdir().unwrap();
        let path = tmpdir.path();

        run_git(path, &["init"], &[]);
        run_git(path, &["config", "user.name", "Test"], &[]);
        run_git(path, &["config", "user.email", "test@example.com"], &[]);
        run_git(path, &["config", "commit.gpgSign", "false"], &[]);

        let files = vec![("old.rs", 1000i64), ("mid.rs", 2000i64), ("new.rs", 3000i64)];
        for &(filename, timestamp) in &files {
            fs::write(path.join(filename), format!("// {filename}")).unwrap();
            run_git(path, &["add", filename], &[]);

            // Git only reads a bare number as epoch seconds when it has more
            // than eight digits; the `@<secs> <tz>` form is unambiguous for
            // small values such as 1000.
            let date = format!("@{timestamp} +0000");
            let message = format!("Add {filename}");
            run_git(
                path,
                &["commit", "-m", message.as_str()],
                &[
                    ("GIT_COMMITTER_DATE", date.as_str()),
                    ("GIT_AUTHOR_DATE", date.as_str()),
                ],
            );
        }
        (tmpdir, files)
    }

    #[test]
    fn test_from_timestamps_normalization() {
        let timestamps = HashMap::from([
            (PathBuf::from("old.rs"), 1000),
            (PathBuf::from("mid.rs"), 2000),
            (PathBuf::from("new.rs"), 3000),
        ]);
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/repo"));
        assert_score_close(index.score_for_file(Path::new("old.rs")), 0.0);
        assert_score_close(index.score_for_file(Path::new("mid.rs")), 0.5);
        assert_score_close(index.score_for_file(Path::new("new.rs")), 1.0);
    }

    #[test]
    fn test_from_timestamps_ordering() {
        let timestamps = HashMap::from([
            (PathBuf::from("old.rs"), 1000),
            (PathBuf::from("mid.rs"), 2000),
            (PathBuf::from("new.rs"), 3000),
        ]);
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/repo"));
        let old_score = index.score_for_file(Path::new("old.rs"));
        let mid_score = index.score_for_file(Path::new("mid.rs"));
        let new_score = index.score_for_file(Path::new("new.rs"));
        assert!(new_score > mid_score);
        assert!(mid_score > old_score);
    }

    #[test]
    fn test_from_timestamps_missing_file() {
        let timestamps = HashMap::from([
            (PathBuf::from("old.rs"), 1000),
            (PathBuf::from("new.rs"), 3000),
        ]);
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/repo"));
        assert_score_close(index.score_for_file(Path::new("missing.rs")), 0.5);
    }

    #[test]
    fn test_from_timestamps_identical_timestamps() {
        let timestamps = HashMap::from([
            (PathBuf::from("a.rs"), 1000),
            (PathBuf::from("b.rs"), 1000),
            (PathBuf::from("c.rs"), 1000),
        ]);
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/repo"));
        assert_score_close(index.score_for_file(Path::new("a.rs")), 0.5);
        assert_score_close(index.score_for_file(Path::new("b.rs")), 0.5);
        assert_score_close(index.score_for_file(Path::new("c.rs")), 0.5);
    }

    #[test]
    fn test_from_timestamps_empty() {
        let timestamps = HashMap::new();
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/repo"));
        assert_score_close(index.score_for_file(Path::new("any.rs")), 0.5);
        assert_eq!(index.file_count(), 0);
    }

    #[test]
    #[ignore = "Requires git binary and filesystem access"]
    fn test_from_repo_real_git() {
        let (tmpdir, _files) = create_test_repo_with_timestamps();
        let index = RecencyIndex::from_repo(tmpdir.path()).unwrap();
        assert_eq!(index.file_count(), 3);

        let old_score = index.score_for_file(Path::new("old.rs"));
        let mid_score = index.score_for_file(Path::new("mid.rs"));
        let new_score = index.score_for_file(Path::new("new.rs"));
        assert!(
            new_score > mid_score,
            "new ({new_score}) should be > mid ({mid_score})"
        );
        assert!(
            mid_score > old_score,
            "mid ({mid_score}) should be > old ({old_score})"
        );
        assert!(
            new_score > 0.9,
            "newest file should score > 0.9, got {new_score}"
        );
        assert!(
            old_score < 0.1,
            "oldest file should score < 0.1, got {old_score}"
        );
    }

    #[test]
    #[ignore = "Requires git binary and filesystem access"]
    fn test_from_repo_absolute_and_relative_paths() {
        let (tmpdir, _files) = create_test_repo_with_timestamps();
        let index = RecencyIndex::from_repo(tmpdir.path()).unwrap();
        let rel_score = index.score_for_file(Path::new("new.rs"));
        let abs_path = tmpdir.path().join("new.rs");
        let abs_score = index.score_for_file(&abs_path);
        assert_score_close(rel_score, abs_score);
    }

    #[test]
    fn test_repo_root_accessor() {
        let timestamps = HashMap::from([(PathBuf::from("test.rs"), 1000)]);
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/test/repo"));
        assert_eq!(index.repo_root(), Path::new("/test/repo"));
    }

    #[test]
    fn test_timestamp_range() {
        let timestamps = HashMap::from([
            (PathBuf::from("old.rs"), 1000),
            (PathBuf::from("new.rs"), 5000),
        ]);
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/repo"));
        assert_eq!(index.timestamp_range(), Some((1000, 5000)));

        let empty = RecencyIndex::from_timestamps(HashMap::new(), Path::new("/repo"));
        assert_eq!(empty.timestamp_range(), None);
    }

    #[test]
    fn test_file_count() {
        let timestamps = HashMap::from([
            (PathBuf::from("a.rs"), 1000),
            (PathBuf::from("b.rs"), 2000),
            (PathBuf::from("c.rs"), 3000),
        ]);
        let index = RecencyIndex::from_timestamps(timestamps, Path::new("/repo"));
        assert_eq!(index.file_count(), 3);
    }
}