use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::{Arc, Mutex};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
/// Lists every path tracked by the git repository at `root`.
///
/// Runs `git -C <root> ls-files -z` and splits the NUL-separated output into
/// paths. The paths are stored exactly as git prints them.
///
/// Returns `None` when `git` cannot be spawned, when it exits unsuccessfully
/// (for example because `root` is not inside a repository), or when any listed
/// path is not valid UTF-8.
pub fn collect_tracked_paths(root: &Path) -> Option<HashSet<PathBuf>> {
    let listing = Command::new("git")
        .arg("-C")
        .arg(root)
        .args(["ls-files", "-z"])
        .output()
        .ok()?;
    if !listing.status.success() {
        return None;
    }
    // Collecting into `Option<_>` stops at the first non-UTF-8 entry and
    // yields `None` for the whole listing.
    listing
        .stdout
        .split(|byte| *byte == 0)
        .filter(|entry| !entry.is_empty())
        .map(|entry| std::str::from_utf8(entry).ok().map(PathBuf::from))
        .collect()
}
/// Collects the set of changed paths in the repository at `root`.
///
/// * With `Some(base)`, runs
///   `git diff --name-only --relative -z <base>...HEAD`.
/// * With `None`, runs
///   `git ls-files --modified --others --exclude-standard -z`.
///
/// The NUL-separated output is split into paths, stored exactly as git prints
/// them.
///
/// Returns `None` when `git` cannot be spawned, when it exits unsuccessfully,
/// or when any reported path is not valid UTF-8.
pub fn collect_changed_paths(root: &Path, base: Option<&str>) -> Option<HashSet<PathBuf>> {
    let mut git = Command::new("git");
    git.arg("-C").arg(root);
    if let Some(base) = base {
        git.args(["diff", "--name-only", "--relative", "-z"])
            .arg(format!("{base}...HEAD"));
    } else {
        git.args([
            "ls-files",
            "--modified",
            "--others",
            "--exclude-standard",
            "-z",
        ]);
    }

    let listing = git.output().ok()?;
    if !listing.status.success() {
        return None;
    }
    // Collecting into `Option<_>` stops at the first non-UTF-8 entry and
    // yields `None` for the whole listing.
    listing
        .stdout
        .split(|byte| *byte == 0)
        .filter(|entry| !entry.is_empty())
        .map(|entry| std::str::from_utf8(entry).ok().map(PathBuf::from))
        .collect()
}
/// Returns the full message of the most recent commit of the repository at `root`.
///
/// Runs `git -C <root> log -1 --format=%B` and strips every trailing newline
/// from the output.
///
/// Returns `None` when `git` cannot be spawned, when it exits unsuccessfully,
/// or when the message is not valid UTF-8.
pub fn head_commit_message(root: &Path) -> Option<String> {
    let mut git = Command::new("git");
    git.arg("-C").arg(root).args(["log", "-1", "--format=%B"]);

    let result = git.output().ok()?;
    if !result.status.success() {
        return None;
    }
    String::from_utf8(result.stdout)
        .ok()
        .map(|text| text.trim_end_matches('\n').to_owned())
}
/// One commit as reported by `commit_messages_in_range`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitRecord {
/// Abbreviated commit hash, as printed by git's `%h` placeholder.
pub sha: String,
/// Raw commit message (git's `%B`) with trailing newlines removed.
pub message: String,
}
/// Failure reported by `commit_messages_in_range`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommitRangeError {
/// `git log` exited unsuccessfully for the requested range; `stderr` holds
/// git's error output, lossily decoded as UTF-8 and trimmed.
BadRange { stderr: String },
}
/// Lists the commits in `<since>..HEAD`, oldest first, with their messages.
///
/// Merge commits are excluded unless `include_merges` is `true`.
///
/// # Returns
///
/// * `Ok(None)` when `git` cannot be spawned or `root` is not inside a git
///   repository (the `git rev-parse --git-dir` probe fails).
/// * `Ok(Some(records))` with one [`CommitRecord`] per commit otherwise.
///
/// # Errors
///
/// Returns [`CommitRangeError::BadRange`] carrying git's trimmed stderr when
/// the repository probe succeeds but `git log` exits unsuccessfully.
pub fn commit_messages_in_range(
    root: &Path,
    since: &str,
    include_merges: bool,
) -> Result<Option<Vec<CommitRecord>>, CommitRangeError> {
    // Probe first so "not a repository" can be told apart from a bad range.
    let inside_repo = Command::new("git")
        .arg("-C")
        .arg(root)
        .args(["rev-parse", "--git-dir"])
        .output()
        .map(|probe| probe.status.success())
        .unwrap_or(false);
    if !inside_repo {
        return Ok(None);
    }

    let mut log = Command::new("git");
    log.arg("-C").arg(root).args([
        "log",
        "--reverse",
        "--abbrev-commit",
        // <short sha> NUL <raw message> RS — the layout `parse_commit_log` expects.
        "--format=%h%x00%B%x1e",
    ]);
    if !include_merges {
        log.arg("--no-merges");
    }
    log.arg(format!("{since}..HEAD"));

    let listing = match log.output() {
        Ok(listing) => listing,
        Err(_) => return Ok(None),
    };
    if listing.status.success() {
        Ok(Some(parse_commit_log(&listing.stdout)))
    } else {
        Err(CommitRangeError::BadRange {
            stderr: String::from_utf8_lossy(&listing.stderr).trim().to_string(),
        })
    }
}
/// Parses `git log` output in the form `<sha> NUL <message> RS`, repeated.
///
/// Records are separated by the ASCII record separator (`0x1e`). A single
/// newline in front of a record is ignored. Within a record, the first NUL
/// byte splits the hash from the message, and trailing newlines are trimmed
/// from the message.
///
/// Records that are empty, contain no NUL byte, or are not valid UTF-8 are
/// skipped.
fn parse_commit_log(stdout: &[u8]) -> Vec<CommitRecord> {
    stdout
        .split(|&byte| byte == 0x1e)
        .filter(|chunk| !chunk.is_empty())
        .filter_map(|chunk| {
            let chunk = chunk.strip_prefix(b"\n").unwrap_or(chunk);
            // Only the first NUL is a delimiter; later ones stay in the message.
            let nul = chunk.iter().position(|&byte| byte == 0)?;
            let sha = std::str::from_utf8(&chunk[..nul]).ok()?;
            let body = std::str::from_utf8(&chunk[nul + 1..]).ok()?;
            Some(CommitRecord {
                sha: sha.to_owned(),
                message: body.trim_end_matches('\n').to_owned(),
            })
        })
        .collect()
}
/// One source line together with blame metadata, as produced by
/// `parse_porcelain` from `git blame --line-porcelain` output.
#[derive(Debug, Clone)]
pub struct BlameLine {
/// Second number following the commit id on the porcelain header line
/// (the parser calls it the "final" line number).
pub line_number: usize,
/// Author timestamp, built from the `author-time` value in seconds since
/// the Unix epoch.
pub author_time: SystemTime,
/// Text of the line, without the leading tab that marks it in the output.
pub content: String,
}
pub fn blame_lines(root: &Path, rel_path: &Path) -> Option<Vec<BlameLine>> {
let output = Command::new("git")
.arg("-C")
.arg(root)
.args(["blame", "--line-porcelain", "--"])
.arg(rel_path)
.output()
.ok()?;
if !output.status.success() {
return None;
}
let text = std::str::from_utf8(&output.stdout).ok()?;
Some(parse_porcelain(text))
}
/// Parses the output of `git blame --line-porcelain` into [`BlameLine`]s.
///
/// The parser reads three kinds of lines and ignores everything else:
///
/// * a header line `<object id> <orig line> <final line> [<count>]`, where the
///   object id is 40 (SHA-1) or 64 (SHA-256) hexadecimal characters — the
///   `<final line>` column becomes [`BlameLine::line_number`];
/// * `author-time <seconds since the Unix epoch>`;
/// * a line starting with a tab, which carries the source text and ends the
///   current block.
///
/// A block is emitted only when both a line number and an author time were
/// seen before its content line. A block is skipped, without panicking, when
/// its metadata is missing, non-numeric, or an `author-time` too large to
/// represent as a [`SystemTime`].
fn parse_porcelain(text: &str) -> Vec<BlameLine> {
    let mut out = Vec::new();
    let mut final_line: Option<usize> = None;
    let mut author_time: Option<SystemTime> = None;

    for line in text.lines() {
        if let Some(rest) = line.strip_prefix('\t') {
            // Both fields are taken unconditionally so metadata never leaks
            // from one block into the next.
            if let (Some(n), Some(t)) = (final_line.take(), author_time.take()) {
                out.push(BlameLine {
                    line_number: n,
                    author_time: t,
                    content: rest.to_string(),
                });
            }
            continue;
        }

        let (key, value) = line.split_once(' ').unwrap_or((line, ""));
        match key {
            "author-time" => {
                // `checked_add` instead of `+`: an absurdly large timestamp
                // must not abort the whole parse with an overflow panic.
                let parsed = value
                    .parse::<u64>()
                    .ok()
                    .and_then(|secs| UNIX_EPOCH.checked_add(Duration::from_secs(secs)));
                if let Some(t) = parsed {
                    author_time = Some(t);
                }
            }
            // Header line: the key is a full object id (SHA-1 or SHA-256).
            sha if matches!(sha.len(), 40 | 64)
                && sha.chars().all(|c| c.is_ascii_hexdigit()) =>
            {
                // Columns after the id are `<orig> <final> [<count>]`; the
                // second one is the line number in the blamed file.
                let parsed = value
                    .split(' ')
                    .nth(1)
                    .and_then(|col| col.parse::<usize>().ok());
                if let Some(n) = parsed {
                    final_line = Some(n);
                }
            }
            _ => {}
        }
    }
    out
}
/// Memoising front-end for [`blame_lines`], keyed by repository-relative path.
///
/// Both successful and failed lookups are remembered, so `git blame` is run
/// at most once per path for the lifetime of the cache.
#[derive(Debug)]
pub struct BlameCache {
    /// Repository root handed to every `blame_lines` call.
    root: PathBuf,
    /// Outcome of each lookup performed so far.
    inner: Mutex<HashMap<PathBuf, CacheEntry>>,
}

/// Remembered outcome of one blame lookup.
#[derive(Debug, Clone)]
enum CacheEntry {
    /// Blame succeeded; the parsed lines are shared with callers.
    Ok(Arc<Vec<BlameLine>>),
    /// Blame failed; later lookups return `None` without running git again.
    Failed,
}

impl BlameCache {
    /// Creates an empty cache for the repository rooted at `root`.
    pub fn new(root: PathBuf) -> Self {
        let inner = Mutex::new(HashMap::new());
        Self { root, inner }
    }

    /// Returns the blame for `rel_path`, computing and caching it on first use.
    ///
    /// Returns `None` when `blame_lines` fails for this path; the failure is
    /// cached as well. The internal lock is held for the whole call, including
    /// the time spent in `blame_lines`.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn get(&self, rel_path: &Path) -> Option<Arc<Vec<BlameLine>>> {
        let mut entries = self.inner.lock().expect("blame cache lock poisoned");

        match entries.get(rel_path) {
            Some(CacheEntry::Ok(lines)) => return Some(Arc::clone(lines)),
            Some(CacheEntry::Failed) => return None,
            None => {}
        }

        let fresh = blame_lines(&self.root, rel_path).map(Arc::new);
        let entry = match &fresh {
            Some(lines) => CacheEntry::Ok(Arc::clone(lines)),
            None => CacheEntry::Failed,
        };
        entries.insert(rel_path.to_path_buf(), entry);
        fresh
    }
}
/// Reports whether any path in `tracked` lies under `dir_rel`.
///
/// The comparison uses [`Path::starts_with`], which matches whole path
/// components: `tar` is not treated as a prefix of `target/debug`.
pub fn dir_has_tracked_files<S>(
    dir_rel: &Path,
    tracked: &std::collections::HashSet<PathBuf, S>,
) -> bool
where
    S: std::hash::BuildHasher,
{
    for path in tracked {
        if path.starts_with(dir_rel) {
            return true;
        }
    }
    false
}
// Unit tests for the git helpers in this file. The parser tests run on literal
// input; the remaining tests invoke the `git` binary against directories
// created with the `tempfile` crate.
#[cfg(test)]
mod tests {
use super::*;
// A fresh temporary directory yields no tracked-path listing.
#[test]
fn collect_returns_none_outside_git() {
let tmp = tempfile::tempdir().unwrap();
let result = collect_tracked_paths(tmp.path());
assert!(result.is_none());
}
// Both the working-tree mode (`None`) and the diff mode (`Some(base)`) yield
// `None` for a fresh temporary directory.
#[test]
fn collect_changed_returns_none_outside_git() {
let tmp = tempfile::tempdir().unwrap();
assert!(collect_changed_paths(tmp.path(), None).is_none());
assert!(collect_changed_paths(tmp.path(), Some("main")).is_none());
}
// No HEAD message is available for a fresh temporary directory.
#[test]
fn head_message_returns_none_outside_git() {
let tmp = tempfile::tempdir().unwrap();
assert!(head_commit_message(tmp.path()).is_none());
}
// Two complete porcelain blocks from different commits produce two lines, each
// with its own line number, author time, and content.
#[test]
fn parse_porcelain_two_lines_two_commits() {
let porcelain = "\
abcd1234abcd1234abcd1234abcd1234abcd1234 1 1 1
author Old Author
author-mail <old@example.com>
author-time 1700000000
author-tz +0000
committer Old Author
committer-mail <old@example.com>
committer-time 1700000000
committer-tz +0000
summary first commit
filename src/main.rs
\told line content
ef01ef01ef01ef01ef01ef01ef01ef01ef01ef01 2 2 1
author New Author
author-mail <new@example.com>
author-time 1750000000
author-tz +0000
committer New Author
committer-mail <new@example.com>
committer-time 1750000000
committer-tz +0000
summary recent commit
filename src/main.rs
\tnew line content
";
let lines = parse_porcelain(porcelain);
assert_eq!(lines.len(), 2);
assert_eq!(lines[0].line_number, 1);
assert_eq!(lines[0].content, "old line content");
assert_eq!(
lines[0].author_time,
UNIX_EPOCH + Duration::from_secs(1_700_000_000)
);
assert_eq!(lines[1].line_number, 2);
assert_eq!(lines[1].content, "new line content");
assert_eq!(
lines[1].author_time,
UNIX_EPOCH + Duration::from_secs(1_750_000_000)
);
}
// A `previous <sha> <file>` line begins with the word `previous`, so it is not
// taken for a block header even though it contains a 40-hex id.
#[test]
fn parse_porcelain_handles_previous_marker() {
let porcelain = "\
abcd1234abcd1234abcd1234abcd1234abcd1234 5 5 1
author X
author-mail <x@example.com>
author-time 1700000000
author-tz +0000
committer X
committer-mail <x@example.com>
committer-time 1700000000
committer-tz +0000
summary did a thing
previous 1111111111111111111111111111111111111111 src/old.rs
filename src/main.rs
\tline body
";
let lines = parse_porcelain(porcelain);
assert_eq!(lines.len(), 1);
assert_eq!(lines[0].line_number, 5);
assert_eq!(lines[0].content, "line body");
}
// A block whose `author-time` does not parse is dropped; the following
// well-formed block is still returned.
#[test]
fn parse_porcelain_skips_blocks_missing_metadata() {
let porcelain = "\
abcd1234abcd1234abcd1234abcd1234abcd1234 1 1 1
author X
author-time not-a-number
filename a.rs
\tbroken
ef01ef01ef01ef01ef01ef01ef01ef01ef01ef01 2 2 1
author Y
author-time 1700000000
filename a.rs
\tworks
";
let lines = parse_porcelain(porcelain);
assert_eq!(lines.len(), 1);
assert_eq!(lines[0].content, "works");
}
// Blaming a file in a fresh temporary directory yields `None`.
#[test]
fn blame_lines_returns_none_outside_git() {
let tmp = tempfile::tempdir().unwrap();
let result = blame_lines(tmp.path(), Path::new("missing.rs"));
assert!(result.is_none());
}
// A failed lookup is stored as `CacheEntry::Failed`, and repeating the lookup
// still returns `None`.
#[test]
fn blame_cache_memoises_failure() {
let tmp = tempfile::tempdir().unwrap();
let cache = BlameCache::new(tmp.path().to_path_buf());
assert!(cache.get(Path::new("missing.rs")).is_none());
assert!(cache.get(Path::new("missing.rs")).is_none());
let guard = cache.inner.lock().unwrap();
assert!(matches!(
guard.get(Path::new("missing.rs")),
Some(CacheEntry::Failed)
));
}
// Prefix matching works on whole path components: `tar` and `target` do not
// match `src/main.rs` or `README.md`.
#[test]
fn dir_has_tracked_files_walks_prefix() {
let mut set = HashSet::new();
set.insert(PathBuf::from("src/main.rs"));
set.insert(PathBuf::from("README.md"));
assert!(dir_has_tracked_files(Path::new("src"), &set));
assert!(!dir_has_tracked_files(Path::new("target"), &set));
assert!(!dir_has_tracked_files(Path::new("tar"), &set));
}
// Test helper: initialises a repository on branch `main` in a temporary
// directory, sets a local identity and disables commit signing, then creates
// one empty commit per entry of `subjects`, in order.
fn make_repo_with_commits(subjects: &[&str]) -> tempfile::TempDir {
let tmp = tempfile::tempdir().unwrap();
let init_dir = tmp.path();
for args in [
vec!["init", "-q", "-b", "main"],
vec!["config", "user.email", "test@example.com"],
vec!["config", "user.name", "Test"],
vec!["config", "commit.gpgsign", "false"],
] {
let out = Command::new("git")
.arg("-C")
.arg(init_dir)
.args(&args)
.output()
.unwrap();
assert!(out.status.success(), "git {args:?} failed");
}
for subject in subjects {
let out = Command::new("git")
.arg("-C")
.arg(init_dir)
.args(["commit", "--allow-empty", "-m", subject])
.output()
.unwrap();
assert!(
out.status.success(),
"git commit failed: stderr={}",
String::from_utf8_lossy(&out.stderr)
);
}
tmp
}
// Empty log output produces no records.
#[test]
fn parse_commit_log_empty_input() {
assert!(parse_commit_log(b"").is_empty());
}
// One record: the hash precedes the NUL byte, and the message keeps its
// interior newlines but loses the trailing one.
#[test]
fn parse_commit_log_single_commit() {
let raw = b"abc1234\0subject line\n\nbody line one\nbody line two\n\x1e";
let records = parse_commit_log(raw);
assert_eq!(records.len(), 1);
assert_eq!(records[0].sha, "abc1234");
assert_eq!(
records[0].message,
"subject line\n\nbody line one\nbody line two"
);
}
// The newline that follows a record separator is not made part of the next
// record's hash.
#[test]
fn parse_commit_log_multiple_commits() {
let raw = b"a1\0first\n\x1e\nb2\0second\n\x1e";
let records = parse_commit_log(raw);
assert_eq!(records.len(), 2);
assert_eq!(records[0].sha, "a1");
assert_eq!(records[0].message, "first");
assert_eq!(records[1].sha, "b2");
assert_eq!(records[1].message, "second");
}
// A message consisting of a subject only is returned without its trailing
// newline.
#[test]
fn parse_commit_log_subject_only_no_body() {
let raw = b"deadbef\0just the subject\n\x1e";
let records = parse_commit_log(raw);
assert_eq!(records.len(), 1);
assert_eq!(records[0].message, "just the subject");
}
// Blank lines between body paragraphs are preserved; only trailing newlines
// are trimmed.
#[test]
fn parse_commit_log_preserves_blank_lines_in_body() {
let raw = b"sha7777\0fix: thing\n\nfirst paragraph.\n\nsecond paragraph.\n\nthird.\n\x1e";
let records = parse_commit_log(raw);
assert_eq!(records.len(), 1);
assert_eq!(
records[0].message,
"fix: thing\n\nfirst paragraph.\n\nsecond paragraph.\n\nthird."
);
}
// A record whose message bytes are not valid UTF-8 is skipped rather than
// reported as an error.
#[test]
fn parse_commit_log_skips_record_with_invalid_utf8() {
let mut raw: Vec<u8> = b"abc1234\0".to_vec();
raw.extend_from_slice(&[0xff, 0xfe, 0xfd]); raw.push(0x1e);
let records = parse_commit_log(&raw);
assert!(records.is_empty());
}
// For a fresh temporary directory the result is `Ok(None)`, not an error.
#[test]
fn commit_range_returns_none_outside_git() {
let tmp = tempfile::tempdir().unwrap();
let result = commit_messages_in_range(tmp.path(), "main", false);
assert!(matches!(result, Ok(None)));
}
// The range `HEAD..HEAD` is valid but empty, giving `Some` of an empty vector.
#[test]
fn commit_range_returns_empty_vec_for_head_to_head() {
let repo = make_repo_with_commits(&["feat: first commit"]);
let result = commit_messages_in_range(repo.path(), "HEAD", false).unwrap();
assert_eq!(result, Some(Vec::new()));
}
// With four commits and `since` set to the first one, the three later commits
// come back oldest first, each with a hexadecimal hash of at least 7 characters.
#[test]
fn commit_range_enumerates_real_commits_oldest_first() {
let repo =
make_repo_with_commits(&["root: zero", "feat: alpha", "fix: beta", "chore: gamma"]);
let root_sha = String::from_utf8(
Command::new("git")
.arg("-C")
.arg(repo.path())
.args(["rev-parse", "HEAD~3"])
.output()
.unwrap()
.stdout,
)
.unwrap()
.trim()
.to_string();
let records = commit_messages_in_range(repo.path(), &root_sha, false)
.unwrap()
.unwrap();
assert_eq!(records.len(), 3);
assert_eq!(records[0].message, "feat: alpha");
assert_eq!(records[1].message, "fix: beta");
assert_eq!(records[2].message, "chore: gamma");
for r in &records {
assert!(r.sha.len() >= 7);
assert!(r.sha.chars().all(|c| c.is_ascii_hexdigit()));
}
}
// Builds a feature branch with two commits and merges it into `main` with
// `--no-ff`. With `include_merges == false` only the two feature commits are
// listed; with `true` the merge commit is listed as well.
#[test]
fn commit_range_skips_merges_by_default() {
let repo = make_repo_with_commits(&["init commit on main"]);
let root = repo.path();
let run = |args: &[&str]| {
let out = Command::new("git")
.arg("-C")
.arg(root)
.args(args)
.output()
.unwrap();
assert!(
out.status.success(),
"git {args:?} failed: {}",
String::from_utf8_lossy(&out.stderr)
);
String::from_utf8(out.stdout).unwrap()
};
let base_sha = run(&["rev-parse", "HEAD"]).trim().to_string();
run(&["checkout", "-q", "-b", "feature"]);
run(&["commit", "--allow-empty", "-m", "feat: A"]);
run(&["commit", "--allow-empty", "-m", "fix: B"]);
run(&["checkout", "-q", "main"]);
run(&["merge", "--no-ff", "--no-edit", "feature"]);
let records = commit_messages_in_range(root, &base_sha, false)
.unwrap()
.unwrap();
let subjects: Vec<&str> = records.iter().map(|r| r.message.as_str()).collect();
assert_eq!(subjects, vec!["feat: A", "fix: B"]);
let with_merge = commit_messages_in_range(root, &base_sha, true)
.unwrap()
.unwrap();
assert_eq!(with_merge.len(), 3);
assert!(with_merge.iter().any(|r| r.message.starts_with("Merge ")));
}
// Inside a real repository, an unknown ref produces `BadRange` with a
// non-empty stderr.
#[test]
fn commit_range_returns_bad_range_for_unknown_ref() {
let repo = make_repo_with_commits(&["init"]);
let result = commit_messages_in_range(repo.path(), "does-not-exist-ref", false);
match result {
Err(CommitRangeError::BadRange { stderr }) => {
assert!(!stderr.is_empty());
}
other => panic!("expected BadRange, got {other:?}"),
}
}
}