use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use super::{hash, path_string, AsrError, AsrResult};
/// The kind of Git repository a path resolved to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RepoKind {
/// A non-bare repository; `canonical_repo_root` reports this whenever
/// `git rev-parse --is-bare-repository` does not answer `true`.
Worktree,
/// A bare repository (`git rev-parse --is-bare-repository` answered `true`).
Bare,
}
/// A repository location together with the kind of repository found there.
#[derive(Debug, Clone)]
pub(crate) struct ResolvedRepo {
/// Canonicalized repository root: the top-level directory reported by Git for
/// worktree repositories, or the canonicalized input path for bare ones.
pub root: PathBuf,
/// Whether `root` refers to a worktree or a bare repository.
pub kind: RepoKind,
}
/// Summary of a repository's uncommitted state.
///
/// For bare repositories all three flags are `false` and only the fingerprint
/// carries information.
#[derive(Debug, Clone)]
pub(crate) struct WorktreeStatus {
/// `true` when `git status` reported at least one entry.
pub dirty: bool,
/// `true` when at least one status entry is an untracked (`??`) entry.
pub untracked: bool,
/// `true` when at least one status entry is something other than untracked.
pub modified: bool,
/// Opaque fingerprint of the state: a 16-digit hexadecimal hash for working
/// trees, or `bare:<tree id>` (`bare:empty` when `HEAD` has no tree) for bare
/// repositories.
pub worktree_fingerprint: String,
}
/// One hunk of a unified diff, as parsed from `git diff` output.
#[derive(Debug, Clone)]
pub(crate) struct GitDiffHunk {
/// Path of the file the hunk belongs to (the new-side path when one is
/// known, otherwise the old-side path, otherwise the literal `unknown`).
pub path: String,
/// First line of the hunk on the old side, from `@@ -old_start,old_lines ...`.
pub old_start: usize,
/// Number of old-side lines the header declares (1 when the count is omitted).
pub old_lines: usize,
/// First line of the hunk on the new side, from `@@ ... +new_start,new_lines @@`.
pub new_start: usize,
/// Number of new-side lines the header declares (1 when the count is omitted).
pub new_lines: usize,
/// Number of `+` lines counted in the hunk body.
pub added_lines: usize,
/// Number of `-` lines counted in the hunk body.
pub removed_lines: usize,
/// Number of unchanged (space-prefixed) lines counted in the hunk body.
pub context_lines: usize,
/// Trimmed text following the closing `@@` of the hunk header, if any.
pub section: Option<String>,
}
/// Resolves the canonical top-level directory of the Git working tree that
/// contains `path`.
///
/// # Errors
/// - `repo_unreadable` when `path` does not exist, or when the root reported by
///   Git cannot be canonicalized.
/// - `repo_not_git` when `git rev-parse --show-toplevel` exits unsuccessfully or
///   prints nothing.
/// - Any error produced by `git_output` when Git itself cannot be started.
pub(crate) fn canonical_git_root(path: &Path) -> AsrResult<PathBuf> {
    if !path.exists() {
        return Err(AsrError::with_path(
            "repo_unreadable",
            "Repository path does not exist",
            path_string(path),
        ));
    }

    let toplevel = git_output(path, &["rev-parse", "--show-toplevel"])?;
    if !toplevel.status.success() {
        return Err(AsrError::with_path(
            "repo_not_git",
            "Path is not inside a Git repository",
            path_string(path),
        ));
    }

    // Git terminates the path with a newline; surrounding whitespace is dropped.
    let reported = String::from_utf8_lossy(&toplevel.stdout);
    let reported = reported.trim();
    if reported.is_empty() {
        return Err(AsrError::with_path(
            "repo_not_git",
            "Git repository root could not be resolved",
            path_string(path),
        ));
    }

    match Path::new(reported).canonicalize() {
        Ok(root) => Ok(root),
        Err(err) => Err(AsrError::with_path(
            "repo_unreadable",
            format!("Git repository root is unreadable: {err}"),
            path_string(path),
        )),
    }
}
/// Resolves `path` to a canonical repository root and reports whether the
/// repository is bare or has a working tree.
///
/// Bare repositories resolve to the canonicalized `path` itself; everything
/// else is delegated to `canonical_git_root`.
///
/// # Errors
/// - `repo_unreadable` when `path` does not exist or a bare root cannot be
///   canonicalized.
/// - Any error from `is_bare_repository` or `canonical_git_root`.
pub(crate) fn canonical_repo_root(path: &Path) -> AsrResult<ResolvedRepo> {
    if !path.exists() {
        return Err(AsrError::with_path(
            "repo_unreadable",
            "Repository path does not exist",
            path_string(path),
        ));
    }

    let (root, kind) = if is_bare_repository(path)? {
        let bare_root = path.canonicalize().map_err(|err| {
            AsrError::with_path(
                "repo_unreadable",
                format!("Bare Git repository root is unreadable: {err}"),
                path_string(path),
            )
        })?;
        (bare_root, RepoKind::Bare)
    } else {
        (canonical_git_root(path)?, RepoKind::Worktree)
    };

    Ok(ResolvedRepo { root, kind })
}
pub(crate) fn canonical_bare_root(path: &Path) -> AsrResult<PathBuf> {
let resolved = canonical_repo_root(path)?;
if resolved.kind != RepoKind::Bare {
return Err(AsrError::with_path(
"repo_kind_mismatch",
"Registered repository is not a bare Git repository",
path_string(path),
));
}
Ok(resolved.root)
}
/// Returns the short name of the branch `HEAD` points at.
///
/// Yields `None` when Git cannot be run, when `git symbolic-ref` exits
/// unsuccessfully, or when it prints nothing.
pub(crate) fn current_branch(root: &Path) -> Option<String> {
    match git_output(root, &["symbolic-ref", "--quiet", "--short", "HEAD"]) {
        Ok(output) if output.status.success() => nonempty_stdout(output.stdout),
        _ => None,
    }
}
/// Returns the object id that `HEAD` resolves to.
///
/// Yields `None` when Git cannot be run, when `git rev-parse --verify HEAD`
/// exits unsuccessfully, or when it prints nothing.
pub(crate) fn head_commit(root: &Path) -> Option<String> {
    git_output(root, &["rev-parse", "--verify", "HEAD"])
        .ok()
        .filter(|output| output.status.success())
        .and_then(|output| nonempty_stdout(output.stdout))
}
/// Computes the status of the repository at `root`, dispatching on whether the
/// caller identified it as bare.
pub(crate) fn repository_status(root: &Path, bare: bool) -> AsrResult<WorktreeStatus> {
    if bare {
        bare_repository_status(root)
    } else {
        working_tree_status(root)
    }
}
/// Reads the working tree status of the repository at `root`.
///
/// Runs `git status --porcelain=v1 --untracked-files=normal -z` and summarises
/// the NUL-separated entries: any entry makes the tree `dirty`, `??` entries
/// mark it `untracked`, and every other entry marks it `modified`. The raw
/// status bytes are also fed into the worktree fingerprint.
///
/// # Errors
/// - `git_status_failed` when `git status` exits unsuccessfully.
/// - Any error from `git_output` or `worktree_fingerprint`.
pub(crate) fn working_tree_status(root: &Path) -> AsrResult<WorktreeStatus> {
    let output = git_output(
        root,
        &["status", "--porcelain=v1", "--untracked-files=normal", "-z"],
    )?;
    if !output.status.success() {
        return Err(AsrError::with_path(
            "git_status_failed",
            "Failed to read Git working tree status",
            path_string(root),
        ));
    }

    let mut dirty = false;
    let mut untracked = false;
    let mut modified = false;
    let mut entries = output
        .stdout
        .split(|byte| *byte == 0)
        .filter(|entry| !entry.is_empty());
    // `while let` rather than `for`: rename/copy entries consume an extra field.
    while let Some(entry) = entries.next() {
        dirty = true;
        if entry.starts_with(b"??") {
            untracked = true;
        } else {
            modified = true;
        }
        // In `-z` output a rename or copy is written as `XY <to>\0<from>\0`.
        // The `<from>` field is a bare path without a status prefix, so it must
        // be skipped; otherwise a source file whose name starts with `??` would
        // be misread as an untracked entry.
        let has_source_path = entry
            .iter()
            .take(2)
            .any(|&code| code == b'R' || code == b'C');
        if has_source_path {
            let _source_path = entries.next();
        }
    }

    let worktree_fingerprint = worktree_fingerprint(root, &output.stdout)?;
    Ok(WorktreeStatus {
        dirty,
        untracked,
        modified,
        worktree_fingerprint,
    })
}
/// Checks out the `HEAD` tree of the bare repository at `bare_root` into
/// `destination`, creating the directory (and its parents) first.
///
/// Git is invoked as `git --git-dir <bare_root> --work-tree <destination>
/// checkout -f HEAD -- .`.
///
/// # Errors
/// - `bare_checkout_failed` when the destination cannot be created or the
///   checkout exits unsuccessfully (Git's stderr is used as the message when
///   it is non-empty).
/// - `git_command_failed` when Git cannot be started.
pub(crate) fn checkout_head_tree(bare_root: &Path, destination: &Path) -> AsrResult<()> {
    if let Err(err) = fs::create_dir_all(destination) {
        return Err(AsrError::with_path(
            "bare_checkout_failed",
            format!("Failed to create bare repository checkout directory: {err}"),
            path_string(destination),
        ));
    }

    let mut checkout = Command::new("git");
    checkout
        .arg("--git-dir")
        .arg(bare_root)
        .arg("--work-tree")
        .arg(destination)
        .args(["checkout", "-f", "HEAD", "--", "."]);
    let output = match checkout.output() {
        Ok(output) => output,
        Err(err) => {
            return Err(AsrError::with_path(
                "git_command_failed",
                format!("Failed to execute git checkout for bare repository: {err}"),
                path_string(bare_root),
            ));
        }
    };

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
        let message = if stderr.is_empty() {
            "Failed to checkout bare repository HEAD".to_string()
        } else {
            stderr
        };
        return Err(AsrError::with_path(
            "bare_checkout_failed",
            message,
            path_string(bare_root),
        ));
    }
    Ok(())
}
/// Returns the bytes of `relative_path` as stored in the `HEAD` commit of the
/// repository at `bare_root` (via `git show HEAD:<relative_path>`).
///
/// # Errors
/// - `file_not_found` when `git show` exits unsuccessfully; Git's stderr is
///   used as the message when it is non-empty.
/// - Any error from `git_output` when Git cannot be started.
pub(crate) fn show_head_file(bare_root: &Path, relative_path: &str) -> AsrResult<Vec<u8>> {
    let spec = format!("HEAD:{relative_path}");
    let shown = git_output(bare_root, &["show", spec.as_str()])?;
    if !shown.status.success() {
        let stderr = String::from_utf8_lossy(&shown.stderr).trim().to_string();
        let message = if stderr.is_empty() {
            "File does not exist in bare repository HEAD".to_string()
        } else {
            stderr
        };
        return Err(AsrError::with_path("file_not_found", message, relative_path));
    }
    Ok(shown.stdout)
}
/// Lists the diff hunks between the commits `base` and `head` in the
/// repository at `root`.
///
/// Both refs are first verified to resolve to commits. The diff is then
/// produced with every output-shaping option pinned so that the result does
/// not depend on the user's Git configuration:
///
/// - `-c core.quotePath=false` keeps non-ASCII paths unquoted, so the
///   `a/` / `b/` prefix matching in the parser still applies to them;
/// - `--src-prefix=a/` and `--dst-prefix=b/` override `diff.noprefix`,
///   `diff.mnemonicPrefix` and custom prefix settings, which would otherwise
///   make the parser unable to recognise file headers.
///
/// # Errors
/// - `invalid_git_ref` (from `verify_commit_ref`) when a ref is not a commit.
/// - `git_diff_failed` when `git diff` exits unsuccessfully; Git's stderr is
///   used as the message when it is non-empty.
/// - Any error from `git_output` when Git cannot be started.
pub(crate) fn diff_hunks(root: &Path, base: &str, head: &str) -> AsrResult<Vec<GitDiffHunk>> {
    verify_commit_ref(root, base, "base")?;
    verify_commit_ref(root, head, "head")?;
    let output = git_output(
        root,
        &[
            // Global option: must precede the subcommand name.
            "-c",
            "core.quotePath=false",
            "diff",
            "--no-ext-diff",
            "--no-color",
            "--find-renames",
            "--unified=3",
            "--src-prefix=a/",
            "--dst-prefix=b/",
            base,
            head,
            "--",
        ],
    )?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
        return Err(AsrError::with_path(
            "git_diff_failed",
            if stderr.is_empty() {
                "Failed to read Git diff hunks".to_string()
            } else {
                stderr
            },
            path_string(root),
        ));
    }
    Ok(parse_diff_hunks(&String::from_utf8_lossy(&output.stdout)))
}
/// Reports whether Git considers the repository at `path` to be bare.
///
/// An unsuccessful `git rev-parse --is-bare-repository` (for example because
/// `path` is not a repository) is treated as "not bare" rather than an error.
///
/// # Errors
/// Any error from `git_output` when Git cannot be started.
fn is_bare_repository(path: &Path) -> AsrResult<bool> {
    let probe = git_output(path, &["rev-parse", "--is-bare-repository"])?;
    let answer = String::from_utf8_lossy(&probe.stdout);
    Ok(probe.status.success() && answer.trim() == "true")
}
/// Builds the status of a bare repository.
///
/// A bare repository has no working tree, so all dirtiness flags are `false`.
/// The fingerprint is `bare:<tree id of HEAD>`, or `bare:empty` when the tree
/// cannot be resolved (Git failed to run, exited unsuccessfully, or printed
/// nothing). This function currently never returns an error.
fn bare_repository_status(root: &Path) -> AsrResult<WorktreeStatus> {
    let tree = git_output(root, &["rev-parse", "--verify", "HEAD^{tree}"])
        .ok()
        .filter(|output| output.status.success())
        .and_then(|output| nonempty_stdout(output.stdout))
        .unwrap_or_else(|| "empty".to_string());

    let worktree_fingerprint = format!("bare:{tree}");
    Ok(WorktreeStatus {
        dirty: false,
        untracked: false,
        modified: false,
        worktree_fingerprint,
    })
}
/// Checks that `reference` resolves to a commit in the repository at `root`.
///
/// `field` names the argument being validated and is only used in the error
/// message.
///
/// # Errors
/// - `invalid_git_ref` when `git rev-parse --verify <reference>^{commit}` exits
///   unsuccessfully; the message includes Git's trimmed stderr.
/// - Any error from `git_output` when Git cannot be started.
fn verify_commit_ref(root: &Path, reference: &str, field: &'static str) -> AsrResult<()> {
    let spec = format!("{reference}^{{commit}}");
    let probe = git_output(root, &["rev-parse", "--verify", spec.as_str()])?;
    if !probe.status.success() {
        let message = format!(
            "Invalid {field} Git ref: {}",
            String::from_utf8_lossy(&probe.stderr).trim()
        );
        return Err(AsrError::with_path(
            "invalid_git_ref",
            message,
            path_string(root),
        ));
    }
    Ok(())
}
/// Largest untracked file (in bytes) whose contents are hashed into the
/// worktree fingerprint; larger files contribute only their path and size.
const MAX_UNTRACKED_CONTENT_BYTES: u64 = 512 * 1024;
/// Size (in bytes) of the leading and trailing windows `hash_capped` hashes
/// when its input is longer than twice this value.
const MAX_DIFF_HASH_BYTES: usize = 1024 * 1024;
/// Mixes `prefix` and then `data` into the running hash `h`, bounding the work
/// done for very large inputs.
///
/// Inputs of at most `2 * MAX_DIFF_HASH_BYTES` bytes are hashed whole. Longer
/// inputs contribute their first and last `MAX_DIFF_HASH_BYTES` bytes followed
/// by their total length rendered in decimal.
fn hash_capped(h: &mut u64, prefix: &[u8], data: &[u8]) {
    hash::update_hash(h, prefix);

    let total = data.len();
    if total > MAX_DIFF_HASH_BYTES * 2 {
        let head = &data[..MAX_DIFF_HASH_BYTES];
        let tail = &data[total - MAX_DIFF_HASH_BYTES..];
        let length_tag = total.to_string();
        for piece in [head, tail, length_tag.as_bytes()] {
            hash::update_hash(h, piece);
        }
        return;
    }
    hash::update_hash(h, data);
}
/// Computes a 16-digit hexadecimal fingerprint of the working tree's
/// uncommitted state.
///
/// The running hash is fed, in this order:
/// 1. the tag `status` and the raw `git status` bytes supplied by the caller;
/// 2. the unstaged diff (`git diff`), capped by `hash_capped`;
/// 3. the staged diff (`git diff --cached`), capped by `hash_capped`;
/// 4. the tag `untracked` and the NUL-separated list of untracked files;
/// 5. for every untracked regular file with a UTF-8 path: its relative path,
///    its size, and — when the size does not exceed
///    `MAX_UNTRACKED_CONTENT_BYTES` and the file is readable — its contents.
///
/// Git invocations that cannot be started are logged at debug level and their
/// contribution is skipped; the function itself never returns an error.
fn worktree_fingerprint(root: &Path, status_bytes: &[u8]) -> AsrResult<String> {
    let mut h = hash::FNV_OFFSET;
    hash::update_hash(&mut h, b"status");
    hash::update_hash(&mut h, status_bytes);

    let unstaged = git_output(root, &["diff", "--no-ext-diff", "--no-color"]);
    match unstaged {
        Ok(diff) => hash_capped(&mut h, b"diff-worktree", &diff.stdout),
        Err(err) => log::debug!(
            "git diff failed while computing worktree fingerprint for {}: {}",
            path_string(root),
            err.message
        ),
    }

    let staged = git_output(root, &["diff", "--cached", "--no-ext-diff", "--no-color"]);
    match staged {
        Ok(diff) => hash_capped(&mut h, b"diff-index", &diff.stdout),
        Err(err) => log::debug!(
            "git diff --cached failed while computing worktree fingerprint for {}: {}",
            path_string(root),
            err.message
        ),
    }

    let listing = match git_output(root, &["ls-files", "--others", "--exclude-standard", "-z"]) {
        Ok(listing) => listing.stdout,
        Err(err) => {
            log::debug!(
                "git ls-files failed while computing worktree fingerprint for {}: {}",
                path_string(root),
                err.message
            );
            // Nothing further can be hashed without the untracked listing.
            return Ok(format!("{h:016x}"));
        }
    };

    hash::update_hash(&mut h, b"untracked");
    hash::update_hash(&mut h, &listing);
    for raw_path in listing.split(|byte| *byte == 0) {
        if raw_path.is_empty() {
            continue;
        }
        // Paths that are not valid UTF-8 contribute only via the raw listing above.
        let relative = match std::str::from_utf8(raw_path) {
            Ok(text) => text,
            Err(_) => continue,
        };
        let full_path = root.join(relative);
        let metadata = match fs::metadata(&full_path) {
            Ok(metadata) if metadata.is_file() => metadata,
            _ => continue,
        };

        let size = metadata.len();
        hash::update_hash(&mut h, relative.as_bytes());
        hash::update_hash(&mut h, size.to_string().as_bytes());
        if size > MAX_UNTRACKED_CONTENT_BYTES {
            continue;
        }
        if let Ok(bytes) = fs::read(&full_path) {
            hash::update_hash(&mut h, &bytes);
        }
    }

    Ok(format!("{h:016x}"))
}
/// Parses unified `git diff` output into per-hunk line statistics.
///
/// File headers (`diff --git`, `---`, `+++`, `Binary files ...`) establish the
/// path attributed to the hunks that follow; each `@@ ... @@` header starts a
/// new hunk. Binary file sections produce no hunks.
///
/// While a hunk header's declared line counts have not been used up, body
/// lines are classified purely by their first character. This matters because
/// hunk content can look like a header: a removed line whose content is `---`
/// (YAML, Markdown) or `-- a/x` (SQL comment), or an added line whose content
/// is `++ b/x`, is still counted as removed or added and no longer overwrites
/// the current path. An empty line inside a hunk body is treated as an
/// unchanged line (Git emits it that way when `diff.suppressBlankEmpty` is
/// set).
///
/// Lines that fall outside the declared counts — only possible with malformed
/// or hand-truncated input — keep the lenient prefix-based classification.
fn parse_diff_hunks(diff: &str) -> Vec<GitDiffHunk> {
    let mut hunks = Vec::new();
    let mut current_path: Option<String> = None;
    let mut old_path: Option<String> = None;
    let mut current_hunk: Option<GitDiffHunk> = None;
    // Lines the current hunk header still promises on the old / new side.
    let mut old_remaining = 0usize;
    let mut new_remaining = 0usize;

    for line in diff.lines() {
        // Inside a hunk body the first character alone decides the line kind.
        if let Some(hunk) = current_hunk.as_mut() {
            let consumed = match line.as_bytes().first().copied() {
                Some(b'+') if new_remaining > 0 => {
                    hunk.added_lines += 1;
                    new_remaining -= 1;
                    true
                }
                Some(b'-') if old_remaining > 0 => {
                    hunk.removed_lines += 1;
                    old_remaining -= 1;
                    true
                }
                Some(b' ') | None if old_remaining > 0 && new_remaining > 0 => {
                    hunk.context_lines += 1;
                    old_remaining -= 1;
                    new_remaining -= 1;
                    true
                }
                _ => false,
            };
            if consumed {
                continue;
            }
        }

        if line.starts_with("diff --git ") {
            push_hunk(&mut hunks, &mut current_hunk);
            old_remaining = 0;
            new_remaining = 0;
            current_path = parse_diff_git_path(line);
            old_path = current_path.clone();
            continue;
        }
        if let Some(path) = line.strip_prefix("--- a/") {
            old_path = Some(clean_diff_path(path));
            continue;
        }
        if line == "--- /dev/null" {
            old_path = None;
            continue;
        }
        if let Some(path) = line.strip_prefix("+++ b/") {
            current_path = Some(clean_diff_path(path));
            continue;
        }
        if line == "+++ /dev/null" {
            // Deleted file: attribute hunks to the path it used to have.
            current_path = old_path.clone();
            continue;
        }
        if let Some(rest) = line.strip_prefix("Binary files ") {
            if let Some((a_part, b_part)) = rest.split_once(" and ") {
                let new_path = if b_part == "/dev/null differ" || b_part == "/dev/null" {
                    a_part.strip_prefix("a/").map(clean_diff_path)
                } else {
                    b_part
                        .strip_prefix("b/")
                        .map(|p| p.trim_end_matches(" differ"))
                        .map(clean_diff_path)
                };
                if let Some(path) = new_path {
                    current_path = Some(path.clone());
                    old_path = Some(path);
                }
            }
            continue;
        }
        if let Some(header) = line.strip_prefix("@@ ") {
            push_hunk(&mut hunks, &mut current_hunk);
            old_remaining = 0;
            new_remaining = 0;
            if let Some((old_start, old_lines, new_start, new_lines, section)) =
                parse_hunk_header(header)
            {
                let path = current_path
                    .clone()
                    .or_else(|| old_path.clone())
                    .unwrap_or_else(|| "unknown".to_string());
                old_remaining = old_lines;
                new_remaining = new_lines;
                current_hunk = Some(GitDiffHunk {
                    path,
                    old_start,
                    old_lines,
                    new_start,
                    new_lines,
                    added_lines: 0,
                    removed_lines: 0,
                    context_lines: 0,
                    section,
                });
            }
            continue;
        }
        // Lenient fallback for lines beyond the counts declared by the header.
        if let Some(hunk) = current_hunk.as_mut() {
            if line.starts_with('+') && !line.starts_with("+++") {
                hunk.added_lines += 1;
            } else if line.starts_with('-') && !line.starts_with("---") {
                hunk.removed_lines += 1;
            } else if line.starts_with(' ') {
                hunk.context_lines += 1;
            }
        }
    }
    push_hunk(&mut hunks, &mut current_hunk);
    hunks
}
/// Moves the pending hunk, if there is one, onto `hunks` and leaves
/// `current_hunk` empty.
fn push_hunk(hunks: &mut Vec<GitDiffHunk>, current_hunk: &mut Option<GitDiffHunk>) {
    // An `Option` iterates over zero or one item, so this is a no-op for `None`.
    hunks.extend(current_hunk.take());
}
/// Extracts the new-side path from a `diff --git a/<old> b/<new>` line.
///
/// Because paths may themselves contain ` b/`, every occurrence is tried as
/// the separator and the first split whose two halves are identical wins (the
/// common case of an unrenamed file). When no split is symmetric — e.g. for a
/// rename — the text after the last ` b/` is used. Returns `None` when the line
/// lacks the `diff --git a/` prefix or contains no ` b/` at all.
fn parse_diff_git_path(line: &str) -> Option<String> {
    const SEPARATOR: &str = " b/";

    let tail = line.strip_prefix("diff --git a/")?;
    let symmetric = tail.match_indices(SEPARATOR).find_map(|(at, _)| {
        let left = &tail[..at];
        let right = &tail[at + SEPARATOR.len()..];
        if left == right {
            Some(clean_diff_path(right))
        } else {
            None
        }
    });

    symmetric.or_else(|| {
        tail.rsplit_once(SEPARATOR)
            .map(|(_, right)| clean_diff_path(right))
    })
}
/// Normalizes a path taken from a diff header.
///
/// Everything from the first tab onwards is dropped, surrounding whitespace
/// and double quotes are trimmed, and backslashes are replaced with `/`.
fn clean_diff_path(path: &str) -> String {
    let before_tab = match path.find('\t') {
        Some(tab) => &path[..tab],
        None => path,
    };
    before_tab
        .trim()
        .trim_matches('"')
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
/// Parses the part of a hunk header that follows the leading `@@ `.
///
/// For input such as `-1,3 +1,4 @@ fn foo()` this returns
/// `(old_start, old_lines, new_start, new_lines, section)`, where `section` is
/// the trimmed text after the closing `@@` (or `None` when it is empty).
/// Returns `None` when either range is missing or malformed.
fn parse_hunk_header(header_tail: &str) -> Option<(usize, usize, usize, usize, Option<String>)> {
    let (ranges, section) = if let Some((before, after)) = header_tail.split_once(" @@") {
        let label = after.trim();
        let section = if label.is_empty() {
            None
        } else {
            Some(label.to_string())
        };
        (before, section)
    } else {
        // No ` @@` terminator: tolerate a bare trailing `@@` and carry on.
        (header_tail.trim_end_matches("@@"), None)
    };

    let mut tokens = ranges.split_whitespace();
    let old_token = tokens.next()?;
    let new_token = tokens.next()?;
    let (old_start, old_lines) = parse_hunk_range(old_token, '-')?;
    let (new_start, new_lines) = parse_hunk_range(new_token, '+')?;
    Some((old_start, old_lines, new_start, new_lines, section))
}
/// Parses one side of a hunk header range such as `-12,5` or `+7`.
///
/// `prefix` is the expected sign character. Returns `(start, line_count)`,
/// with the count defaulting to 1 when the `,count` part is absent, or `None`
/// when the prefix is missing or either number fails to parse.
fn parse_hunk_range(value: &str, prefix: char) -> Option<(usize, usize)> {
    let body = value.strip_prefix(prefix)?;
    let mut fields = body.splitn(2, ',');
    let start = fields.next()?.parse::<usize>().ok()?;
    let lines = match fields.next() {
        Some(count) => count.parse::<usize>().ok()?,
        None => 1,
    };
    Some((start, lines))
}
/// Runs `git -C <path> <args...>` and returns its captured output.
///
/// A non-zero exit status is not an error here; callers inspect
/// `Output::status` themselves.
///
/// # Errors
/// `git_command_failed` when the `git` process cannot be started.
fn git_output(path: &Path, args: &[&str]) -> AsrResult<Output> {
    let mut git = Command::new("git");
    git.arg("-C").arg(path).args(args);
    match git.output() {
        Ok(output) => Ok(output),
        Err(err) => Err(AsrError::with_path(
            "git_command_failed",
            format!("Failed to execute git: {err}"),
            path_string(path),
        )),
    }
}
/// Decodes captured stdout (lossily, as UTF-8), trims surrounding whitespace,
/// and returns the text unless nothing is left.
fn nonempty_stdout(stdout: Vec<u8>) -> Option<String> {
    let decoded = String::from_utf8_lossy(&stdout);
    match decoded.trim() {
        "" => None,
        text => Some(text.to_owned()),
    }
}
// Unit tests for the diff parser. They feed hand-written `git diff` text to
// `parse_diff_hunks`; no Git process is involved.
#[cfg(test)]
mod tests {
use super::*;
// A binary file section has no `@@` hunk header, so it must yield no hunks,
// even when the file name itself contains the ` b/` separator.
#[test]
fn binary_diff_path_with_space_b_in_filename_is_correct() {
let diff = "diff --git a/src/a b/utils.bin b/src/a b/utils.bin\n\
index abc..def 100644\n\
Binary files a/src/a b/utils.bin and b/src/a b/utils.bin differ\n";
let hunks = parse_diff_hunks(diff);
assert!(hunks.is_empty());
}
// A deleted binary file (new side is `/dev/null`) likewise yields no hunks.
#[test]
fn binary_diff_deleted_file_uses_old_path() {
let diff = "diff --git a/logo.png b/logo.png\n\
deleted file mode 100644\n\
index abc..0000000\n\
Binary files a/logo.png and /dev/null differ\n";
let hunks = parse_diff_hunks(diff);
assert!(hunks.is_empty());
}
// A modified text file: the hunk is attributed to the `+++ b/` path and the
// single `+` line is counted. The literal's unchanged line carries no leading
// space, so this test deliberately asserts nothing about `context_lines`.
#[test]
fn text_diff_hunk_path_is_correct() {
let diff = "diff --git a/src/lib.rs b/src/lib.rs\n\
index abc..def 100644\n\
--- a/src/lib.rs\n\
+++ b/src/lib.rs\n\
@@ -1,3 +1,4 @@\n\
fn foo() {}\n\
+fn bar() {}\n";
let hunks = parse_diff_hunks(diff);
assert_eq!(hunks.len(), 1);
assert_eq!(hunks[0].path, "src/lib.rs");
assert_eq!(hunks[0].added_lines, 1);
}
// A newly added file (old side is `/dev/null`): the hunk uses the new path and
// both added lines are counted.
#[test]
fn new_file_diff_hunk_path_is_correct() {
let diff = "diff --git a/new.rs b/new.rs\n\
new file mode 100644\n\
index 0000000..abc\n\
--- /dev/null\n\
+++ b/new.rs\n\
@@ -0,0 +1,2 @@\n\
+fn new() {}\n\
+fn also_new() {}\n";
let hunks = parse_diff_hunks(diff);
assert_eq!(hunks.len(), 1);
assert_eq!(hunks[0].path, "new.rs");
assert_eq!(hunks[0].added_lines, 2);
}
}