use std::collections::{HashMap, HashSet};
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use anyhow::{anyhow, Context, Result};
use crate::vcs::shared::run_vcs_with_retry;
use crate::vcs::UpstreamDivergence;
/// Minimum `(major, minor)` git version that `has_merge_conflicts` requires
/// before it will run `git merge-tree --write-tree`.
const MERGE_TREE_MIN_VERSION: (u32, u32) = (2, 38);
/// A git version number split into its `major.minor.patch` components.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GitVersion {
    /// First component of the version number.
    pub major: u32,
    /// Second component of the version number.
    pub minor: u32,
    /// Third component of the version number.
    pub patch: u32,
}

impl GitVersion {
    /// Returns `true` when this version is `major.minor` or newer.
    ///
    /// Only the major and minor components take part in the comparison; the
    /// patch component is ignored.
    pub fn at_least(&self, major: u32, minor: u32) -> bool {
        match self.major.cmp(&major) {
            std::cmp::Ordering::Greater => true,
            std::cmp::Ordering::Equal => self.minor >= minor,
            std::cmp::Ordering::Less => false,
        }
    }
}

impl std::fmt::Display for GitVersion {
    /// Formats the version as `major.minor.patch`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            major,
            minor,
            patch,
        } = *self;
        write!(f, "{major}.{minor}.{patch}")
    }
}
/// Runs `git --version` and parses what it prints into a [`GitVersion`].
///
/// # Errors
///
/// Fails when the `git` process cannot be started, when it exits
/// unsuccessfully (the trimmed stderr is included in the message), or when
/// `parse_git_version` rejects the output.
pub fn get_git_version() -> Result<GitVersion> {
    let result = Command::new("git")
        .arg("--version")
        .output()
        .context("Failed to run git --version")?;

    if result.status.success() {
        let stdout = String::from_utf8_lossy(&result.stdout);
        return parse_git_version(&stdout);
    }

    let stderr = String::from_utf8_lossy(&result.stderr);
    Err(anyhow!("git --version failed: {}", stderr.trim()))
}
/// Reports whether `stderr` mentions a `.lock` file.
///
/// Within this file it is handed to `run_vcs_with_retry` as the predicate that
/// classifies a failure as transient.
pub(super) fn is_transient_error(stderr: &str) -> bool {
    const LOCK_MARKER: &str = ".lock";
    stderr.contains(LOCK_MARKER)
}
/// Returns `true` when `<repo_path>/.git/index.lock` exists on disk.
///
/// Any failure to stat the file (including it simply not being there) is
/// reported as `false`.
pub fn is_index_locked(repo_path: &Path) -> bool {
    let lock_file = repo_path.join(".git/index.lock");
    std::fs::metadata(&lock_file).is_ok()
}
pub(super) fn parse_git_version(s: &str) -> Result<GitVersion> {
let version_part = s
.trim()
.strip_prefix("git version ")
.ok_or_else(|| anyhow!("Unexpected git version format: {}", s))?;
let version_num = version_part.split_whitespace().next().unwrap_or(version_part);
let parts: Vec<&str> = version_num.split('.').collect();
if parts.len() < 2 {
return Err(anyhow!("Cannot parse git version: {}", s));
}
let major = parts[0].parse().context("Invalid major version")?;
let minor = parts[1].parse().context("Invalid minor version")?;
let patch = parts.get(2).and_then(|p| p.parse().ok()).unwrap_or(0);
Ok(GitVersion { major, minor, patch })
}
/// Asks git for the top-level directory of the repository containing `path`.
///
/// Runs `git rev-parse --show-toplevel` with `path` as the working directory
/// and returns the trimmed output as a [`PathBuf`].
///
/// # Errors
///
/// Fails when git cannot be started or when the command exits unsuccessfully;
/// in the latter case git's stderr is included in the message.
pub fn get_repo_root(path: &Path) -> Result<PathBuf> {
    let result = Command::new("git")
        .args(["rev-parse", "--show-toplevel"])
        .current_dir(path)
        .output()
        .context("Failed to run git rev-parse")?;

    if result.status.success() {
        let stdout = String::from_utf8_lossy(&result.stdout);
        Ok(PathBuf::from(stdout.trim()))
    } else {
        Err(anyhow!(
            "Not a git repository: {}",
            String::from_utf8_lossy(&result.stderr)
        ))
    }
}
/// Returns `true` when `git rev-parse --verify <git_ref>` succeeds in
/// `repo_path`.
///
/// A failure to launch git at all is treated the same as a missing ref.
fn ref_exists(repo_path: &Path, git_ref: &str) -> bool {
    let outcome = Command::new("git")
        .arg("rev-parse")
        .arg("--verify")
        .arg(git_ref)
        .current_dir(repo_path)
        .output();

    match outcome {
        Ok(result) => result.status.success(),
        Err(_) => false,
    }
}
/// Picks the repository's base branch name, either `main` or `master`.
///
/// Remote-tracking refs are consulted first (`origin/main`, then
/// `origin/master`); only if neither exists are the local `main` and `master`
/// refs tried, in that order. The returned name never carries the `origin/`
/// prefix.
///
/// # Errors
///
/// Fails when none of the four refs exists.
pub fn detect_base_branch(repo_path: &Path) -> Result<String> {
    const CANDIDATES: [&str; 2] = ["main", "master"];

    let on_remote = CANDIDATES
        .iter()
        .copied()
        .find(|&name| ref_exists(repo_path, &format!("origin/{}", name)));

    // The local lookup is lazy so it only runs when no remote ref matched.
    let chosen = on_remote.or_else(|| {
        CANDIDATES
            .iter()
            .copied()
            .find(|&name| ref_exists(repo_path, name))
    });

    chosen
        .map(str::to_string)
        .ok_or_else(|| anyhow!("Could not find 'main' or 'master' branch"))
}
/// Finds the merge base between `HEAD` and `base_branch`, trying
/// `origin/<base_branch>` first.
///
/// If the lookup against the remote-tracking ref fails for any reason, the
/// local `base_branch` is used instead and its result (success or error) is
/// returned.
pub fn get_merge_base_preferring_origin(repo_path: &Path, base_branch: &str) -> Result<String> {
    match get_merge_base(repo_path, &format!("origin/{}", base_branch)) {
        Ok(commit) => Ok(commit),
        Err(_) => get_merge_base(repo_path, base_branch),
    }
}
/// Runs `git merge-base <base_branch> HEAD` in `repo_path` and returns the
/// trimmed output.
///
/// # Errors
///
/// Fails when git cannot be started or when the command exits unsuccessfully;
/// in the latter case git's stderr is included in the message.
pub(super) fn get_merge_base(repo_path: &Path, base_branch: &str) -> Result<String> {
    let result = Command::new("git")
        .arg("merge-base")
        .arg(base_branch)
        .arg("HEAD")
        .current_dir(repo_path)
        .output()
        .context("Failed to run git merge-base")?;

    if result.status.success() {
        let stdout = String::from_utf8_lossy(&result.stdout);
        Ok(stdout.trim().to_string())
    } else {
        Err(anyhow!(
            "Failed to find merge-base: {}",
            String::from_utf8_lossy(&result.stderr)
        ))
    }
}
/// Describes how far `origin/<base_branch>` has moved past `merge_base`.
///
/// Returns `None` when `merge_base` is empty, when the remote-tracking ref does
/// not exist, or when the commit count in `merge_base..origin/<base_branch>` is
/// zero or could not be determined. Otherwise returns that count together with
/// the set of files changed over the same range; if the file listing fails the
/// set is left empty.
pub fn compute_upstream_divergence(
    repo_path: &Path,
    merge_base: &str,
    base_branch: &str,
) -> Option<UpstreamDivergence> {
    if merge_base.is_empty() {
        return None;
    }

    let upstream = format!("origin/{}", base_branch);
    if !ref_exists(repo_path, &upstream) {
        return None;
    }

    match rev_list_count(repo_path, merge_base, &upstream) {
        Ok(behind_count) if behind_count > 0 => {
            let upstream_files =
                upstream_changed_files(repo_path, merge_base, &upstream).unwrap_or_default();
            Some(UpstreamDivergence {
                behind_count,
                upstream_files,
            })
        }
        // Either nothing new upstream or the count was unavailable.
        _ => None,
    }
}
/// Counts the commits in the range `from..to` using `git rev-list --count`.
///
/// # Errors
///
/// Fails when git cannot be started, exits unsuccessfully, or prints something
/// that does not parse as a `usize`.
fn rev_list_count(repo_path: &Path, from: &str, to: &str) -> Result<usize> {
    let result = Command::new("git")
        .args(["rev-list", "--count"])
        .arg(format!("{}..{}", from, to))
        .current_dir(repo_path)
        .output()
        .context("Failed to run git rev-list --count")?;

    if !result.status.success() {
        return Err(anyhow!("git rev-list --count failed"));
    }

    let stdout = String::from_utf8_lossy(&result.stdout);
    stdout
        .trim()
        .parse::<usize>()
        .context("Failed to parse rev-list count")
}
/// Lists the paths reported by `git diff --name-only <from>..<to>`.
///
/// Each non-empty output line becomes one entry of the returned set, exactly
/// as git printed it.
///
/// # Errors
///
/// Fails when git cannot be started or exits unsuccessfully.
fn upstream_changed_files(
    repo_path: &Path,
    from: &str,
    to: &str,
) -> Result<HashSet<String>> {
    let result = Command::new("git")
        .args(["diff", "--name-only"])
        .arg(format!("{}..{}", from, to))
        .current_dir(repo_path)
        .output()
        .context("Failed to run git diff --name-only")?;

    if !result.status.success() {
        return Err(anyhow!("git diff --name-only failed"));
    }

    let listing = String::from_utf8_lossy(&result.stdout);
    let mut files = HashSet::new();
    for line in listing.lines() {
        if !line.is_empty() {
            files.insert(line.to_string());
        }
    }
    Ok(files)
}
/// Reads `file_path` as stored at `git_ref` via `git show`, returning its
/// contents as (lossily decoded) UTF-8 text.
///
/// An empty `git_ref` produces the object spec `:<file_path>`; otherwise the
/// spec is `<git_ref>:<file_path>`. Returns `Ok(None)` when `git show` exits
/// unsuccessfully.
///
/// # Errors
///
/// Propagates any error returned by `run_vcs_with_retry`.
pub(super) fn get_file_at_ref(repo_path: &Path, file_path: &str, git_ref: &str) -> Result<Option<String>> {
    let object_spec = match git_ref {
        "" => format!(":{}", file_path),
        named => format!("{}:{}", named, file_path),
    };

    let result = run_vcs_with_retry("git", repo_path, &["show", &object_spec], is_transient_error)?;

    Ok(result
        .status
        .success()
        .then(|| String::from_utf8_lossy(&result.stdout).into_owned()))
}
/// Fetches the contents of many files at `git_ref` with a single
/// `git cat-file --batch` process.
///
/// One object spec per entry of `file_paths` is written to the child's stdin
/// (`:<path>` when `git_ref` is empty, `<git_ref>:<path>` otherwise) and one
/// reply per spec is read back in the same order. Only replies describing a
/// `blob` end up in the result, keyed by the original path and decoded lossily
/// as UTF-8. Paths that git reports as missing, or that resolve to a non-blob
/// object, are simply absent from the map.
///
/// This is best-effort: if git cannot be spawned, or its output ends early,
/// whatever was collected so far (possibly nothing) is returned.
pub(super) fn batch_file_contents(
    repo_path: &Path,
    file_paths: &[&str],
    git_ref: &str,
) -> HashMap<String, String> {
    if file_paths.is_empty() {
        return HashMap::new();
    }

    let mut child = match Command::new("git")
        .args(["cat-file", "--batch"])
        .current_dir(repo_path)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()
    {
        Ok(child) => child,
        Err(_) => return HashMap::new(),
    };

    let child_stdin = child.stdin.take().expect("stdin was piped");
    let child_stdout = child.stdout.take().expect("stdout was piped");

    let specs: Vec<String> = file_paths
        .iter()
        .map(|path| {
            if git_ref.is_empty() {
                format!(":{path}")
            } else {
                format!("{git_ref}:{path}")
            }
        })
        .collect();

    // Feed stdin from a separate thread so that a full pipe in either
    // direction cannot block this thread while it is reading replies.
    let writer_handle = std::thread::spawn(move || {
        let mut writer = BufWriter::new(child_stdin);
        for spec in &specs {
            let _ = writeln!(writer, "{spec}");
        }
        let _ = writer.flush();
    });

    let mut reader = BufReader::new(child_stdout);
    let mut results = HashMap::with_capacity(file_paths.len());
    let mut header_line = String::new();

    for &path in file_paths {
        header_line.clear();
        if reader.read_line(&mut header_line).unwrap_or(0) == 0 {
            break;
        }

        // Anything that is not a well-formed object header (notably the
        // "<spec> missing" reply) carries no payload, so move to the next path.
        let (obj_type, size) = match parse_batch_header(header_line.trim_end()) {
            Some(parsed) => parsed,
            None => continue,
        };

        let mut content_buf = vec![0u8; size];
        if reader.read_exact(&mut content_buf).is_err() {
            break;
        }
        // The payload is followed by a single newline that is not part of it.
        let mut trailing = [0u8; 1];
        let _ = reader.read_exact(&mut trailing);

        if obj_type == "blob" {
            results.insert(
                path.to_string(),
                String::from_utf8_lossy(&content_buf).into_owned(),
            );
        }
    }

    let _ = writer_handle.join();
    let _ = child.wait();
    results
}

/// Parses a `<object-id> <type> <size>` header line from `git cat-file --batch`.
///
/// Returns the object type and payload size, or `None` for any other line.
/// The check is deliberately strict (exactly three space-separated fields, a
/// hexadecimal object id, a numeric size) because a `<spec> missing` reply
/// echoes the requested path, and a path containing spaces could otherwise be
/// mistaken for a header and make the reader consume bytes that are not there.
fn parse_batch_header(header: &str) -> Option<(&str, usize)> {
    let mut fields = header.split(' ');
    let object_id = fields.next()?;
    let obj_type = fields.next()?;
    let size = fields.next()?.parse().ok()?;
    if fields.next().is_some() {
        return None;
    }

    let looks_like_object_id =
        !object_id.is_empty() && object_id.bytes().all(|b| b.is_ascii_hexdigit());
    if looks_like_object_id {
        Some((obj_type, size))
    } else {
        None
    }
}
/// Reads `file_path` (relative to `repo_path`) from disk as lossily decoded
/// UTF-8 text.
///
/// Returns `Ok(None)` when the file does not exist, whether that is noticed by
/// the up-front existence check or by the read itself.
///
/// # Errors
///
/// Any read failure other than "not found" is returned to the caller.
pub(super) fn get_working_tree_file(repo_path: &Path, file_path: &str) -> Result<Option<String>> {
    let target = repo_path.join(file_path);
    if !target.exists() {
        return Ok(None);
    }

    std::fs::read(&target)
        .map(|raw| Some(String::from_utf8_lossy(&raw).into_owned()))
        .or_else(|err| {
            // The file may vanish between the check above and the read.
            if err.kind() == std::io::ErrorKind::NotFound {
                Ok(None)
            } else {
                Err(err.into())
            }
        })
}
/// Reads `file_path` as stored at `git_ref` via `git show`, returning the raw
/// bytes git printed.
///
/// The object spec is `<git_ref>:<file_path>`, which for an empty `git_ref`
/// is simply `:<file_path>`. Returns `Ok(None)` when `git show` exits
/// unsuccessfully.
///
/// # Errors
///
/// Propagates any error returned by `run_vcs_with_retry`.
pub fn get_file_bytes_at_ref(
    repo_path: &Path,
    file_path: &str,
    git_ref: &str,
) -> Result<Option<Vec<u8>>> {
    // With an empty ref this naturally yields ":<file_path>".
    let object_spec = format!("{}:{}", git_ref, file_path);

    let result = run_vcs_with_retry("git", repo_path, &["show", &object_spec], is_transient_error)?;

    if result.status.success() {
        Ok(Some(result.stdout))
    } else {
        Ok(None)
    }
}
/// Reads the raw bytes of `file_path` (relative to `repo_path`) from disk.
///
/// Returns `Ok(None)` when the file does not exist, whether that is noticed by
/// the up-front existence check or by the read itself.
///
/// # Errors
///
/// Any read failure other than "not found" is returned to the caller.
pub fn get_working_tree_bytes(repo_path: &Path, file_path: &str) -> Result<Option<Vec<u8>>> {
    let target = repo_path.join(file_path);
    if !target.exists() {
        return Ok(None);
    }

    let read_outcome = std::fs::read(&target);
    match read_outcome {
        Ok(raw) => Ok(Some(raw)),
        Err(err) => match err.kind() {
            // The file may vanish between the check above and the read.
            std::io::ErrorKind::NotFound => Ok(None),
            _ => Err(err.into()),
        },
    }
}
/// Reports whether `git diff --numstat -- <file_path>` describes the file with
/// `-` in both count columns.
///
/// Returns `false` when git cannot be started or when its output does not
/// begin with `-<TAB>-<TAB>` (which includes the case of no output at all).
pub fn is_binary_file(repo_path: &Path, file_path: &str) -> bool {
    const BINARY_MARKER: &[u8] = b"-\t-\t";

    Command::new("git")
        .arg("diff")
        .arg("--numstat")
        .arg("--")
        .arg(file_path)
        .current_dir(repo_path)
        .output()
        // The marker is pure ASCII, so comparing raw bytes is equivalent to
        // comparing the lossily decoded text.
        .map(|result| result.stdout.starts_with(BINARY_MARKER))
        .unwrap_or(false)
}
/// Collects the paths that `git diff --numstat <base>` reports with `-` in both
/// count columns, i.e. the entries git shows without line counts.
///
/// `merge_base` is used as the diff base; when it is empty the well-known
/// SHA-1 id of git's empty tree is used instead. For entries that git prints
/// as a rename, the destination path is recorded.
///
/// This is best-effort: if git cannot be started an empty set is returned.
pub fn get_binary_files(repo_path: &Path, merge_base: &str) -> HashSet<String> {
    // Object id of the empty tree in SHA-1 repositories.
    const EMPTY_TREE: &str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";

    let base_ref = if merge_base.is_empty() {
        EMPTY_TREE
    } else {
        merge_base
    };

    let output = match Command::new("git")
        .args(["diff", "--numstat", base_ref])
        .current_dir(repo_path)
        .output()
    {
        Ok(output) => output,
        Err(_) => return HashSet::new(),
    };

    String::from_utf8_lossy(&output.stdout)
        .lines()
        .filter_map(|line| line.strip_prefix("-\t-\t"))
        .map(numstat_destination_path)
        .collect()
}

/// Extracts the destination path from the path column of a `--numstat` line.
///
/// Git prints renames either as `old => new` or, when both names share a
/// leading and/or trailing part, as `prefix{old => new}suffix`. A column
/// without ` => ` is returned unchanged.
fn numstat_destination_path(raw: &str) -> String {
    let (before, after) = match raw.rsplit_once(" => ") {
        Some(halves) => halves,
        None => return raw.to_string(),
    };

    match (before.rfind('{'), after.find('}')) {
        (Some(open), Some(close)) => {
            let prefix = &before[..open];
            let renamed = &after[..close];
            let suffix = &after[close + 1..];
            // `a/{b => }/file` means the file moved up to `a/file`; joining the
            // pieces naively would leave a doubled slash.
            if renamed.is_empty() && prefix.ends_with('/') && suffix.starts_with('/') {
                format!("{}{}", prefix, &suffix[1..])
            } else {
                format!("{}{}{}", prefix, renamed, suffix)
            }
        }
        _ => after.to_string(),
    }
}
/// Fetches `base_branch` from `origin`, giving up after 30 seconds.
///
/// When `base_branch` is the currently checked-out branch the fetch argument
/// is just the branch name; otherwise the refspec `<base_branch>:<base_branch>`
/// is used. The fetch runs with `gc.auto=0` and `--no-tags`.
///
/// # Errors
///
/// Fails when git cannot be spawned, when waiting on it fails, when the fetch
/// exits unsuccessfully (stderr is included in the message), or when the
/// timeout elapses, in which case the child process is killed and reaped.
pub fn fetch_base_branch(repo_path: &Path, base_branch: &str) -> Result<()> {
    use std::io::Read;
    use std::time::{Duration, Instant};

    const FETCH_TIMEOUT: Duration = Duration::from_secs(30);
    const POLL_INTERVAL: Duration = Duration::from_millis(100);

    let current = get_current_branch(repo_path).ok().flatten();
    let on_base_branch = current.as_deref() == Some(base_branch);
    let refspec = format!("{}:{}", base_branch, base_branch);
    let fetch_arg = if on_base_branch { base_branch } else { &refspec };

    let mut child = Command::new("git")
        .args(["-c", "gc.auto=0", "fetch", "--no-tags", "origin", fetch_arg])
        .current_dir(repo_path)
        // stdout is never inspected; discarding it means it can never fill up
        // and stall the child.
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .context("Failed to spawn git fetch")?;

    // Drain stderr concurrently so a chatty fetch cannot block on a full pipe
    // while this thread is only polling for exit.
    let stderr_reader = child.stderr.take().map(|mut pipe| {
        std::thread::spawn(move || {
            let mut raw = Vec::new();
            let _ = pipe.read_to_end(&mut raw);
            String::from_utf8_lossy(&raw).into_owned()
        })
    });

    let start = Instant::now();
    loop {
        match child.try_wait() {
            Ok(Some(status)) => {
                if status.success() {
                    return Ok(());
                }
                let stderr = stderr_reader
                    .and_then(|handle| handle.join().ok())
                    .unwrap_or_default();
                return Err(anyhow!("git fetch failed: {}", stderr));
            }
            Ok(None) => {
                if start.elapsed() > FETCH_TIMEOUT {
                    let _ = child.kill();
                    // Reap the killed process so it does not linger as a zombie.
                    let _ = child.wait();
                    return Err(anyhow!("git fetch timed out"));
                }
                std::thread::sleep(POLL_INTERVAL);
            }
            Err(e) => return Err(anyhow!("Error waiting for git fetch: {}", e)),
        }
    }
}
pub fn has_merge_conflicts(repo_path: &Path, base_branch: &str, git_version: &GitVersion) -> Result<bool> {
if !git_version.at_least(MERGE_TREE_MIN_VERSION.0, MERGE_TREE_MIN_VERSION.1) {
return Ok(false);
}
let remote_ref = format!("origin/{}", base_branch);
let remote_exists = Command::new("git")
.args(["rev-parse", "--verify", &remote_ref])
.current_dir(repo_path)
.output()
.map(|o| o.status.success())
.unwrap_or(false);
if !remote_exists {
return Ok(false);
}
let output = Command::new("git")
.args(["merge-tree", "--write-tree", &remote_ref, "HEAD"])
.current_dir(repo_path)
.output()
.context("Failed to run git merge-tree")?;
Ok(!output.status.success())
}
/// Returns the name of the checked-out branch, if there is one.
///
/// Runs `git rev-parse --abbrev-ref HEAD` in `repo_path`. Yields `Ok(None)`
/// when the command exits unsuccessfully or when the trimmed output is the
/// literal `HEAD` (no branch name available).
///
/// # Errors
///
/// Fails only when git cannot be started.
pub fn get_current_branch(repo_path: &Path) -> Result<Option<String>> {
    let result = Command::new("git")
        .arg("rev-parse")
        .arg("--abbrev-ref")
        .arg("HEAD")
        .current_dir(repo_path)
        .output()
        .context("Failed to get current branch")?;

    if !result.status.success() {
        return Ok(None);
    }

    let stdout = String::from_utf8_lossy(&result.stdout);
    Ok(match stdout.trim() {
        "HEAD" => None,
        name => Some(name.to_string()),
    })
}