use std::fs;
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use crate::error::{Error, Result};
use crate::index::{Index, IndexEntry};
use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind, TreeEntry};
use crate::odb::Odb;
use crate::userdiff::FuncnameMatcher;
/// Classification of a single file change, mirroring the one-letter status
/// codes of `git diff --name-status` (see [`DiffStatus::letter`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffStatus {
    /// File exists only on the new side ('A').
    Added,
    /// File exists only on the old side ('D').
    Deleted,
    /// Content or mode changed ('M').
    Modified,
    /// Detected rename from another path ('R').
    Renamed,
    /// Detected copy from another path ('C').
    Copied,
    /// Object-type bits changed, e.g. file -> symlink ('T').
    TypeChanged,
    /// Merge-conflicted entry ('U').
    Unmerged,
}
impl DiffStatus {
#[must_use]
pub fn letter(&self) -> char {
match self {
Self::Added => 'A',
Self::Deleted => 'D',
Self::Modified => 'M',
Self::Renamed => 'R',
Self::Copied => 'C',
Self::TypeChanged => 'T',
Self::Unmerged => 'U',
}
}
}
/// One file-level change between two diff sides (tree, index, or worktree).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffEntry {
    /// Kind of change (added/deleted/modified/...).
    pub status: DiffStatus,
    /// Path on the old side; `None` for additions.
    pub old_path: Option<String>,
    /// Path on the new side; `None` for deletions.
    pub new_path: Option<String>,
    /// Mode string of the old side ("000000" when absent).
    pub old_mode: String,
    /// Mode string of the new side ("000000" when absent).
    pub new_mode: String,
    /// Object id of the old side; all-zero when absent.
    pub old_oid: ObjectId,
    /// Object id of the new side; all-zero when absent.
    pub new_oid: ObjectId,
    /// Similarity score (0-100) for renames/copies, `None` otherwise.
    pub score: Option<u32>,
}
impl DiffEntry {
    /// The primary path of this entry: the new path when present, otherwise
    /// the old path, otherwise the empty string.
    #[must_use]
    pub fn path(&self) -> &str {
        match (&self.new_path, &self.old_path) {
            (Some(p), _) | (None, Some(p)) => p.as_str(),
            (None, None) => "",
        }
    }
    /// Human-readable path: "old -> new" for renames/copies when both sides
    /// are known and non-empty, otherwise the primary path.
    #[must_use]
    pub fn display_path(&self) -> String {
        if matches!(self.status, DiffStatus::Renamed | DiffStatus::Copied) {
            if let (Some(old), Some(new)) = (self.old_path.as_deref(), self.new_path.as_deref()) {
                if !old.is_empty() && !new.is_empty() {
                    return format!("{old} -> {new}");
                }
            }
        }
        self.path().to_owned()
    }
}
/// Hex form of the all-zero object id, used for the missing side of a diff.
pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
#[must_use]
pub fn zero_oid() -> ObjectId {
ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
panic!("internal error: failed to create zero OID");
})
}
/// Object id of the empty blob (git's well-known `e69de29...` hash).
#[must_use]
pub fn empty_blob_oid() -> ObjectId {
    // The literal is a valid 40-char hex id, so this cannot fail.
    ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
        .expect("internal error: failed to create empty blob OID")
}
/// Diffs two trees recursively, reporting blob-level changes only
/// (tree entries themselves are not emitted).
///
/// # Errors
/// Propagates object-database read/parse failures.
pub fn diff_trees(
    odb: &Odb,
    old_tree_oid: Option<&ObjectId>,
    new_tree_oid: Option<&ObjectId>,
    prefix: &str,
) -> Result<Vec<DiffEntry>> {
    let show_trees = false;
    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, show_trees)
}
/// Like [`diff_trees`], but additionally emits entries for changed, added,
/// or deleted subtrees themselves.
///
/// # Errors
/// Propagates object-database read/parse failures.
pub fn diff_trees_show_tree_entries(
    odb: &Odb,
    old_tree_oid: Option<&ObjectId>,
    new_tree_oid: Option<&ObjectId>,
    prefix: &str,
) -> Result<Vec<DiffEntry>> {
    let show_trees = true;
    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, show_trees)
}
/// Shared implementation for the tree-diff entry points: loads both trees
/// (an absent tree diffs as empty) and delegates to the lockstep walk.
fn diff_trees_opts(
    odb: &Odb,
    old_tree_oid: Option<&ObjectId>,
    new_tree_oid: Option<&ObjectId>,
    prefix: &str,
    show_trees: bool,
) -> Result<Vec<DiffEntry>> {
    let old_entries = old_tree_oid
        .map(|oid| read_tree(odb, oid))
        .transpose()?
        .unwrap_or_default();
    let new_entries = new_tree_oid
        .map(|oid| read_tree(odb, oid))
        .transpose()?
        .unwrap_or_default();
    let mut result = Vec::new();
    diff_tree_entries_opts(odb, &old_entries, &new_entries, prefix, show_trees, &mut result)?;
    Ok(result)
}
fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
let obj = odb.read(oid)?;
if obj.kind != ObjectKind::Tree {
return Err(Error::CorruptObject(format!(
"expected tree, got {}",
obj.kind.as_str()
)));
}
parse_tree(&obj.data)
}
/// Walks two sorted tree-entry lists in lockstep (git's canonical tree-diff
/// merge walk) and appends the resulting diff entries to `result`.
///
/// Entries are ordered by `tree_entry_cmp`, which needs to know whether each
/// side is a tree. Subtrees are recursed into; the subtree entries themselves
/// are only emitted when `show_trees` is set.
fn diff_tree_entries_opts(
    odb: &Odb,
    old: &[TreeEntry],
    new: &[TreeEntry],
    prefix: &str,
    show_trees: bool,
    result: &mut Vec<DiffEntry>,
) -> Result<()> {
    let mut oi = 0;
    let mut ni = 0;
    while oi < old.len() || ni < new.len() {
        match (old.get(oi), new.get(ni)) {
            (Some(o), Some(n)) => {
                // Ordering depends on tree-ness of both sides (git sorts
                // directory names as if suffixed with '/').
                let cmp = crate::objects::tree_entry_cmp(
                    &o.name,
                    is_tree_mode(o.mode),
                    &n.name,
                    is_tree_mode(n.mode),
                );
                match cmp {
                    std::cmp::Ordering::Less => {
                        // Present only on the old side: deleted.
                        emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                        oi += 1;
                    }
                    std::cmp::Ordering::Greater => {
                        // Present only on the new side: added.
                        emit_added_opts(odb, n, prefix, show_trees, result)?;
                        ni += 1;
                    }
                    std::cmp::Ordering::Equal => {
                        if o.oid != n.oid || o.mode != n.mode {
                            let name_str = String::from_utf8_lossy(&o.name);
                            let path = format_path(prefix, &name_str);
                            if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
                                // Tree vs tree: optionally report the tree
                                // itself, then recurse into its contents.
                                if show_trees {
                                    result.push(DiffEntry {
                                        status: DiffStatus::Modified,
                                        old_path: Some(path.clone()),
                                        new_path: Some(path.clone()),
                                        old_mode: format_mode(o.mode),
                                        new_mode: format_mode(n.mode),
                                        old_oid: o.oid,
                                        new_oid: n.oid,
                                        score: None,
                                    });
                                }
                                let nested = diff_trees_opts(
                                    odb,
                                    Some(&o.oid),
                                    Some(&n.oid),
                                    &path,
                                    show_trees,
                                )?;
                                result.extend(nested);
                            } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
                                // Tree replaced by blob: delete + add.
                                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                                emit_added_opts(odb, n, prefix, show_trees, result)?;
                            } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
                                // Blob replaced by tree: delete + add.
                                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                                emit_added_opts(odb, n, prefix, show_trees, result)?;
                            } else {
                                // Blob vs blob: a change in the object-type
                                // bits (0o170000 mask) is TypeChanged,
                                // everything else is a plain modification.
                                let old_type = o.mode & 0o170000;
                                let new_type = n.mode & 0o170000;
                                result.push(DiffEntry {
                                    status: if old_type != new_type {
                                        DiffStatus::TypeChanged
                                    } else {
                                        DiffStatus::Modified
                                    },
                                    old_path: Some(path.clone()),
                                    new_path: Some(path),
                                    old_mode: format_mode(o.mode),
                                    new_mode: format_mode(n.mode),
                                    old_oid: o.oid,
                                    new_oid: n.oid,
                                    score: None,
                                });
                            }
                        }
                        oi += 1;
                        ni += 1;
                    }
                }
            }
            (Some(o), None) => {
                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                oi += 1;
            }
            (None, Some(n)) => {
                emit_added_opts(odb, n, prefix, show_trees, result)?;
                ni += 1;
            }
            (None, None) => break,
        }
    }
    Ok(())
}
/// Records `entry` (and, for trees, everything beneath it) as deleted.
///
/// A tree entry itself is only reported when `show_trees` is set; its
/// contents are always reported recursively. Blobs are reported directly.
fn emit_deleted_opts(
    odb: &Odb,
    entry: &TreeEntry,
    prefix: &str,
    show_trees: bool,
    result: &mut Vec<DiffEntry>,
) -> Result<()> {
    let name_str = String::from_utf8_lossy(&entry.name);
    let path = format_path(prefix, &name_str);
    let is_tree = is_tree_mode(entry.mode);
    if !is_tree || show_trees {
        result.push(DiffEntry {
            status: DiffStatus::Deleted,
            old_path: Some(path.clone()),
            new_path: None,
            old_mode: format_mode(entry.mode),
            new_mode: "000000".to_owned(),
            old_oid: entry.oid,
            new_oid: zero_oid(),
            score: None,
        });
    }
    if is_tree {
        let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
        result.extend(nested);
    }
    Ok(())
}
/// Records `entry` (and, for trees, everything beneath it) as added.
///
/// A tree entry itself is only reported when `show_trees` is set; its
/// contents are always reported recursively. Blobs are reported directly.
fn emit_added_opts(
    odb: &Odb,
    entry: &TreeEntry,
    prefix: &str,
    show_trees: bool,
    result: &mut Vec<DiffEntry>,
) -> Result<()> {
    let name_str = String::from_utf8_lossy(&entry.name);
    let path = format_path(prefix, &name_str);
    let is_tree = is_tree_mode(entry.mode);
    if !is_tree || show_trees {
        result.push(DiffEntry {
            status: DiffStatus::Added,
            old_path: None,
            new_path: Some(path.clone()),
            old_mode: "000000".to_owned(),
            new_mode: format_mode(entry.mode),
            old_oid: zero_oid(),
            new_oid: entry.oid,
            score: None,
        });
    }
    if is_tree {
        let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
        result.extend(nested);
    }
    Ok(())
}
/// Diffs the index (stage-0 entries) against a tree — the "staged changes"
/// side of `git status`.
///
/// Conflicted (stage > 0) paths are reported as `Unmerged`, keeping the
/// preferred stage in rank order ours (2), theirs (3), base (1).
/// Intent-to-add entries are skipped. The result is sorted by path.
pub fn diff_index_to_tree(
    odb: &Odb,
    index: &Index,
    tree_oid: Option<&ObjectId>,
) -> Result<Vec<DiffEntry>> {
    let tree_entries = match tree_oid {
        Some(oid) => flatten_tree(odb, oid, "")?,
        None => Vec::new(),
    };
    // Path -> tree entry; whatever remains after the index pass is deleted.
    let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
        std::collections::BTreeMap::new();
    for entry in &tree_entries {
        tree_map.insert(&entry.path, entry);
    }
    let mut result = Vec::new();
    let mut stage0_paths = std::collections::BTreeSet::new();
    // Path -> (stage preference rank, mode) for conflicted entries.
    let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
        std::collections::BTreeMap::new();
    for ie in &index.entries {
        let path = String::from_utf8_lossy(&ie.path).to_string();
        // `git add -N` placeholders are not staged content.
        if ie.stage() == 0 && ie.intent_to_add() {
            continue;
        }
        if ie.stage() != 0 {
            // Lower rank wins: ours (2), then theirs (3), then base (1).
            let rank = match ie.stage() {
                2 => 0u8,
                3 => 1u8,
                1 => 2u8,
                _ => 3u8,
            };
            match unmerged_modes.get(&path) {
                Some((existing_rank, _)) if *existing_rank <= rank => {}
                _ => {
                    unmerged_modes.insert(path, (rank, ie.mode));
                }
            }
            continue;
        }
        stage0_paths.insert(path.clone());
        match tree_map.remove(path.as_str()) {
            Some(te) => {
                if te.oid != ie.oid || te.mode != ie.mode {
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path.clone()),
                        new_path: Some(path),
                        old_mode: format_mode(te.mode),
                        new_mode: format_mode(ie.mode),
                        old_oid: te.oid,
                        new_oid: ie.oid,
                        score: None,
                    });
                }
            }
            None => {
                result.push(DiffEntry {
                    status: DiffStatus::Added,
                    old_path: None,
                    new_path: Some(path),
                    old_mode: "000000".to_owned(),
                    new_mode: format_mode(ie.mode),
                    old_oid: zero_oid(),
                    new_oid: ie.oid,
                    score: None,
                });
            }
        }
    }
    for (path, (_, mode)) in &unmerged_modes {
        // A stage-0 entry for the same path means the conflict was resolved.
        if stage0_paths.contains(path) {
            continue;
        }
        tree_map.remove(path.as_str());
        result.push(DiffEntry {
            status: DiffStatus::Unmerged,
            old_path: Some(path.clone()),
            new_path: Some(path.clone()),
            old_mode: "000000".to_owned(),
            new_mode: format_mode(*mode),
            old_oid: zero_oid(),
            new_oid: zero_oid(),
            score: None,
        });
    }
    // Tree paths the index never claimed are deletions.
    for (path, te) in tree_map {
        result.push(DiffEntry {
            status: DiffStatus::Deleted,
            old_path: Some(path.to_owned()),
            new_path: None,
            old_mode: format_mode(te.mode),
            new_mode: "000000".to_owned(),
            old_oid: te.oid,
            new_oid: zero_oid(),
            score: None,
        });
    }
    result.sort_by(|a, b| a.path().cmp(b.path()));
    Ok(result)
}
/// Diffs stage-0 index entries against the working tree — the "unstaged
/// changes" side of `git status`.
///
/// Conflicted (stage > 0) paths are reported as `Unmerged` (plus a
/// `Modified` entry when the worktree differs from the preferred stage).
/// Submodules (gitlink entries, mode 0o160000) are compared by recorded
/// HEAD commit. Skip-worktree and assume-unchanged entries are ignored.
///
/// # Errors
/// Returns an I/O error for filesystem failures other than "file missing"
/// (`NotFound` or `ENOTDIR`, which are treated as deletions).
pub fn diff_index_to_worktree(
    odb: &Odb,
    index: &Index,
    work_tree: &Path,
) -> Result<Vec<DiffEntry>> {
    use crate::config::ConfigSet;
    use crate::crlf;
    let git_dir = work_tree.join(".git");
    // Config/attribute failures degrade to defaults rather than aborting.
    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
    let conv = crlf::ConversionConfig::from_config(&config);
    let attrs = crlf::load_gitattributes(work_tree);
    let mut result = Vec::new();
    // Path -> (stage preference rank, entry) for conflicted paths;
    // rank order is ours (2), theirs (3), base (1).
    let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
        std::collections::BTreeMap::new();
    for ie in &index.entries {
        if ie.stage() != 0 {
            let path = String::from_utf8_lossy(&ie.path).to_string();
            let rank = match ie.stage() {
                2 => 0u8,
                3 => 1u8,
                1 => 2u8,
                _ => 3u8,
            };
            match unmerged_base.get(&path) {
                Some((existing_rank, _)) if *existing_rank <= rank => {}
                _ => {
                    unmerged_base.insert(path, (rank, ie));
                }
            }
            continue;
        }
        if ie.skip_worktree() {
            continue;
        }
        let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
        let is_intent_to_add = ie.intent_to_add();
        // skip_worktree was already handled above; only assume-unchanged
        // remains to be checked here (the duplicate check was dead code).
        if ie.assume_unchanged() {
            continue;
        }
        if ie.mode == 0o160000 {
            // Gitlink: compare the submodule's current HEAD with the commit
            // recorded in the index.
            let sub_dir = work_tree.join(path_str_ref);
            let sub_head_oid = read_submodule_head_oid(&sub_dir);
            let matches_index = match sub_head_oid {
                Some(oid) => oid == ie.oid,
                None => submodule_worktree_is_unpopulated_placeholder(&sub_dir),
            };
            if !matches_index {
                let path_owned = path_str_ref.to_owned();
                let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path_owned.clone()),
                    new_path: Some(path_owned),
                    old_mode: format_mode(ie.mode),
                    new_mode: format_mode(ie.mode),
                    old_oid: ie.oid,
                    new_oid,
                    score: None,
                });
            }
            continue;
        }
        let file_path = work_tree.join(path_str_ref);
        if is_intent_to_add {
            // `git add -N` entries: present file -> Added; missing file ->
            // Deleted against the empty blob.
            match fs::symlink_metadata(&file_path) {
                Ok(meta) => {
                    let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                    let worktree_oid = hash_worktree_file(
                        odb,
                        &file_path,
                        &meta,
                        &conv,
                        &file_attrs,
                        path_str_ref,
                        None,
                    )?;
                    let worktree_mode = mode_from_metadata(&meta);
                    result.push(DiffEntry {
                        status: DiffStatus::Added,
                        old_path: None,
                        new_path: Some(path_str_ref.to_owned()),
                        old_mode: "000000".to_owned(),
                        new_mode: format_mode(worktree_mode),
                        old_oid: zero_oid(),
                        new_oid: worktree_oid,
                        score: None,
                    });
                }
                // os error 20 is ENOTDIR: a leading directory became a file.
                Err(e)
                    if e.kind() == std::io::ErrorKind::NotFound
                        || e.raw_os_error() == Some(20) =>
                {
                    result.push(DiffEntry {
                        status: DiffStatus::Deleted,
                        old_path: Some(path_str_ref.to_owned()),
                        new_path: None,
                        old_mode: format_mode(ie.mode),
                        new_mode: "000000".to_owned(),
                        old_oid: empty_blob_oid(),
                        new_oid: zero_oid(),
                        score: None,
                    });
                }
                Err(e) => return Err(Error::Io(e)),
            }
            continue;
        }
        // A symlink anywhere in the leading directories means the indexed
        // path no longer exists as a real file.
        if has_symlink_in_path(work_tree, path_str_ref) {
            result.push(DiffEntry {
                status: DiffStatus::Deleted,
                old_path: Some(path_str_ref.to_owned()),
                new_path: None,
                old_mode: format_mode(ie.mode),
                new_mode: "000000".to_owned(),
                old_oid: ie.oid,
                new_oid: zero_oid(),
                score: None,
            });
            continue;
        }
        match fs::symlink_metadata(&file_path) {
            Ok(meta) if meta.is_dir() => {
                // The file was replaced by a directory: treat as deleted.
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    // BUGFIX: was `String::new()`; every other deleted entry
                    // uses "000000", which raw formatting expects.
                    new_mode: "000000".to_owned(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            Ok(meta) => {
                let worktree_mode = mode_from_metadata(&meta);
                // A clean stat cache proves content is unchanged; only a
                // mode flip needs reporting, without rehashing the file.
                if stat_matches(ie, &meta) && worktree_mode != ie.mode {
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: ie.oid,
                        score: None,
                    });
                    continue;
                }
                let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                let worktree_oid = hash_worktree_file(
                    odb,
                    &file_path,
                    &meta,
                    &conv,
                    &file_attrs,
                    path_str_ref,
                    Some(ie),
                )?;
                // If conversion changed the hash, also try the raw bytes:
                // a verbatim match against the index still counts as clean.
                let mut eff_oid = worktree_oid;
                if eff_oid != ie.oid {
                    if let Ok(raw) = fs::read(&file_path) {
                        let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
                        if raw_oid == ie.oid {
                            eff_oid = ie.oid;
                        }
                    }
                }
                if eff_oid != ie.oid || worktree_mode != ie.mode {
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: eff_oid,
                        score: None,
                    });
                }
            }
            Err(e)
                if e.kind() == std::io::ErrorKind::NotFound
                    || e.raw_os_error() == Some(20) =>
            {
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    new_mode: "000000".to_owned(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            Err(e) => return Err(Error::Io(e)),
        }
    }
    // Report each conflicted path as Unmerged, plus a Modified entry when
    // the worktree no longer matches the preferred conflict stage.
    for (path, (_, base_entry)) in unmerged_base {
        let file_path = work_tree.join(&path);
        let wt_meta = match fs::symlink_metadata(&file_path) {
            Ok(meta) => Some(meta),
            Err(e)
                if e.kind() == std::io::ErrorKind::NotFound
                    || e.raw_os_error() == Some(20) =>
            {
                None
            }
            Err(e) => return Err(Error::Io(e)),
        };
        let new_mode = wt_meta.as_ref().map_or_else(
            || "000000".to_owned(),
            |meta| format_mode(mode_from_metadata(meta)),
        );
        result.push(DiffEntry {
            status: DiffStatus::Unmerged,
            old_path: Some(path.clone()),
            new_path: Some(path.clone()),
            old_mode: "000000".to_owned(),
            new_mode,
            old_oid: zero_oid(),
            new_oid: zero_oid(),
            score: None,
        });
        if let Some(meta) = wt_meta {
            let file_attrs = crlf::get_file_attrs(&attrs, &path, false, &config);
            let wt_oid = hash_worktree_file(
                odb,
                &file_path,
                &meta,
                &conv,
                &file_attrs,
                &path,
                Some(base_entry),
            )?;
            let wt_mode = mode_from_metadata(&meta);
            if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path.clone()),
                    new_path: Some(path),
                    old_mode: format_mode(base_entry.mode),
                    new_mode: format_mode(wt_mode),
                    old_oid: base_entry.oid,
                    new_oid: wt_oid,
                    score: None,
                });
            }
        }
    }
    Ok(result)
}
/// Returns whether the working-tree state of `ie` differs from the index
/// entry (content id or file mode), without any stat-cache shortcut.
///
/// NOTE(review): submodules are compared via `read_submodule_head` here,
/// while `diff_index_to_worktree` uses `read_submodule_head_oid` plus an
/// unpopulated-placeholder check — confirm the two are meant to differ.
///
/// # Errors
/// Propagates I/O errors other than "file missing" (`NotFound`/`ENOTDIR`,
/// os error 20), which simply report the entry as differing.
pub fn worktree_differs_from_index_entry(
    odb: &Odb,
    work_tree: &Path,
    ie: &IndexEntry,
) -> Result<bool> {
    use crate::config::ConfigSet;
    use crate::crlf;
    let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
    let file_path = work_tree.join(path_str_ref);
    if ie.mode == 0o160000 {
        // Gitlink: differs when the submodule HEAD is missing or is not the
        // recorded commit.
        let sub_head_oid = read_submodule_head(&file_path);
        return Ok(sub_head_oid.as_ref() != Some(&ie.oid));
    }
    let meta = match fs::symlink_metadata(&file_path) {
        Ok(m) => m,
        Err(e)
            if e.kind() == std::io::ErrorKind::NotFound
                || e.raw_os_error() == Some(20) =>
        {
            // Missing file (or a leading dir became a file): differs.
            return Ok(true);
        }
        Err(e) => return Err(Error::Io(e)),
    };
    if meta.is_dir() {
        return Ok(true);
    }
    let worktree_mode = mode_from_metadata(&meta);
    if worktree_mode != ie.mode {
        return Ok(true);
    }
    let git_dir = work_tree.join(".git");
    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
    let conv = crlf::ConversionConfig::from_config(&config);
    let attrs = crlf::load_gitattributes(work_tree);
    let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
    let worktree_oid = hash_worktree_file(
        odb,
        &file_path,
        &meta,
        &conv,
        &file_attrs,
        path_str_ref,
        Some(ie),
    )?;
    // CRLF conversion can change the hash; a verbatim raw-byte match
    // against the index still counts as unchanged.
    let mut eff_oid = worktree_oid;
    if eff_oid != ie.oid {
        if let Ok(raw) = fs::read(&file_path) {
            let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
            if raw_oid == ie.oid {
                eff_oid = ie.oid;
            }
        }
    }
    Ok(eff_oid != ie.oid)
}
/// Returns whether the cached stat data in the index entry matches the
/// file's current metadata (size, mtime, ctime, inode, device), meaning the
/// content can be assumed unchanged without rehashing.
pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
    meta.len() as u32 == ie.size
        && meta.mtime() as u32 == ie.mtime_sec
        && meta.mtime_nsec() as u32 == ie.mtime_nsec
        && meta.ctime() as u32 == ie.ctime_sec
        && meta.ctime_nsec() as u32 == ie.ctime_nsec
        && meta.ino() as u32 == ie.ino
        && meta.dev() as u32 == ie.dev
}
/// Returns true if any *directory* component of `rel_path` (everything but
/// the final component) is a symlink inside `work_tree`.
///
/// Rewritten to avoid collecting the components into a temporary `Vec`:
/// `rsplit_once` isolates the directory part, which is then walked lazily.
fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
    // A bare filename has no directory components to check.
    let Some((dirs, _file)) = rel_path.rsplit_once('/') else {
        return false;
    };
    let mut check = work_tree.to_path_buf();
    for component in dirs.split('/') {
        check.push(component);
        // Unreadable paths are treated as "not a symlink", as before.
        if fs::symlink_metadata(&check)
            .map(|meta| meta.file_type().is_symlink())
            .unwrap_or(false)
        {
            return true;
        }
    }
    false
}
/// Hashes a working-tree file (or symlink target) as a blob, applying
/// CRLF/attribute conversion, and returns the id without writing the object
/// to the database.
///
/// `index_entry`, when given, supplies the prior blob so the conversion
/// layer can consult it.
///
/// # Errors
/// Returns an error if the file or symlink cannot be read.
pub fn hash_worktree_file(
    odb: &Odb,
    path: &Path,
    meta: &fs::Metadata,
    conv: &crate::crlf::ConversionConfig,
    file_attrs: &crate::crlf::FileAttrs,
    rel_path: &str,
    index_entry: Option<&IndexEntry>,
) -> Result<ObjectId> {
    // The prior index blob (if readable) is offered to the conversion layer.
    let prior_blob: Option<Vec<u8>> = index_entry
        .filter(|e| e.oid != zero_oid())
        .and_then(|e| odb.read(&e.oid).ok().map(|o| o.data));
    let data = if meta.file_type().is_symlink() {
        // Symlinks hash their target path, not the referent's contents.
        let target = fs::read_link(path)?;
        target.to_string_lossy().into_owned().into_bytes()
    } else {
        let raw = fs::read(path)?;
        let opts = crate::crlf::ConvertToGitOpts {
            index_blob: prior_blob.as_deref(),
            renormalize: false,
            check_safecrlf: false,
        };
        // Conversion failure falls back to the raw bytes (best effort).
        crate::crlf::convert_to_git_with_opts(&raw, rel_path, conv, file_attrs, opts).unwrap_or(raw)
    };
    Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
}
/// Maps filesystem metadata to the git mode recorded for the entry:
/// symlink -> 0o120000, executable file -> 0o100755, otherwise 0o100644.
pub fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
    if meta.file_type().is_symlink() {
        return 0o120000;
    }
    // Any executable bit (user/group/other) promotes the file to 755.
    let executable = meta.mode() & 0o111 != 0;
    if executable { 0o100755 } else { 0o100644 }
}
/// Diffs a tree directly against the working tree (like `git diff HEAD`),
/// using the index both as a stat cache and to carry staged state.
///
/// Paths present in the tree and on disk but absent from the stage-0 index
/// are skipped — the index is authoritative for which paths are tracked.
///
/// NOTE(review): the `symlink_metadata` match below treats only `NotFound`
/// as "missing"; sibling functions also accept `ENOTDIR` (os error 20) —
/// confirm whether that omission is intentional.
pub fn diff_tree_to_worktree(
    odb: &Odb,
    tree_oid: Option<&ObjectId>,
    work_tree: &Path,
    index: &Index,
) -> Result<Vec<DiffEntry>> {
    use crate::config::ConfigSet;
    use crate::crlf;
    let git_dir = work_tree.join(".git");
    // Config/attribute failures degrade to defaults rather than aborting.
    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
    let conv = crlf::ConversionConfig::from_config(&config);
    let attrs = crlf::load_gitattributes(work_tree);
    let tree_flat = match tree_oid {
        Some(oid) => flatten_tree(odb, oid, "")?,
        None => Vec::new(),
    };
    let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
        tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
    // Stage-0 index entries keyed by raw path bytes; conflicted entries are
    // ignored by this walk.
    let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
        std::collections::BTreeMap::new();
    let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
    for ie in &index.entries {
        if ie.stage() != 0 {
            continue;
        }
        let path = String::from_utf8_lossy(&ie.path).to_string();
        index_entries.insert(&ie.path, ie);
        index_paths.insert(path);
    }
    // The union of tree paths and index paths drives the walk.
    let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
    all_paths.extend(tree_map.keys().cloned());
    all_paths.extend(index_paths.iter().cloned());
    let mut result = Vec::new();
    for path in &all_paths {
        let tree_entry = tree_map.get(path.as_str());
        // Submodules are compared by recorded HEAD commit, never content.
        let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
            || index_entries
                .get(path.as_bytes())
                .is_some_and(|ie| ie.mode == 0o160000);
        if is_gitlink {
            if let Some(te) = tree_entry {
                let sub_dir = work_tree.join(path);
                let sub_head = read_submodule_head_oid(&sub_dir);
                if sub_head.as_ref() != Some(&te.oid) {
                    let new_oid = sub_head.unwrap_or_else(zero_oid);
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path.clone()),
                        new_path: Some(path.clone()),
                        old_mode: format_mode(te.mode),
                        new_mode: format_mode(te.mode),
                        old_oid: te.oid,
                        new_oid,
                        score: None,
                    });
                }
            }
            continue;
        }
        let file_path = work_tree.join(path);
        let wt_meta = match fs::symlink_metadata(&file_path) {
            Ok(m) => Some(m),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
            Err(e) => return Err(Error::Io(e)),
        };
        match (tree_entry, wt_meta) {
            (Some(te), Some(ref meta)) => {
                let wt_mode = mode_from_metadata(meta);
                // Paths the index does not track are skipped entirely.
                let Some(ie) = index_entries.get(path.as_bytes()) else {
                    continue;
                };
                let index_matches_tree = ie.oid == te.oid && ie.mode == te.mode;
                // Clean stat cache + matching modes: provably unchanged.
                if index_matches_tree && wt_mode == te.mode && stat_matches(ie, meta) {
                    continue;
                }
                let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
                let idx_ent = index_entries.get(path.as_bytes()).copied();
                // Staged mode-only change: report without hashing.
                if ie.oid == te.oid && ie.mode != te.mode {
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path.clone()),
                        new_path: Some(path.clone()),
                        old_mode: format_mode(te.mode),
                        new_mode: format_mode(ie.mode),
                        old_oid: te.oid,
                        new_oid: te.oid,
                        score: None,
                    });
                    continue;
                }
                if index_matches_tree {
                    let wt_oid = hash_worktree_file(
                        odb,
                        &file_path,
                        meta,
                        &conv,
                        &file_attrs,
                        path,
                        idx_ent,
                    )?;
                    // A verbatim raw-byte match still counts as unchanged
                    // even when CRLF conversion altered the hash.
                    let mut eff_oid = wt_oid;
                    if eff_oid != te.oid {
                        if let Ok(raw) = fs::read(&file_path) {
                            let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
                            if raw_oid == te.oid {
                                eff_oid = te.oid;
                            }
                        }
                    }
                    if eff_oid != te.oid {
                        result.push(DiffEntry {
                            status: DiffStatus::Modified,
                            old_path: Some(path.clone()),
                            new_path: Some(path.clone()),
                            old_mode: format_mode(te.mode),
                            new_mode: format_mode(wt_mode),
                            old_oid: te.oid,
                            new_oid: eff_oid,
                            score: None,
                        });
                    } else if wt_mode != te.mode {
                        // Content unchanged, mode flipped in the worktree.
                        result.push(DiffEntry {
                            status: DiffStatus::Modified,
                            old_path: Some(path.clone()),
                            new_path: Some(path.clone()),
                            old_mode: format_mode(te.mode),
                            new_mode: format_mode(wt_mode),
                            old_oid: te.oid,
                            new_oid: te.oid,
                            score: None,
                        });
                    }
                    continue;
                }
                // Index differs from the tree: compare worktree to the tree.
                let wt_oid =
                    hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, idx_ent)?;
                let mut eff_oid = wt_oid;
                if eff_oid != te.oid {
                    if let Ok(raw) = fs::read(&file_path) {
                        let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
                        if raw_oid == te.oid {
                            eff_oid = te.oid;
                        }
                    }
                }
                if eff_oid != te.oid || wt_mode != te.mode {
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path.clone()),
                        new_path: Some(path.clone()),
                        old_mode: format_mode(te.mode),
                        new_mode: format_mode(wt_mode),
                        old_oid: te.oid,
                        new_oid: eff_oid,
                        score: None,
                    });
                }
            }
            (Some(te), None) => {
                // In the tree, missing on disk: deleted.
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path.clone()),
                    new_path: None,
                    old_mode: format_mode(te.mode),
                    new_mode: "000000".to_owned(),
                    old_oid: te.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            (None, Some(ref meta)) => {
                // In the index (hence in all_paths) but not the tree: added.
                let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
                let wt_oid = hash_worktree_file(
                    odb,
                    &file_path,
                    meta,
                    &conv,
                    &file_attrs,
                    path,
                    index_entries.get(path.as_bytes()).copied(),
                )?;
                let wt_mode = mode_from_metadata(meta);
                result.push(DiffEntry {
                    status: DiffStatus::Added,
                    old_path: None,
                    new_path: Some(path.clone()),
                    old_mode: "000000".to_owned(),
                    new_mode: format_mode(wt_mode),
                    old_oid: zero_oid(),
                    new_oid: wt_oid,
                    score: None,
                });
            }
            (None, None) => {
                // Indexed but missing from both tree and worktree:
                // nothing to report against the tree.
            }
        }
    }
    result.sort_by(|a, b| a.path().cmp(b.path()));
    Ok(result)
}
/// Pairs up Deleted/Added entries whose content similarity is at least
/// `threshold` (0-100) and rewrites each pair as a single `Renamed` entry.
///
/// Assignment is greedy: pairs sharing a basename win over higher scores,
/// then higher scores win. Unmatched adds/deletes pass through unchanged.
/// The result is sorted by path.
pub fn detect_renames(odb: &Odb, entries: Vec<DiffEntry>, threshold: u32) -> Vec<DiffEntry> {
    // Only pure deletes and adds can form a rename pair.
    let mut deleted: Vec<DiffEntry> = Vec::new();
    let mut added: Vec<DiffEntry> = Vec::new();
    let mut others: Vec<DiffEntry> = Vec::new();
    for entry in entries {
        match entry.status {
            DiffStatus::Deleted => deleted.push(entry),
            DiffStatus::Added => added.push(entry),
            _ => others.push(entry),
        }
    }
    if deleted.is_empty() || added.is_empty() {
        let mut result = others;
        result.extend(deleted);
        result.extend(added);
        result.sort_by(|a, b| a.path().cmp(b.path()));
        return result;
    }
    // Blob contents fetched once per side; unreadable blobs score 0.
    let deleted_contents: Vec<Option<Vec<u8>>> = deleted
        .iter()
        .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
        .collect();
    let added_contents: Vec<Option<Vec<u8>>> = added
        .iter()
        .map(|a| odb.read(&a.new_oid).ok().map(|obj| obj.data))
        .collect();
    let mut scores: Vec<(u32, usize, usize)> = Vec::new();
    for (di, del) in deleted.iter().enumerate() {
        for (ai, add) in added.iter().enumerate() {
            // Identical OIDs are a guaranteed 100% match; skip diffing.
            if del.old_oid == add.new_oid {
                scores.push((100, di, ai));
                continue;
            }
            let score = match (&deleted_contents[di], &added_contents[ai]) {
                (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
                _ => 0,
            };
            if score >= threshold {
                scores.push((score, di, ai));
            }
        }
    }
    // Same-basename pairs first, then descending score.
    scores.sort_by(|a, b| {
        let a_same = same_basename(&deleted[a.1], &added[a.2]);
        let b_same = same_basename(&deleted[b.1], &added[b.2]);
        b_same.cmp(&a_same).then_with(|| b.0.cmp(&a.0))
    });
    // Greedy one-to-one assignment in the sorted order.
    let mut used_deleted = vec![false; deleted.len()];
    let mut used_added = vec![false; added.len()];
    let mut renames: Vec<DiffEntry> = Vec::new();
    for (score, di, ai) in &scores {
        if used_deleted[*di] || used_added[*ai] {
            continue;
        }
        used_deleted[*di] = true;
        used_added[*ai] = true;
        let del = &deleted[*di];
        let add = &added[*ai];
        renames.push(DiffEntry {
            status: DiffStatus::Renamed,
            old_path: del.old_path.clone(),
            new_path: add.new_path.clone(),
            old_mode: del.old_mode.clone(),
            new_mode: add.new_mode.clone(),
            old_oid: del.old_oid,
            new_oid: add.new_oid,
            score: Some(*score),
        });
    }
    // Unpaired deletes/adds survive as-is.
    let mut result = others;
    result.extend(renames);
    for (i, entry) in deleted.into_iter().enumerate() {
        if !used_deleted[i] {
            result.push(entry);
        }
    }
    for (i, entry) in added.into_iter().enumerate() {
        if !used_added[i] {
            result.push(entry);
        }
    }
    result.sort_by(|a, b| a.path().cmp(b.path()));
    result
}
/// Rewrites Added entries as `Copied` (or `Renamed`) when their content
/// matches a source path with similarity >= `threshold`.
///
/// Sources are: old paths of deletions, old paths of modifications, and —
/// with `find_copies_harder` — every `(path, mode string, oid)` in
/// `source_tree_entries`. When a deleted source matches, one of its matches
/// becomes the rename and the rest become copies.
///
/// NOTE(review): iterating `source_to_added` (a `HashMap`) makes the push
/// order of result entries nondeterministic; the final stable sort is by
/// path only, so equal-path entries may vary in order — confirm whether
/// any caller depends on a fixed order.
pub fn detect_copies(
    odb: &Odb,
    entries: Vec<DiffEntry>,
    threshold: u32,
    find_copies_harder: bool,
    source_tree_entries: &[(String, String, ObjectId)],
) -> Vec<DiffEntry> {
    use std::collections::{HashMap, HashSet};
    let mut deleted: Vec<DiffEntry> = Vec::new();
    let mut added: Vec<DiffEntry> = Vec::new();
    let mut others: Vec<DiffEntry> = Vec::new();
    for entry in entries {
        match entry.status {
            DiffStatus::Deleted => deleted.push(entry),
            DiffStatus::Added => added.push(entry),
            _ => others.push(entry),
        }
    }
    if added.is_empty() {
        let mut result = others;
        result.extend(deleted);
        result.sort_by(|a, b| a.path().cmp(b.path()));
        return result;
    }
    // Candidate sources: (path, oid, came-from-a-deletion).
    let mut sources: Vec<(String, ObjectId, bool)> = Vec::new();
    let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
    for entry in &deleted {
        if let Some(ref path) = entry.old_path {
            deleted_source_idx.insert(path.clone(), sources.len());
            sources.push((path.clone(), entry.old_oid, true));
        }
    }
    for entry in &others {
        if entry.status == DiffStatus::Modified {
            if let Some(ref old_path) = entry.old_path {
                if !sources.iter().any(|(p, _, _)| p == old_path) {
                    sources.push((old_path.clone(), entry.old_oid, false));
                }
            }
        }
    }
    if find_copies_harder {
        // Every unchanged tree path also becomes a potential copy source.
        for (path, _mode, oid) in source_tree_entries {
            if !sources.iter().any(|(p, _, _)| p == path) {
                sources.push((path.clone(), *oid, false));
            }
        }
    }
    if sources.is_empty() {
        let mut result = others;
        result.extend(deleted);
        result.extend(added);
        result.sort_by(|a, b| a.path().cmp(b.path()));
        return result;
    }
    // Blob contents fetched once per side; unreadable blobs score 0.
    let source_contents: Vec<Option<Vec<u8>>> = sources
        .iter()
        .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
        .collect();
    let added_contents: Vec<Option<Vec<u8>>> = added
        .iter()
        .map(|a| odb.read(&a.new_oid).ok().map(|obj| obj.data))
        .collect();
    let mut scores: Vec<(u32, usize, usize)> = Vec::new();
    for (si, (src_path, src_oid, _)) in sources.iter().enumerate() {
        for (ai, add) in added.iter().enumerate() {
            // A path cannot be a copy of itself.
            if add.new_path.as_deref() == Some(src_path.as_str()) {
                continue;
            }
            if *src_oid == add.new_oid {
                scores.push((100, si, ai));
                continue;
            }
            let score = match (&source_contents[si], &added_contents[ai]) {
                (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
                _ => 0,
            };
            if score >= threshold {
                scores.push((score, si, ai));
            }
        }
    }
    scores.sort_by(|a, b| b.0.cmp(&a.0));
    // Greedily bind each added entry to its best-scoring source.
    let mut used_added = vec![false; added.len()];
    let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
    for &(score, si, ai) in &scores {
        if used_added[ai] {
            continue;
        }
        used_added[ai] = true;
        source_to_added.entry(si).or_default().push((ai, score));
    }
    let mut used_added2 = vec![false; added.len()];
    let mut result_entries: Vec<DiffEntry> = Vec::new();
    let mut renamed_deleted: HashSet<usize> = HashSet::new();
    for (&si, assignments_for_src) in &source_to_added {
        let (_, _, is_deleted) = &sources[si];
        if *is_deleted && !assignments_for_src.is_empty() {
            // One match of a deleted source becomes the rename; the rest
            // are copies. NOTE(review): the rename target is the lexically
            // greatest added path — confirm this matches the intended
            // tie-breaking rule.
            let rename_ai = assignments_for_src
                .iter()
                .max_by_key(|(ai, _score)| added[*ai].path().to_string())
                .map(|(ai, _)| *ai);
            for &(ai, score) in assignments_for_src {
                let (ref src_path, _, _) = sources[si];
                let add = &added[ai];
                // Prefer the source's tree mode when known, else fall back
                // to the added entry's old mode.
                let src_mode = source_tree_entries
                    .iter()
                    .find(|(p, _, _)| p == src_path)
                    .map(|(_, m, _)| m.clone())
                    .unwrap_or_else(|| add.old_mode.clone());
                let is_rename = Some(ai) == rename_ai;
                result_entries.push(DiffEntry {
                    status: if is_rename {
                        DiffStatus::Renamed
                    } else {
                        DiffStatus::Copied
                    },
                    old_path: Some(src_path.clone()),
                    new_path: add.new_path.clone(),
                    old_mode: src_mode,
                    new_mode: add.new_mode.clone(),
                    old_oid: sources[si].1,
                    new_oid: add.new_oid,
                    score: Some(score),
                });
                used_added2[ai] = true;
            }
            renamed_deleted.insert(si);
        } else {
            // Non-deleted sources only ever yield copies.
            for &(ai, score) in assignments_for_src {
                let (ref src_path, _, _) = sources[si];
                let add = &added[ai];
                let src_mode = source_tree_entries
                    .iter()
                    .find(|(p, _, _)| p == src_path)
                    .map(|(_, m, _)| m.clone())
                    .unwrap_or_else(|| add.old_mode.clone());
                result_entries.push(DiffEntry {
                    status: DiffStatus::Copied,
                    old_path: Some(src_path.clone()),
                    new_path: add.new_path.clone(),
                    old_mode: src_mode,
                    new_mode: add.new_mode.clone(),
                    old_oid: sources[si].1,
                    new_oid: add.new_oid,
                    score: Some(score),
                });
                used_added2[ai] = true;
            }
        }
    }
    // Deleted entries consumed by a rename are dropped; the rest survive.
    for entry in deleted.into_iter() {
        if let Some(ref path) = entry.old_path {
            if let Some(&si) = deleted_source_idx.get(path) {
                if renamed_deleted.contains(&si) {
                    continue;
                }
            }
        }
        result_entries.push(entry);
    }
    let mut result = others;
    result.extend(result_entries);
    for (i, entry) in added.into_iter().enumerate() {
        if !used_added2[i] {
            result.push(entry);
        }
    }
    result.sort_by(|a, b| a.path().cmp(b.path()));
    result
}
/// Runs rename detection on a raw diff and, when `copies` is set, follows
/// up with copy detection using the given head tree as the source set.
///
/// # Errors
/// Propagates failures while flattening the head tree.
pub fn status_apply_rename_copy_detection(
    odb: &Odb,
    unstaged_raw: Vec<DiffEntry>,
    threshold: u32,
    copies: bool,
    head_tree: Option<&ObjectId>,
) -> Result<Vec<DiffEntry>> {
    let after_renames = detect_renames(odb, unstaged_raw, threshold);
    if !copies {
        return Ok(after_renames);
    }
    // Copy sources come from the head tree; no tree means no extra sources.
    let source_tree_entries: Vec<(String, String, ObjectId)> = if let Some(oid) = head_tree {
        flatten_tree(odb, oid, "")?
            .into_iter()
            .map(|e| (e.path, format_mode(e.mode), e.oid))
            .collect()
    } else {
        Vec::new()
    };
    let detected = detect_copies(odb, after_renames, threshold, false, &source_tree_entries);
    Ok(detected)
}
/// Renders a rename as `prefix{old_mid => new_mid}suffix`, factoring out
/// the shared leading and trailing path components (like git's rename
/// output), or `old => new` when nothing is shared.
pub fn format_rename_path(old: &str, new: &str) -> String {
    let ob = old.as_bytes();
    let nb = new.as_bytes();
    // Length of the shared leading prefix, cut back to a '/' boundary.
    let pfx = {
        let mut boundary = 0usize;
        for (i, (a, b)) in ob.iter().zip(nb.iter()).enumerate() {
            if a != b {
                break;
            }
            if *a == b'/' {
                boundary = i + 1;
            }
        }
        boundary
    };
    // Length of the shared trailing suffix, cut back to a '/' boundary.
    let mut sfx = {
        let mut boundary = 0usize;
        for (i, (a, b)) in ob.iter().rev().zip(nb.iter().rev()).enumerate() {
            if a != b {
                break;
            }
            if *a == b'/' {
                boundary = i + 1;
            }
        }
        boundary
    };
    let mut sfx_at_old = ob.len() - sfx;
    let mut sfx_at_new = nb.len() - sfx;
    // Shrink the suffix while it overlaps the prefix on both sides, giving
    // whole components back to the middle; bail to "no suffix" when no
    // smaller '/'-aligned suffix exists.
    while pfx > sfx_at_old && pfx > sfx_at_new && sfx > 0 {
        let tail = &ob[sfx_at_old..];
        let shrunk = tail
            .iter()
            .enumerate()
            .skip(1)
            .find(|(_, byte)| **byte == b'/')
            .map_or(0, |(i, _)| sfx - i);
        if shrunk == 0 || shrunk >= sfx {
            sfx_at_old = ob.len();
            sfx_at_new = nb.len();
            break;
        }
        sfx = shrunk;
        sfx_at_old = ob.len() - sfx;
        sfx_at_new = nb.len() - sfx;
    }
    let prefix = &old[..pfx];
    let suffix = &old[sfx_at_old..];
    // Out-of-order ranges (prefix past the suffix start) render as empty.
    let old_mid = old.get(pfx..sfx_at_old).unwrap_or("");
    let new_mid = new.get(pfx..sfx_at_new).unwrap_or("");
    if prefix.is_empty() && suffix.is_empty() {
        return format!("{old} => {new}");
    }
    format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
}
/// Returns whether a deleted and an added entry share a non-empty final
/// path component (used to prioritize rename candidates).
fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
    // rsplit always yields at least one item, so the fallback never fires.
    fn basename(p: &str) -> &str {
        p.rsplit('/').next().unwrap_or(p)
    }
    let old_base = basename(del.old_path.as_deref().unwrap_or(""));
    let new_base = basename(add.new_path.as_deref().unwrap_or(""));
    !old_base.is_empty() && old_base == new_base
}
/// Line-based similarity (0-100) between two blobs.
///
/// CRLF is normalized to LF first so line-ending-only differences do not
/// depress the score; the score is the byte count of unchanged lines
/// relative to the larger normalized side.
///
/// (Fix: the original also computed `total = src + dst` and re-checked
/// `total == 0` — that branch was unreachable after the both-empty early
/// return, and `total` was otherwise unused; both removed.)
fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
    let old_norm = crate::crlf::crlf_to_lf(old);
    let new_norm = crate::crlf::crlf_to_lf(new);
    // Two empty blobs are identical by definition (also avoids a divide
    // by zero on max_size below).
    if old_norm.is_empty() && new_norm.is_empty() {
        return 100;
    }
    use similar::{ChangeTag, TextDiff};
    let old_str = String::from_utf8_lossy(&old_norm);
    let new_str = String::from_utf8_lossy(&new_norm);
    let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
    // Sum the bytes of every line the diff left untouched.
    let mut shared_bytes = 0usize;
    for change in diff.iter_all_changes() {
        if change.tag() == ChangeTag::Equal {
            shared_bytes += change.value().len();
        }
    }
    let max_size = old_norm.len().max(new_norm.len());
    ((shared_bytes * 100) / max_size).min(100) as u32
}
/// Similarity score (0-100) used when pairing deletes with adds for
/// rename/copy detection; delegates to the shared line-based metric.
#[must_use]
pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
    compute_similarity(old, new)
}
/// Formats one entry in `git diff --raw` style:
/// `:<old_mode> <new_mode> <old_oid> <new_oid> <status>\t<path>`.
/// Rename/copy entries carry a similarity score (`R085`) and both paths
/// separated by a tab.
pub fn format_raw(entry: &DiffEntry) -> String {
    let status_str = match (entry.status, entry.score) {
        (DiffStatus::Renamed, Some(s)) => format!("R{s:03}"),
        (DiffStatus::Copied, Some(s)) => format!("C{s:03}"),
        _ => entry.status.letter().to_string(),
    };
    let path_field = if matches!(entry.status, DiffStatus::Renamed | DiffStatus::Copied) {
        let from = entry.old_path.as_deref().unwrap_or("");
        let to = entry.new_path.as_deref().unwrap_or("");
        format!("{from}\t{to}")
    } else {
        entry.path().to_owned()
    };
    format!(
        ":{} {} {} {} {}\t{}",
        entry.old_mode, entry.new_mode, entry.old_oid, entry.new_oid, status_str, path_field
    )
}
/// Raw-format line with object ids abbreviated to `abbrev_len` hex digits.
/// Note this variant always prints the bare status letter and the single
/// display path (no score / old-new pair).
pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
    // GIT_PRINT_SHA1_ELLIPSIS=yes restores the historical "..." suffix
    // after each abbreviated object id.
    let ellipsis = match std::env::var("GIT_PRINT_SHA1_ELLIPSIS") {
        Ok(v) if v == "yes" => "...",
        _ => "",
    };
    let old_hex = entry.old_oid.to_string();
    let new_hex = entry.new_oid.to_string();
    // Hex digits are ASCII, so byte slicing is char-boundary-safe here.
    let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
    let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
    format!(
        ":{} {} {}{} {}{} {}\t{}",
        entry.old_mode,
        entry.new_mode,
        old_abbrev,
        ellipsis,
        new_abbrev,
        ellipsis,
        entry.status.letter(),
        entry.path()
    )
}
/// Renders a unified diff with the default `a/`/`b/` path prefixes and no
/// extra inter-hunk context. Thin wrapper over `unified_diff_with_prefix`.
pub fn unified_diff(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
) -> String {
    unified_diff_with_prefix(
        old_content,
        new_content,
        old_path,
        new_path,
        context_lines,
        0,
        "a/",
        "b/",
    )
}
/// Unified diff with caller-chosen path prefixes and inter-hunk context,
/// without function-name hunk headers (funcname matcher set to `None`).
pub fn unified_diff_with_prefix(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    inter_hunk_context: usize,
    src_prefix: &str,
    dst_prefix: &str,
) -> String {
    unified_diff_with_prefix_and_funcname(
        old_content,
        new_content,
        old_path,
        new_path,
        context_lines,
        inter_hunk_context,
        src_prefix,
        dst_prefix,
        None,
    )
}
/// Unified diff with optional funcname matching for hunk headers, using the
/// default Myers algorithm.
pub fn unified_diff_with_prefix_and_funcname(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    inter_hunk_context: usize,
    src_prefix: &str,
    dst_prefix: &str,
    funcname_matcher: Option<&FuncnameMatcher>,
) -> String {
    unified_diff_with_prefix_and_funcname_and_algorithm(
        old_content,
        new_content,
        old_path,
        new_path,
        context_lines,
        inter_hunk_context,
        src_prefix,
        dst_prefix,
        funcname_matcher,
        similar::Algorithm::Myers,
    )
}
/// Fully-parameterized unified diff renderer: explicit path prefixes,
/// inter-hunk context, optional funcname (hunk-header function context)
/// matching, and a selectable diff algorithm.
pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    inter_hunk_context: usize,
    src_prefix: &str,
    dst_prefix: &str,
    funcname_matcher: Option<&FuncnameMatcher>,
    algorithm: similar::Algorithm,
) -> String {
    use similar::{group_diff_ops, udiff::UnifiedDiffHunk, TextDiff};
    let diff = TextDiff::configure()
        .algorithm(algorithm)
        .diff_lines(old_content, new_content);
    let mut output = String::new();
    // "---"/"+++" header lines; /dev/null (pure add/delete) takes no prefix.
    if old_path == "/dev/null" {
        output.push_str("--- /dev/null\n");
    } else {
        output.push_str(&format!("--- {src_prefix}{old_path}\n"));
    }
    if new_path == "/dev/null" {
        output.push_str("+++ /dev/null\n");
    } else {
        output.push_str(&format!("+++ {dst_prefix}{new_path}\n"));
    }
    let old_lines: Vec<&str> = old_content.lines().collect();
    // NOTE(review): ops are grouped with a radius of 2*context + inter-hunk
    // context — presumably so that hunks close enough to share context get
    // merged (git's --inter-hunk-context behavior); confirm against the
    // expected per-hunk context size.
    let group_radius = context_lines
        .saturating_mul(2)
        .saturating_add(inter_hunk_context);
    let op_groups = group_diff_ops(diff.ops().to_vec(), group_radius);
    for ops in op_groups {
        if ops.is_empty() {
            continue;
        }
        let hunk = UnifiedDiffHunk::new(ops, &diff, true);
        let hunk_str = format!("{hunk}");
        // Splice the function context (if found) onto the "@@ … @@" header.
        if let Some(first_newline) = hunk_str.find('\n') {
            let header_line = &hunk_str[..first_newline];
            let rest = &hunk_str[first_newline..];
            if let Some(func_ctx) =
                extract_function_context(header_line, &old_lines, funcname_matcher)
            {
                output.push_str(header_line);
                output.push(' ');
                output.push_str(&func_ctx);
                output.push_str(rest);
            } else {
                output.push_str(&hunk_str);
            }
        } else {
            output.push_str(&hunk_str);
        }
    }
    output
}
/// Unified diff in which each anchor line that occurs exactly once in both
/// files is forced to align with itself; only the segments between anchors
/// are diffed. Falls back to a plain unified diff when no anchor is usable.
pub fn anchored_unified_diff(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    anchors: &[String],
    algorithm: similar::Algorithm,
) -> String {
    use similar::TextDiff;
    let old_lines: Vec<&str> = old_content.lines().collect();
    let new_lines: Vec<&str> = new_content.lines().collect();
    // An anchor is usable only if it matches exactly one line on each side
    // (comparison ignores trailing whitespace).
    let mut anchor_pairs: Vec<(usize, usize)> = Vec::new();
    for anchor in anchors {
        let anchor_str = anchor.as_str();
        let old_positions: Vec<usize> = old_lines
            .iter()
            .enumerate()
            .filter(|(_, l)| l.trim_end() == anchor_str)
            .map(|(i, _)| i)
            .collect();
        let new_positions: Vec<usize> = new_lines
            .iter()
            .enumerate()
            .filter(|(_, l)| l.trim_end() == anchor_str)
            .map(|(i, _)| i)
            .collect();
        if old_positions.len() == 1 && new_positions.len() == 1 {
            anchor_pairs.push((old_positions[0], new_positions[0]));
        }
    }
    if anchor_pairs.is_empty() {
        return unified_diff_with_prefix_and_funcname_and_algorithm(
            old_content,
            new_content,
            old_path,
            new_path,
            context_lines,
            0,
            "a/",
            "b/",
            None,
            algorithm,
        );
    }
    // Keep only pairs that increase monotonically on BOTH sides so the
    // aligned segments never cross.
    anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
    let mut filtered: Vec<(usize, usize)> = Vec::new();
    for &pair in &anchor_pairs {
        if filtered.is_empty() || pair.1 > filtered.last().unwrap().1 {
            filtered.push(pair);
        }
    }
    let anchor_pairs = filtered;
    // Flat change script: one tag (' ', '-', '+') per output line.
    struct DiffOp {
        tag: char,
        line: String,
    }
    let mut ops: Vec<DiffOp> = Vec::new();
    let mut old_pos = 0usize;
    let mut new_pos = 0usize;
    // Diff each between-anchor segment independently, then emit the anchor
    // line itself as an unconditional equal op.
    for &(old_anchor, new_anchor) in &anchor_pairs {
        let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
        let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
        let old_seg_text = old_segment.join("\n");
        let new_seg_text = new_segment.join("\n");
        if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
            // Re-append a trailing newline so diff_lines sees whole lines.
            let old_seg_input = if old_seg_text.is_empty() {
                String::new()
            } else {
                format!("{}\n", old_seg_text)
            };
            let new_seg_input = if new_seg_text.is_empty() {
                String::new()
            } else {
                format!("{}\n", new_seg_text)
            };
            let seg_diff = TextDiff::configure()
                .algorithm(algorithm)
                .diff_lines(&old_seg_input, &new_seg_input);
            for change in seg_diff.iter_all_changes() {
                let tag = match change.tag() {
                    similar::ChangeTag::Equal => ' ',
                    similar::ChangeTag::Delete => '-',
                    similar::ChangeTag::Insert => '+',
                };
                ops.push(DiffOp {
                    tag,
                    line: change.value().trim_end_matches('\n').to_string(),
                });
            }
        }
        ops.push(DiffOp {
            tag: ' ',
            line: old_lines[old_anchor].to_string(),
        });
        old_pos = old_anchor + 1;
        new_pos = new_anchor + 1;
    }
    // Trailing segment after the last anchor.
    let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
    let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
    let old_seg_text = old_segment.join("\n");
    let new_seg_text = new_segment.join("\n");
    if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
        let old_seg_input = if old_seg_text.is_empty() {
            String::new()
        } else {
            format!("{}\n", old_seg_text)
        };
        let new_seg_input = if new_seg_text.is_empty() {
            String::new()
        } else {
            format!("{}\n", new_seg_text)
        };
        let seg_diff = TextDiff::configure()
            .algorithm(algorithm)
            .diff_lines(&old_seg_input, &new_seg_input);
        for change in seg_diff.iter_all_changes() {
            let tag = match change.tag() {
                similar::ChangeTag::Equal => ' ',
                similar::ChangeTag::Delete => '-',
                similar::ChangeTag::Insert => '+',
            };
            ops.push(DiffOp {
                tag,
                line: change.value().trim_end_matches('\n').to_string(),
            });
        }
    }
    let mut output = String::new();
    if old_path == "/dev/null" {
        output.push_str("--- /dev/null\n");
    } else {
        output.push_str(&format!("--- a/{old_path}\n"));
    }
    if new_path == "/dev/null" {
        output.push_str("+++ /dev/null\n");
    } else {
        output.push_str(&format!("+++ b/{new_path}\n"));
    }
    let total_ops = ops.len();
    if total_ops == 0 {
        return output;
    }
    // Group changed ops into hunks: each hunk extends `context_lines` of
    // context on both sides, and changes whose context would touch
    // (gap <= 2*context) are merged into the same hunk.
    let mut hunks: Vec<(usize, usize)> = Vec::new();
    let mut i = 0;
    while i < total_ops {
        if ops[i].tag != ' ' {
            let start = i.saturating_sub(context_lines);
            let mut end = i;
            while end < total_ops {
                if ops[end].tag != ' ' {
                    end += 1;
                    continue;
                }
                let mut next_change = end;
                while next_change < total_ops && ops[next_change].tag == ' ' {
                    next_change += 1;
                }
                if next_change < total_ops && next_change - end <= context_lines * 2 {
                    end = next_change + 1;
                } else {
                    end = (end + context_lines).min(total_ops);
                    break;
                }
            }
            // Merge with the previous hunk if the ranges overlap.
            if let Some(last) = hunks.last_mut() {
                if start <= last.1 {
                    last.1 = end;
                } else {
                    hunks.push((start, end));
                }
            } else {
                hunks.push((start, end));
            }
            i = end;
        } else {
            i += 1;
        }
    }
    for (start, end) in hunks {
        // Recover the 1-based old/new start lines by replaying the tags of
        // everything before the hunk.
        let mut old_start = 1usize;
        let mut new_start = 1usize;
        for op in &ops[..start] {
            match op.tag {
                ' ' => {
                    old_start += 1;
                    new_start += 1;
                }
                '-' => {
                    old_start += 1;
                }
                '+' => {
                    new_start += 1;
                }
                _ => {}
            }
        }
        let mut old_count = 0usize;
        let mut new_count = 0usize;
        for op in &ops[start..end] {
            match op.tag {
                ' ' => {
                    old_count += 1;
                    new_count += 1;
                }
                '-' => {
                    old_count += 1;
                }
                '+' => {
                    new_count += 1;
                }
                _ => {}
            }
        }
        output.push_str(&format!(
            "@@ -{},{} +{},{} @@\n",
            old_start, old_count, new_start, new_count
        ));
        for op in &ops[start..end] {
            output.push(op.tag);
            output.push_str(&op.line);
            output.push('\n');
        }
    }
    output
}
/// Given a "@@ -start,count +…" hunk header, scans the old file backwards
/// from just before `start` for the nearest function-context line: either
/// the first line matched by `funcname_matcher`, or (with no matcher) the
/// first line starting with a letter, '_' or '$'. Result is clipped to 80
/// bytes on a UTF-8 character boundary.
fn extract_function_context(
    header: &str,
    old_lines: &[&str],
    funcname_matcher: Option<&FuncnameMatcher>,
) -> Option<String> {
    // Parse the old-file start line out of the header.
    let dash = header.find('-')?;
    let tail = &header[dash + 1..];
    let num_end = tail.find([',', ' '])?;
    let start_line: usize = tail[..num_end].parse().ok()?;
    if start_line <= 1 {
        return None;
    }
    let limit = (start_line - 1).min(old_lines.len());
    // Clip to 80 bytes without splitting a multi-byte character.
    fn clip(text: &str) -> String {
        if text.len() <= 80 {
            return text.to_owned();
        }
        let mut end = 80;
        while end > 0 && !text.is_char_boundary(end) {
            end -= 1;
        }
        text[..end].to_owned()
    }
    for line in old_lines[..limit].iter().rev() {
        if line.is_empty() {
            continue;
        }
        match funcname_matcher {
            Some(matcher) => {
                if let Some(found) = matcher.match_line(line) {
                    return Some(clip(&found));
                }
                // With a matcher present, the default heuristic never runs.
            }
            None => {
                let first = line.as_bytes()[0];
                if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
                    return Some(clip(line.trim_end()));
                }
            }
        }
    }
    None
}
/// Renders one `--stat` row with an auto-sized count column; see
/// `format_stat_line_width` for the explicit-width variant.
pub fn format_stat_line(
    path: &str,
    insertions: usize,
    deletions: usize,
    max_path_len: usize,
) -> String {
    format_stat_line_width(path, insertions, deletions, max_path_len, 0)
}
/// Renders one `--stat` row: ` <path padded> | <total> <+++/--- bar>`.
/// `count_width` of 0 means "size the count column to fit the number";
/// the bar caps at 50 '+' and 50 '-' characters.
pub fn format_stat_line_width(
    path: &str,
    insertions: usize,
    deletions: usize,
    max_path_len: usize,
    count_width: usize,
) -> String {
    let total = insertions + deletions;
    let cw = match count_width {
        0 => total.to_string().len(),
        w => w,
    };
    let mut bar = String::new();
    bar.push_str(&"+".repeat(insertions.min(50)));
    bar.push_str(&"-".repeat(deletions.min(50)));
    if bar.is_empty() {
        format!(" {path:<max_path_len$} | {total:>cw$}")
    } else {
        format!(" {path:<max_path_len$} | {total:>cw$} {bar}")
    }
}
/// Counts `(insertions, deletions)` of lines between two texts using the
/// default Myers diff; see `count_changes_with_algorithm`.
pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
    count_changes_with_algorithm(old_content, new_content, similar::Algorithm::Myers)
}
/// Counts inserted and deleted lines between two texts with the given diff
/// algorithm, returning `(insertions, deletions)`.
#[must_use]
pub fn count_changes_with_algorithm(
    old_content: &str,
    new_content: &str,
    algorithm: similar::Algorithm,
) -> (usize, usize) {
    use similar::{ChangeTag, TextDiff};
    let diff = TextDiff::configure()
        .algorithm(algorithm)
        .diff_lines(old_content, new_content);
    // Tally both counters in a single pass over the change stream.
    diff.iter_all_changes()
        .fold((0, 0), |(ins, del), change| match change.tag() {
            ChangeTag::Insert => (ins + 1, del),
            ChangeTag::Delete => (ins, del + 1),
            ChangeTag::Equal => (ins, del),
        })
}
/// Counts lines the way git does: one per '\n', plus one for a trailing
/// fragment that lacks a final newline. Empty input has zero lines.
#[must_use]
pub fn count_git_lines(data: &[u8]) -> usize {
    if data.is_empty() {
        return 0;
    }
    let newline_count = data.iter().filter(|&&b| b == b'\n').count();
    if data.ends_with(b"\n") {
        newline_count
    } else {
        // Unterminated final line still counts.
        newline_count + 1
    }
}
/// Maximum break/rewrite score scale; mirrors git's MAX_SCORE (60000 == 100%).
const DIFF_MAX_SCORE: u64 = 60_000;
/// Blobs smaller than this are never considered for rewrite-breaking;
/// mirrors git's MINIMUM_BREAK_SIZE.
const DIFF_MINIMUM_BREAK_SIZE: usize = 400;
/// Default -B threshold (50% of MAX_SCORE); mirrors git's DEFAULT_BREAK_SCORE.
const DIFF_DEFAULT_BREAK_SCORE: u64 = 30_000;
/// Prime modulus for span hashing; mirrors HASHBASE in git's diffcore-delta.c.
const DIFF_HASHBASE: u32 = 107_927;
/// One open-addressing slot: a span hash and how many bytes hashed to it.
/// `cnt == 0` marks an empty slot.
#[derive(Clone, Copy, Default)]
struct SpanSlot {
    hashval: u32,
    cnt: u32,
}
/// Open-addressed hash table of span byte counts — a port of git's spanhash
/// table (diffcore-delta.c) used for similarity estimation.
struct SpanHashTop {
    /// log2 of the current capacity (capacity is always a power of two).
    alloc_log2: u8,
    /// Remaining insertions before the table must grow (load-factor budget).
    free_slots: i32,
    data: Vec<SpanSlot>,
}
impl SpanHashTop {
    /// Creates a table with `2^initial_log2` empty slots.
    fn new(initial_log2: u8) -> Self {
        let cap = 1usize << initial_log2;
        Self {
            alloc_log2: initial_log2,
            free_slots: initial_free(initial_log2),
            data: vec![SpanSlot::default(); cap],
        }
    }
    /// Current capacity.
    fn len(&self) -> usize {
        1usize << self.alloc_log2
    }
    /// Adds `cnt` bytes to the bucket for `hashval` (linear probing),
    /// growing the table once the free-slot budget is exhausted.
    ///
    /// Fix vs. the original: after a growth-triggering insert we RETURN.
    /// The old code broke out and retried the insertion against the new
    /// table, found the freshly rehashed entry, and added `cnt` a second
    /// time — double-counting every span whose insert caused a rehash.
    /// (git's add_spanhash returns immediately after rehashing.)
    fn add_span(&mut self, hashval: u32, cnt: u32) {
        let mask = self.len() - 1;
        let mut bucket = (hashval as usize) & mask;
        loop {
            let slot = &mut self.data[bucket];
            if slot.cnt == 0 {
                // Claim the empty slot, then grow if over the load budget.
                slot.hashval = hashval;
                slot.cnt = cnt;
                self.free_slots -= 1;
                if self.free_slots < 0 {
                    // The entry is already stored; rehash carries it into
                    // the new table, so no retry is needed (or correct).
                    self.rehash();
                }
                return;
            }
            if slot.hashval == hashval {
                slot.cnt = slot.cnt.saturating_add(cnt);
                return;
            }
            // Linear probe with power-of-two wraparound.
            bucket = (bucket + 1) & mask;
        }
    }
    /// Doubles the table and re-inserts every occupied slot.
    fn rehash(&mut self) {
        let old = std::mem::take(&mut self.data);
        self.alloc_log2 = self.alloc_log2.saturating_add(1);
        self.free_slots = initial_free(self.alloc_log2);
        self.data = vec![SpanSlot::default(); 1usize << self.alloc_log2];
        for slot in &old {
            if slot.cnt != 0 {
                // May recursively grow again; add_span handles that safely.
                self.add_span(slot.hashval, slot.cnt);
            }
        }
    }
    /// Sorts occupied slots by hash value with empty slots pushed to the
    /// end, so two tables can be merge-joined.
    fn sort_by_hashval(&mut self) {
        let sz = self.len();
        self.data[..sz].sort_by(|a, b| match (a.cnt == 0, b.cnt == 0) {
            // Fix vs. the original: two empty slots must compare Equal.
            // Returning Greater for both orderings violates total order,
            // which Rust's sort_by may detect and panic on.
            (true, true) => std::cmp::Ordering::Equal,
            (true, false) => std::cmp::Ordering::Greater,
            (false, true) => std::cmp::Ordering::Less,
            (false, false) => a.hashval.cmp(&b.hashval),
        });
    }
}
/// Free-slot budget before a table of `2^sz_log2` slots must grow; matches
/// git's INITIAL_FREE heuristic `(sz * (log2 - 3)) / log2`, clamped at zero.
fn initial_free(sz_log2: u8) -> i32 {
    let sz = sz_log2 as i32;
    if sz == 0 {
        // Guard the log2==0 corner (the original divided by zero here).
        return 0;
    }
    ((1i32 << sz_log2) * (sz - 3) / sz).max(0)
}
/// Splits a blob into spans (closed at 64 bytes or at a newline), hashes
/// each span, and tallies span byte counts into a `SpanHashTop`. This is a
/// port of git's `hash_chars` (diffcore-delta.c); the rolling-hash
/// arithmetic must match git's bit-for-bit for comparable scores.
fn hash_blob_spans(buf: &[u8], is_text: bool) -> SpanHashTop {
    let mut hash = SpanHashTop::new(9);
    let mut n = 0u32; // bytes accumulated in the current span
    let mut accum1: u32 = 0; // two-lane rolling hash state
    let mut accum2: u32 = 0;
    let mut i = 0usize;
    while i < buf.len() {
        let c = buf[i] as u32;
        let old_1 = accum1;
        i += 1;
        // In text blobs, drop the CR of a CRLF pair so line-ending style
        // does not affect the similarity measure (as git does).
        if is_text && c == b'\r' as u32 && i < buf.len() && buf[i] == b'\n' {
            continue;
        }
        accum1 = accum1.wrapping_shl(7) ^ accum2.wrapping_shr(25);
        accum2 = accum2.wrapping_shl(7) ^ old_1.wrapping_shr(25);
        accum1 = accum1.wrapping_add(c);
        n += 1;
        // Keep extending the span until 64 bytes or a newline.
        if n < 64 && c != b'\n' as u32 {
            continue;
        }
        let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
        hash.add_span(hashval, n);
        n = 0;
        accum1 = 0;
        accum2 = 0;
    }
    // Flush a trailing span that ended without hitting a boundary.
    if n > 0 {
        let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
        hash.add_span(hashval, n);
    }
    hash.sort_by_hashval();
    hash
}
/// Merge-joins the sorted span-hash tables of `old` and `new`, returning
/// `(src_copied, literal_added)`: bytes of `old` found again in `new`, and
/// bytes of `new` with no counterpart in `old`. Port of git's
/// `diffcore_count_changes` (diffcore-delta.c).
#[must_use]
pub fn diffcore_count_changes(old: &[u8], new: &[u8]) -> (u64, u64) {
    let src_is_text = !crate::merge_file::is_binary(old);
    let dst_is_text = !crate::merge_file::is_binary(new);
    let src_count = hash_blob_spans(old, src_is_text);
    let dst_count = hash_blob_spans(new, dst_is_text);
    let mut sc: u64 = 0; // src_copied
    let mut la: u64 = 0; // literal_added
    let mut si = 0usize;
    let mut di = 0usize;
    let src_len = src_count.len();
    let dst_len = dst_count.len();
    // Both tables are sorted by hashval with empty (cnt == 0) slots at the
    // end, so hitting an empty slot means the occupied region is exhausted.
    loop {
        if si >= src_len || src_count.data[si].cnt == 0 {
            break;
        }
        let s_hash = src_count.data[si].hashval;
        let s_cnt = u64::from(src_count.data[si].cnt);
        // Destination spans with no source counterpart are literal additions.
        while di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval < s_hash {
            la += u64::from(dst_count.data[di].cnt);
            di += 1;
        }
        let mut dst_cnt = 0u64;
        if di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval == s_hash {
            dst_cnt = u64::from(dst_count.data[di].cnt);
            di += 1;
        }
        // Copied = min(src, dst) for this span; any destination excess is new.
        if s_cnt < dst_cnt {
            la += dst_cnt - s_cnt;
            sc += s_cnt;
        } else {
            sc += dst_cnt;
        }
        si += 1;
    }
    // Whatever remains on the destination side is all new material.
    while di < dst_len && dst_count.data[di].cnt != 0 {
        la += u64::from(dst_count.data[di].cnt);
        di += 1;
    }
    (sc, la)
}
/// Whether a modification should be shown as a complete rewrite in stat
/// output, using the default break score; see `should_break_rewrite_inner`.
#[must_use]
pub fn should_break_rewrite_for_stat(old: &[u8], new: &[u8]) -> bool {
    should_break_rewrite_inner(old, new, DIFF_DEFAULT_BREAK_SCORE)
}
/// Fraction of `old` that did NOT survive into `new`, scaled to
/// `0..=DIFF_MAX_SCORE`. Returns `None` when `old` is empty or both blobs
/// are below the minimum break size.
#[must_use]
pub fn rewrite_merge_score(old: &[u8], new: &[u8]) -> Option<u64> {
    if old.is_empty() || old.len().max(new.len()) < DIFF_MINIMUM_BREAK_SIZE {
        return None;
    }
    let (copied, _) = diffcore_count_changes(old, new);
    // Copied bytes can over-count; clamp to the source size.
    let copied = copied.min(old.len() as u64);
    let removed = (old.len() as u64).saturating_sub(copied);
    Some(removed * DIFF_MAX_SCORE / old.len() as u64)
}
/// The rewrite merge score rescaled to a 0-100 dissimilarity percentage
/// (as shown in `dissimilarity index NN%` headers); `None` when no score
/// applies.
#[must_use]
pub fn rewrite_dissimilarity_index_percent(old: &[u8], new: &[u8]) -> Option<u32> {
    rewrite_merge_score(old, new).map(|score| (score * 100 / DIFF_MAX_SCORE).min(100) as u32)
}
/// Core of git's diffcore-break heuristic (`should_break`, diffcore-break.c):
/// returns `true` when so little of `src` survives in `dst` that the change
/// should count as a rewrite (delete + add). `break_score` is on the
/// `0..=DIFF_MAX_SCORE` scale.
fn should_break_rewrite_inner(src: &[u8], dst: &[u8], break_score: u64) -> bool {
    if src.is_empty() {
        return false;
    }
    // Tiny files are never broken.
    let max_size = src.len().max(dst.len());
    if max_size < DIFF_MINIMUM_BREAK_SIZE {
        return false;
    }
    let (src_copied, literal_added) = diffcore_count_changes(src, dst);
    let src_copied = src_copied.min(src.len() as u64);
    let mut literal_added = literal_added;
    let dst_len = dst.len() as u64;
    // Clamp: copied + added can overshoot the destination size.
    if src_copied < dst_len && literal_added + src_copied > dst_len {
        literal_added = dst_len.saturating_sub(src_copied);
    }
    let src_removed = (src.len() as u64).saturating_sub(src_copied);
    // Fraction of the source that vanished.
    let merge_score = src_removed * DIFF_MAX_SCORE / src.len() as u64;
    if merge_score > break_score {
        return true;
    }
    // Total churn relative to the larger side must reach the threshold.
    let delta_size = src_removed.saturating_add(literal_added);
    if delta_size * DIFF_MAX_SCORE / (max_size as u64) < break_score {
        return false;
    }
    // Removing a lot while adding almost nothing is shrinkage, not a
    // rewrite (same exception as git).
    let s = src.len() as u64;
    if (s * break_score < src_removed * DIFF_MAX_SCORE)
        && (literal_added * 20 < src_removed)
        && (literal_added * 20 < src_copied)
    {
        return false;
    }
    true
}
/// A single blob reached by recursively walking a tree: slash-joined full
/// path, raw mode bits, and object id.
struct FlatEntry {
    path: String,
    mode: u32,
    oid: ObjectId,
}
/// Recursively flattens a tree object into its blob entries, prefixing each
/// path with `prefix` (empty for the root call).
fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
    let mut out = Vec::new();
    for entry in read_tree(odb, tree_oid)? {
        let name = String::from_utf8_lossy(&entry.name);
        let full_path = format_path(prefix, &name);
        if is_tree_mode(entry.mode) {
            // Subtree: recurse and splice its entries in.
            out.append(&mut flatten_tree(odb, &entry.oid, &full_path)?);
        } else {
            out.push(FlatEntry {
                path: full_path,
                mode: entry.mode,
                oid: entry.oid,
            });
        }
    }
    Ok(out)
}
/// Maps every path in the HEAD tree to its `(mode, oid)`; an absent HEAD
/// tree (e.g. unborn branch) yields an empty map.
pub fn head_path_states(
    odb: &Odb,
    head_tree: Option<&ObjectId>,
) -> Result<std::collections::BTreeMap<String, (u32, ObjectId)>> {
    let mut states = std::collections::BTreeMap::new();
    if let Some(tree) = head_tree {
        for fe in flatten_tree(odb, tree, "")? {
            states.insert(fe.path, (fe.mode, fe.oid));
        }
    }
    Ok(states)
}
/// True for git's directory (tree) mode bits.
fn is_tree_mode(mode: u32) -> bool {
    matches!(mode, 0o040000)
}
/// Joins a directory prefix and an entry name with '/', leaving the name
/// untouched when the prefix is empty.
fn format_path(prefix: &str, name: &str) -> String {
    match prefix {
        "" => name.to_owned(),
        _ => format!("{prefix}/{name}"),
    }
}
/// Formats mode bits as the six-digit zero-padded octal used in raw diff
/// output (e.g. `100644`).
pub fn format_mode(mode: u32) -> String {
    format!("{:06o}", mode)
}
/// Resolves the commit a submodule working tree currently has checked out,
/// for use during checkout; `None` when the submodule is uninitialized or
/// its HEAD cannot be read.
#[must_use]
pub fn read_submodule_head_for_checkout(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head(sub_dir)
}
/// True when the submodule directory is an unpopulated placeholder: either
/// it does not exist at all, or it exists but is completely empty. Other
/// I/O errors are treated as "populated" (false).
fn submodule_worktree_is_unpopulated_placeholder(sub_dir: &Path) -> bool {
    match fs::read_dir(sub_dir) {
        Ok(mut entries) => entries.next().is_none(),
        Err(err) => err.kind() == std::io::ErrorKind::NotFound,
    }
}
// Internal alias kept for readability at call sites; the actual resolution
// lives in `read_submodule_head_oid`.
fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head_oid(sub_dir)
}
/// Locates a submodule's git directory from its worktree: either an
/// embedded `.git` directory, or the target of a `.git` file containing
/// `gitdir: <path>` (relative targets are resolved against `sub_dir`).
/// `None` when neither form is present or the gitfile is unreadable.
#[must_use]
pub fn submodule_embedded_git_dir(sub_dir: &Path) -> Option<PathBuf> {
    let dotgit = sub_dir.join(".git");
    if dotgit.is_dir() {
        return Some(dotgit);
    }
    if !dotgit.is_file() {
        return None;
    }
    // ".git" file: find the "gitdir: " line and resolve its target.
    let text = fs::read_to_string(&dotgit).ok()?;
    let target = text.lines().find_map(|l| l.strip_prefix("gitdir: "))?.trim();
    if Path::new(target).is_absolute() {
        Some(PathBuf::from(target))
    } else {
        Some(sub_dir.join(target))
    }
}
/// Walks up from the submodule directory looking for the enclosing
/// superproject, returning `(worktree_root, git_dir)` for the first
/// ancestor containing a `.git` entry. A `.git` file is dereferenced via
/// its `gitdir: ` line; an unreadable or malformed gitfile aborts with
/// `None` (matching the original's `?` behavior).
fn find_superproject_git(sub_dir: &Path) -> Option<(PathBuf, PathBuf)> {
    // ancestors() yields sub_dir itself first; skip it so the search starts
    // at the parent, exactly as the original loop did.
    for candidate in sub_dir.ancestors().skip(1) {
        let dot_git = candidate.join(".git");
        if !dot_git.exists() {
            continue;
        }
        let git_dir = if dot_git.is_file() {
            let text = fs::read_to_string(&dot_git).ok()?;
            let target = text.lines().find_map(|l| l.strip_prefix("gitdir: "))?.trim();
            if Path::new(target).is_absolute() {
                PathBuf::from(target)
            } else {
                candidate.join(target)
            }
        } else {
            dot_git
        };
        return Some((candidate.to_path_buf(), git_dir));
    }
    None
}
/// Reads the commit a submodule's working tree currently has checked out.
///
/// Prefers the superproject's `$GIT_DIR/modules/<path>` directory when the
/// submodule's embedded git dir does not already live under the
/// superproject's git dir; then resolves `HEAD` (symbolic or detached).
pub fn read_submodule_head_oid(sub_dir: &Path) -> Option<ObjectId> {
    let mut git_dir = submodule_embedded_git_dir(sub_dir)?;
    if let Some((super_wt, super_git_dir)) = find_superproject_git(sub_dir) {
        let rel = sub_dir.strip_prefix(&super_wt).ok()?;
        let rel_str = rel.to_string_lossy().replace('\\', "/");
        let local_mod = super_git_dir
            .join("modules")
            .join(rel_str.trim_start_matches('/'));
        if local_mod.join("HEAD").exists() {
            // Only redirect when the embedded git dir is NOT already inside
            // the superproject's git dir (canonicalize both for a safe
            // prefix comparison; fall back to the raw paths on failure).
            let sg = super_git_dir.canonicalize().unwrap_or(super_git_dir);
            let cur = git_dir.canonicalize().unwrap_or_else(|_| git_dir.clone());
            if !cur.starts_with(&sg) {
                git_dir = local_mod;
            }
        }
    }
    let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let head_content = head_content.trim();
    if let Some(refname) = head_content.strip_prefix("ref: ") {
        // Symbolic ref: read the loose ref file it points to.
        // NOTE(review): packed-refs are not consulted here — confirm a
        // loose ref always exists for submodule HEAD branches.
        let ref_path = git_dir.join(refname);
        let oid_hex = fs::read_to_string(&ref_path).ok()?;
        ObjectId::from_hex(oid_hex.trim()).ok()
    } else {
        // Detached HEAD: the file holds the commit id directly.
        ObjectId::from_hex(head_content).ok()
    }
}
/// Per-submodule markers reported in porcelain status output.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SubmodulePorcelainFlags {
    /// The submodule's HEAD differs from the commit the superproject records.
    pub new_commits: bool,
    /// The submodule has staged or unstaged changes of its own.
    pub modified: bool,
    /// The submodule worktree contains files its index does not track.
    pub untracked: bool,
}
/// Computes the porcelain status markers for the submodule at `rel_path`
/// inside `super_worktree`, given the commit `recorded_oid` the
/// superproject records for it.
///
/// Missing/uninitialized submodules yield all-false flags; a submodule
/// whose index cannot be loaded still reports `new_commits`. All internal
/// failures are treated as "clean" (best effort, never errors out).
pub fn submodule_porcelain_flags(
    super_worktree: &Path,
    rel_path: &str,
    recorded_oid: ObjectId,
) -> SubmodulePorcelainFlags {
    let sub_dir = super_worktree.join(rel_path);
    let Some(sub_git_dir) = submodule_embedded_git_dir(&sub_dir) else {
        return SubmodulePorcelainFlags::default();
    };
    let Some(sub_head) = read_submodule_head_oid(&sub_dir) else {
        return SubmodulePorcelainFlags::default();
    };
    // Checked out at a different commit than the superproject records.
    let new_commits = sub_head != recorded_oid;
    let index_path = sub_git_dir.join("index");
    let sub_index = match crate::index::Index::load(&index_path) {
        Ok(ix) => ix,
        Err(_) => {
            return SubmodulePorcelainFlags {
                new_commits,
                ..Default::default()
            }
        }
    };
    // Stage-0 paths tracked by the submodule's own index.
    let tracked: std::collections::BTreeSet<String> = sub_index
        .entries
        .iter()
        .filter(|e| e.stage() == 0)
        .map(|e| String::from_utf8_lossy(&e.path).into_owned())
        .collect();
    let untracked = submodule_dir_has_untracked_inner(&sub_dir, &sub_dir, &tracked);
    let objects_dir = sub_git_dir.join("objects");
    let odb = Odb::new(&objects_dir);
    // Resolve the submodule HEAD commit's tree (best effort; None on any
    // failure along the way).
    let sub_head_tree = (|| -> Option<ObjectId> {
        let h = fs::read_to_string(sub_git_dir.join("HEAD")).ok()?;
        let h_str = h.trim();
        let commit_oid = if let Some(r) = h_str.strip_prefix("ref: ") {
            let oid_hex = fs::read_to_string(sub_git_dir.join(r)).ok()?;
            ObjectId::from_hex(oid_hex.trim()).ok()?
        } else {
            ObjectId::from_hex(h_str).ok()?
        };
        let obj = odb.read(&commit_oid).ok()?;
        let commit = parse_commit(&obj.data).ok()?;
        Some(commit.tree)
    })();
    // "modified" = staged (index vs HEAD tree) or unstaged (worktree vs
    // index) differences inside the submodule; errors count as not dirty.
    let staged_dirty = sub_head_tree
        .as_ref()
        .map(|t| diff_index_to_tree(&odb, &sub_index, Some(t)).map(|v| !v.is_empty()))
        .unwrap_or(Ok(false));
    let staged_dirty = staged_dirty.unwrap_or(false);
    let unstaged_dirty = diff_index_to_worktree(&odb, &sub_index, &sub_dir)
        .map(|v| !v.is_empty())
        .unwrap_or(false);
    let modified = staged_dirty || unstaged_dirty;
    SubmodulePorcelainFlags {
        new_commits,
        modified,
        untracked,
    }
}
/// Recursively checks whether `dir` contains any file not listed in
/// `tracked` (paths relative to `root`, '/' separated on Unix). Entries
/// named ".git" are skipped at every level; unreadable directories count
/// as having no untracked files.
fn submodule_dir_has_untracked_inner(
    dir: &Path,
    root: &Path,
    tracked: &std::collections::BTreeSet<String>,
) -> bool {
    let Ok(read) = fs::read_dir(dir) else {
        return false;
    };
    // Sort by file name for deterministic traversal order.
    let mut children: Vec<_> = read.filter_map(|e| e.ok()).collect();
    children.sort_by_key(|e| e.file_name());
    for child in children {
        let name = child.file_name().to_string_lossy().to_string();
        if name == ".git" {
            continue;
        }
        let child_path = child.path();
        let rel = child_path
            .strip_prefix(root)
            .map_or_else(|_| name.clone(), |p| p.to_string_lossy().to_string());
        let is_dir = child.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
        if is_dir {
            if submodule_dir_has_untracked_inner(&child_path, root, tracked) {
                return true;
            }
        } else if !tracked.contains(&rel) {
            return true;
        }
    }
    false
}