use sley_core::{GitError, ObjectFormat, ObjectId, RepoPath, Result, object_id_for_bytes};
pub mod render;
pub mod ws;
pub use sley_core::BString;
use sley_index::{BorrowedIndex, Index, IndexStatCache};
use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntries, TreeEntry};
use sley_odb::{FileObjectDatabase, ObjectReader, ObjectWriter};
use sley_refs::{FileRefStore, RefTarget};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fs;
use std::path::{Path, PathBuf};
pub fn gitlink_git_dir(sub_root: &Path) -> Option<PathBuf> {
let dot_git = sub_root.join(".git");
let metadata = fs::symlink_metadata(&dot_git).ok()?;
if metadata.is_dir() {
return Some(dot_git);
}
if !metadata.is_file() {
return None;
}
let contents = fs::read_to_string(&dot_git).ok()?;
let target = contents.strip_prefix("gitdir:")?.trim();
if target.is_empty() {
return None;
}
let target = PathBuf::from(target);
let git_dir = if target.is_absolute() {
target
} else {
sub_root.join(target)
};
if git_dir.is_dir() {
Some(git_dir)
} else {
None
}
}
pub fn gitlink_head_oid(sub_root: &Path, format: ObjectFormat) -> Option<ObjectId> {
let git_dir = gitlink_git_dir(sub_root)?;
let store = FileRefStore::new(&git_dir, format);
let mut target = store.read_ref("HEAD").ok()??;
for _ in 0..10 {
match target {
RefTarget::Direct(oid) => return Some(oid),
RefTarget::Symbolic(name) => target = store.read_ref(&name).ok()??,
}
}
None
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffLine<'a> {
pub content: &'a [u8],
pub has_newline: bool,
}
impl<'a> DiffLine<'a> {
pub fn bytes_without_newline(&self) -> &'a [u8] {
if self.has_newline {
self.content.strip_suffix(b"\n").unwrap_or(self.content)
} else {
self.content
}
}
}
pub fn split_lines(blob: &[u8]) -> Vec<DiffLine<'_>> {
let mut lines = Vec::new();
let mut start = 0usize;
let len = blob.len();
let mut idx = 0usize;
while idx < len {
if blob[idx] == b'\n' {
lines.push(DiffLine {
content: &blob[start..=idx],
has_newline: true,
});
idx += 1;
start = idx;
} else {
idx += 1;
}
}
if start < len {
lines.push(DiffLine {
content: &blob[start..len],
has_newline: false,
});
}
lines
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffOp {
Equal(usize),
Delete(usize),
Insert(usize),
}
pub fn myers_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
let n_total = old.len();
let m_total = new.len();
let mut prefix = 0usize;
while prefix < n_total && prefix < m_total && old[prefix] == new[prefix] {
prefix += 1;
}
let mut suffix = 0usize;
while suffix < n_total - prefix
&& suffix < m_total - prefix
&& old[n_total - 1 - suffix] == new[m_total - 1 - suffix]
{
suffix += 1;
}
let old_mid = &old[prefix..n_total - suffix];
let new_mid = &new[prefix..m_total - suffix];
let mut ops: Vec<DiffOp> = Vec::new();
if prefix > 0 {
ops.push(DiffOp::Equal(prefix));
}
myers_core(old_mid, new_mid, &mut ops);
if suffix > 0 {
ops.push(DiffOp::Equal(suffix));
}
coalesce_ops(ops)
}
fn myers_core(old: &[DiffLine<'_>], new: &[DiffLine<'_>], out: &mut Vec<DiffOp>) {
let n = old.len() as isize;
let m = new.len() as isize;
if n == 0 {
if m > 0 {
out.push(DiffOp::Insert(m as usize));
}
return;
}
if m == 0 {
out.push(DiffOp::Delete(n as usize));
return;
}
let max = (n + m) as usize;
let offset = max as isize; let width = 2 * max + 1;
let mut v = vec![0isize; width];
let mut trace: Vec<Vec<isize>> = Vec::new();
let mut found_d: Option<usize> = None;
'search: for d in 0..=(max as isize) {
trace.push(v.clone());
let mut k = -d;
while k <= d {
let kidx = (k + offset) as usize;
let mut x = if k == -d
|| (k != d && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
{
v[(k + 1 + offset) as usize]
} else {
v[(k - 1 + offset) as usize] + 1
};
let mut y = x - k;
while x < n && y < m && old[x as usize] == new[y as usize] {
x += 1;
y += 1;
}
v[kidx] = x;
if x >= n && y >= m {
found_d = Some(d as usize);
break 'search;
}
k += 2;
}
}
let Some(d_end) = found_d else {
out.push(DiffOp::Delete(n as usize));
out.push(DiffOp::Insert(m as usize));
return;
};
backtrack(n, m, &trace, d_end, offset, out);
}
fn backtrack(
n: isize,
m: isize,
trace: &[Vec<isize>],
d_end: usize,
offset: isize,
out: &mut Vec<DiffOp>,
) {
let mut x = n;
let mut y = m;
let mut rev: Vec<DiffOp> = Vec::new();
for d in (0..=d_end).rev() {
let v = &trace[d];
let k = x - y;
let prev_k = if k == -(d as isize)
|| (k != d as isize && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
{
k + 1 } else {
k - 1 };
let prev_x = v[(prev_k + offset) as usize];
let prev_y = prev_x - prev_k;
while x > prev_x && y > prev_y {
rev.push(DiffOp::Equal(1));
x -= 1;
y -= 1;
}
if d > 0 {
if x == prev_x {
rev.push(DiffOp::Insert(1));
} else {
rev.push(DiffOp::Delete(1));
}
x = prev_x;
y = prev_y;
}
}
rev.reverse();
out.extend(rev);
}
fn coalesce_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
let mut out: Vec<DiffOp> = Vec::with_capacity(ops.len());
for op in ops {
match (out.last_mut(), op) {
(Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
(Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
(Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
_ => out.push(op),
}
}
out
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct WsIgnore {
pub all_space: bool,
pub space_change: bool,
pub space_at_eol: bool,
pub cr_at_eol: bool,
}
impl WsIgnore {
pub fn is_empty(&self) -> bool {
!(self.all_space || self.space_change || self.space_at_eol || self.cr_at_eol)
}
}
#[inline]
fn xdl_isspace(c: u8) -> bool {
matches!(c, b' ' | b'\t' | b'\n' | b'\r' | 0x0b | 0x0c)
}
pub(crate) fn canonicalize_line_for_match(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
canonicalize_line(line, ignore)
}
fn canonicalize_line(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
if ignore.all_space {
return line.iter().copied().filter(|&c| !xdl_isspace(c)).collect();
}
if ignore.space_change {
let mut out = Vec::with_capacity(line.len());
let mut i = 0usize;
while i < line.len() {
if xdl_isspace(line[i]) {
while i < line.len() && xdl_isspace(line[i]) {
i += 1;
}
out.push(b' ');
} else {
out.push(line[i]);
i += 1;
}
}
if out.last() == Some(&b' ') {
out.pop();
}
return out;
}
if ignore.space_at_eol {
let mut end = line.len();
while end > 0 && xdl_isspace(line[end - 1]) {
end -= 1;
}
return line[..end].to_vec();
}
if ignore.cr_at_eol {
if let Some(stripped) = line.strip_suffix(b"\n") {
if let Some(without_cr) = stripped.strip_suffix(b"\r") {
let mut out = without_cr.to_vec();
out.push(b'\n');
return out;
}
} else if let Some(without_cr) = line.strip_suffix(b"\r") {
return without_cr.to_vec();
}
return line.to_vec();
}
line.to_vec()
}
fn line_is_blank(line: &[u8], ignore: WsIgnore) -> bool {
if ignore.is_empty() {
line.len() <= 1
} else {
line.iter().all(|&c| xdl_isspace(c))
}
}
pub fn myers_diff_lines_ws(
old: &[DiffLine<'_>],
new: &[DiffLine<'_>],
ignore: WsIgnore,
algorithm: DiffAlgorithm,
) -> Vec<DiffOp> {
if ignore.is_empty() {
return diff_lines_with_algorithm(old, new, algorithm);
}
let old_canon: Vec<Vec<u8>> = old
.iter()
.map(|l| canonicalize_line(l.content, ignore))
.collect();
let new_canon: Vec<Vec<u8>> = new
.iter()
.map(|l| canonicalize_line(l.content, ignore))
.collect();
let old_lines: Vec<DiffLine<'_>> = old_canon
.iter()
.map(|c| DiffLine {
content: c.as_slice(),
has_newline: true,
})
.collect();
let new_lines: Vec<DiffLine<'_>> = new_canon
.iter()
.map(|c| DiffLine {
content: c.as_slice(),
has_newline: true,
})
.collect();
diff_lines_with_algorithm(&old_lines, &new_lines, algorithm)
}
type LineKey<'a> = (&'a [u8], bool);
#[inline]
fn line_key<'a>(line: &DiffLine<'a>) -> LineKey<'a> {
(line.content, line.has_newline)
}
pub fn patience_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
let mut ops: Vec<DiffOp> = Vec::new();
patience_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
coalesce_ops(ops)
}
pub fn histogram_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
let mut ops: Vec<DiffOp> = Vec::new();
histogram_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
coalesce_ops(ops)
}
pub fn diff_lines_with_algorithm(
old: &[DiffLine<'_>],
new: &[DiffLine<'_>],
algorithm: DiffAlgorithm,
) -> Vec<DiffOp> {
match algorithm {
DiffAlgorithm::Myers | DiffAlgorithm::Minimal => myers_diff_lines(old, new),
DiffAlgorithm::Patience => patience_diff_lines(old, new),
DiffAlgorithm::Histogram => histogram_diff_lines(old, new),
}
}
fn emit_trivial_range(a0: usize, a1: usize, b0: usize, b1: usize, out: &mut Vec<DiffOp>) -> bool {
let old_len = a1 - a0;
let new_len = b1 - b0;
if old_len == 0 && new_len == 0 {
return true;
}
if old_len == 0 {
out.push(DiffOp::Insert(new_len));
return true;
}
if new_len == 0 {
out.push(DiffOp::Delete(old_len));
return true;
}
false
}
fn trim_common(
old: &[DiffLine<'_>],
new: &[DiffLine<'_>],
mut a0: usize,
mut a1: usize,
mut b0: usize,
mut b1: usize,
out: &mut Vec<DiffOp>,
) -> (usize, usize, usize, usize, usize) {
let mut prefix = 0usize;
while a0 < a1 && b0 < b1 && old[a0] == new[b0] {
a0 += 1;
b0 += 1;
prefix += 1;
}
if prefix > 0 {
out.push(DiffOp::Equal(prefix));
}
let mut suffix = 0usize;
while a1 > a0 && b1 > b0 && old[a1 - 1] == new[b1 - 1] {
a1 -= 1;
b1 -= 1;
suffix += 1;
}
(a0, a1, b0, b1, suffix)
}
fn patience_recurse(
old: &[DiffLine<'_>],
new: &[DiffLine<'_>],
a0: usize,
a1: usize,
b0: usize,
b1: usize,
out: &mut Vec<DiffOp>,
) {
if emit_trivial_range(a0, a1, b0, b1, out) {
return;
}
let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
if !emit_trivial_range(a0, a1, b0, b1, out) {
match patience_anchors(old, new, a0, a1, b0, b1) {
Some(anchors) => {
let mut cur_a = a0;
let mut cur_b = b0;
for (ai, bi) in anchors {
patience_recurse(old, new, cur_a, ai, cur_b, bi, out);
out.push(DiffOp::Equal(1));
cur_a = ai + 1;
cur_b = bi + 1;
}
patience_recurse(old, new, cur_a, a1, cur_b, b1, out);
}
None => myers_core(&old[a0..a1], &new[b0..b1], out),
}
}
if suffix > 0 {
out.push(DiffOp::Equal(suffix));
}
}
fn patience_anchors(
old: &[DiffLine<'_>],
new: &[DiffLine<'_>],
a0: usize,
a1: usize,
b0: usize,
b1: usize,
) -> Option<Vec<(usize, usize)>> {
struct Occ {
count: usize,
pos: usize,
}
let mut in_old: HashMap<LineKey<'_>, Occ> = HashMap::new();
for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
in_old
.entry(line_key(line))
.and_modify(|o| o.count += 1)
.or_insert(Occ { count: 1, pos: i });
}
let mut in_new: HashMap<LineKey<'_>, Occ> = HashMap::new();
for (j, line) in new.iter().enumerate().take(b1).skip(b0) {
in_new
.entry(line_key(line))
.and_modify(|o| o.count += 1)
.or_insert(Occ { count: 1, pos: j });
}
let mut pairs: Vec<(usize, usize)> = Vec::new();
for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
let key = line_key(line);
let Some(o) = in_old.get(&key) else { continue };
if o.count != 1 || o.pos != i {
continue;
}
if let Some(n) = in_new.get(&key)
&& n.count == 1
{
pairs.push((i, n.pos));
}
}
if pairs.is_empty() {
return None;
}
let lis = longest_increasing_by_new(&pairs);
if lis.is_empty() { None } else { Some(lis) }
}
fn longest_increasing_by_new(pairs: &[(usize, usize)]) -> Vec<(usize, usize)> {
if pairs.is_empty() {
return Vec::new();
}
let mut tails: Vec<usize> = Vec::new();
let mut prev: Vec<Option<usize>> = vec![None; pairs.len()];
for i in 0..pairs.len() {
let val = pairs[i].1;
let mut lo = 0usize;
let mut hi = tails.len();
while lo < hi {
let mid = lo + (hi - lo) / 2;
if pairs[tails[mid]].1 < val {
lo = mid + 1;
} else {
hi = mid;
}
}
if lo > 0 {
prev[i] = Some(tails[lo - 1]);
}
if lo == tails.len() {
tails.push(i);
} else {
tails[lo] = i;
}
}
let mut result: Vec<(usize, usize)> = Vec::with_capacity(tails.len());
let mut cur = tails.last().copied();
while let Some(i) = cur {
result.push(pairs[i]);
cur = prev[i];
}
result.reverse();
result
}
fn histogram_recurse(
old: &[DiffLine<'_>],
new: &[DiffLine<'_>],
a0: usize,
a1: usize,
b0: usize,
b1: usize,
out: &mut Vec<DiffOp>,
) {
if emit_trivial_range(a0, a1, b0, b1, out) {
return;
}
let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
if !emit_trivial_range(a0, a1, b0, b1, out) {
match histogram_region(old, new, a0, a1, b0, b1) {
Some(region) => {
histogram_recurse(old, new, a0, region.old_start, b0, region.new_start, out);
out.push(DiffOp::Equal(region.len));
histogram_recurse(
old,
new,
region.old_start + region.len,
a1,
region.new_start + region.len,
b1,
out,
);
}
None => myers_core(&old[a0..a1], &new[b0..b1], out),
}
}
if suffix > 0 {
out.push(DiffOp::Equal(suffix));
}
}
struct HistogramRegion {
old_start: usize,
new_start: usize,
len: usize,
}
fn histogram_region(
old: &[DiffLine<'_>],
new: &[DiffLine<'_>],
a0: usize,
a1: usize,
b0: usize,
b1: usize,
) -> Option<HistogramRegion> {
let mut buckets: HashMap<LineKey<'_>, Vec<usize>> = HashMap::new();
for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
buckets.entry(line_key(line)).or_default().push(i);
}
let mut best: Option<HistogramRegion> = None;
let mut best_count = usize::MAX;
let mut best_len = 0usize;
let mut bj = b0;
while bj < b1 {
let key = line_key(&new[bj]);
let Some(positions) = buckets.get(&key) else {
bj += 1;
continue;
};
let occ = positions.len();
let mut next_bj = bj + 1;
for &ai in positions {
let mut start_a = ai;
let mut start_b = bj;
while start_a > a0 && start_b > b0 && old[start_a - 1] == new[start_b - 1] {
start_a -= 1;
start_b -= 1;
}
let mut len = 0usize;
while start_a + len < a1
&& start_b + len < b1
&& old[start_a + len] == new[start_b + len]
{
len += 1;
}
let run_count = occ;
let better = run_count < best_count || (run_count == best_count && len > best_len);
if better && len > 0 {
best_count = run_count;
best_len = len;
best = Some(HistogramRegion {
old_start: start_a,
new_start: start_b,
len,
});
if start_b + len > next_bj {
next_bj = start_b + len;
}
}
}
bj = next_bj.max(bj + 1);
}
best
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ConflictStyle {
#[default]
Merge,
Diff3,
}
#[derive(Debug, Clone, Copy)]
pub struct MergeBlobOptions<'a> {
pub ours_label: &'a str,
pub theirs_label: &'a str,
pub base_label: &'a str,
pub style: ConflictStyle,
}
impl Default for MergeBlobOptions<'_> {
fn default() -> Self {
Self {
ours_label: "ours",
theirs_label: "theirs",
base_label: "base",
style: ConflictStyle::Merge,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MergeBlobResult {
pub content: Vec<u8>,
pub conflicted: bool,
}
pub fn merge_blobs(
base: &[u8],
ours: &[u8],
theirs: &[u8],
options: &MergeBlobOptions<'_>,
) -> MergeBlobResult {
let base_lines = split_lines(base);
let ours_lines = split_lines(ours);
let theirs_lines = split_lines(theirs);
let ours_matches = matching_regions(&base_lines, &ours_lines);
let theirs_matches = matching_regions(&base_lines, &theirs_lines);
let stable = common_stable_segments(&ours_matches, &theirs_matches);
let mut writer = MergeWriter::new(options);
let mut base_idx = 0usize;
let mut our_idx = 0usize;
let mut their_idx = 0usize;
for seg in &stable {
let base_region = &base_lines[base_idx..seg.base_start];
let our_region = &ours_lines[our_idx..seg.ours_start];
let their_region = &theirs_lines[their_idx..seg.theirs_start];
emit_region(&mut writer, base_region, our_region, their_region);
writer.emit_lines(&base_lines[seg.base_start..seg.base_start + seg.len]);
base_idx = seg.base_start + seg.len;
our_idx = seg.ours_start + seg.len;
their_idx = seg.theirs_start + seg.len;
}
emit_region(
&mut writer,
&base_lines[base_idx..],
&ours_lines[our_idx..],
&theirs_lines[their_idx..],
);
writer.finish()
}
fn emit_region(
writer: &mut MergeWriter<'_>,
base_region: &[DiffLine<'_>],
our_region: &[DiffLine<'_>],
their_region: &[DiffLine<'_>],
) {
if our_region.is_empty() && their_region.is_empty() {
return;
}
let our_changed = our_region != base_region;
let their_changed = their_region != base_region;
match (our_changed, their_changed) {
(false, false) => writer.emit_lines(base_region),
(true, false) => writer.emit_lines(our_region),
(false, true) => writer.emit_lines(their_region),
(true, true) => {
if our_region == their_region {
writer.emit_lines(our_region);
} else {
writer.emit_conflict(our_region, base_region, their_region);
}
}
}
}
#[derive(Debug, Clone, Copy)]
struct MatchRegion {
base_start: usize,
side_start: usize,
len: usize,
}
#[derive(Debug, Clone, Copy)]
struct StableSegment {
base_start: usize,
ours_start: usize,
theirs_start: usize,
len: usize,
}
fn matching_regions(base: &[DiffLine<'_>], side: &[DiffLine<'_>]) -> Vec<MatchRegion> {
let ops = myers_diff_lines(base, side);
let mut regions = Vec::new();
let mut base_idx = 0usize;
let mut side_idx = 0usize;
for op in ops {
match op {
DiffOp::Equal(n) => {
regions.push(MatchRegion {
base_start: base_idx,
side_start: side_idx,
len: n,
});
base_idx += n;
side_idx += n;
}
DiffOp::Delete(n) => base_idx += n,
DiffOp::Insert(n) => side_idx += n,
}
}
regions
}
fn common_stable_segments(ours: &[MatchRegion], theirs: &[MatchRegion]) -> Vec<StableSegment> {
let mut segments = Vec::new();
let mut oi = 0usize;
let mut ti = 0usize;
while oi < ours.len() && ti < theirs.len() {
let o = ours[oi];
let t = theirs[ti];
let o_end = o.base_start + o.len;
let t_end = t.base_start + t.len;
let lo = o.base_start.max(t.base_start);
let hi = o_end.min(t_end);
if lo < hi {
segments.push(StableSegment {
base_start: lo,
ours_start: o.side_start + (lo - o.base_start),
theirs_start: t.side_start + (lo - t.base_start),
len: hi - lo,
});
}
if o_end <= t_end {
oi += 1;
} else {
ti += 1;
}
}
segments
}
struct MergeWriter<'a> {
out: Vec<u8>,
conflicted: bool,
options: &'a MergeBlobOptions<'a>,
}
impl<'a> MergeWriter<'a> {
fn new(options: &'a MergeBlobOptions<'a>) -> Self {
Self {
out: Vec::new(),
conflicted: false,
options,
}
}
fn emit_lines(&mut self, lines: &[DiffLine<'_>]) {
for line in lines {
self.out.extend_from_slice(line.content);
}
}
fn emit_conflict(
&mut self,
ours: &[DiffLine<'_>],
base: &[DiffLine<'_>],
theirs: &[DiffLine<'_>],
) {
self.conflicted = true;
self.write_marker(b'<', self.options.ours_label);
self.emit_section(ours);
if self.options.style == ConflictStyle::Diff3 {
self.ensure_newline();
self.write_marker(b'|', self.options.base_label);
self.emit_section(base);
}
self.ensure_newline();
self.write_divider();
self.emit_section(theirs);
self.ensure_newline();
self.write_marker(b'>', self.options.theirs_label);
}
fn emit_section(&mut self, lines: &[DiffLine<'_>]) {
for line in lines {
self.out.extend_from_slice(line.content);
}
}
fn ensure_newline(&mut self) {
if !self.out.is_empty() && self.out.last() != Some(&b'\n') {
self.out.push(b'\n');
}
}
fn write_marker(&mut self, ch: u8, label: &str) {
for _ in 0..7 {
self.out.push(ch);
}
if !label.is_empty() {
self.out.push(b' ');
self.out.extend_from_slice(label.as_bytes());
}
self.out.push(b'\n');
}
fn write_divider(&mut self) {
for _ in 0..7 {
self.out.push(b'=');
}
self.out.push(b'\n');
}
fn finish(self) -> MergeBlobResult {
MergeBlobResult {
content: self.out,
conflicted: self.conflicted,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffAlgorithm {
Myers,
Minimal,
Patience,
Histogram,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileChange {
Add { path: RepoPath },
Delete { path: RepoPath },
Modify { path: RepoPath },
Rename { old: RepoPath, new: RepoPath },
Copy { source: RepoPath, dest: RepoPath },
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Conflict {
pub path: RepoPath,
pub ours: Vec<u8>,
pub theirs: Vec<u8>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameStatus {
Added,
Deleted,
Modified,
Renamed(u8),
Copied(u8),
Unmerged,
}
impl NameStatus {
pub const fn code(self) -> char {
match self {
Self::Added => 'A',
Self::Deleted => 'D',
Self::Modified => 'M',
Self::Renamed(_) => 'R',
Self::Copied(_) => 'C',
Self::Unmerged => 'U',
}
}
pub fn label(self) -> String {
match self {
Self::Renamed(score) => format!("R{score:03}"),
Self::Copied(score) => format!("C{score:03}"),
_ => self.code().to_string(),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameStatusEntry {
pub status: NameStatus,
pub path: BString,
pub old_path: Option<BString>,
pub old_mode: Option<u32>,
pub new_mode: Option<u32>,
pub old_oid: Option<ObjectId>,
pub new_oid: Option<ObjectId>,
}
impl NameStatusEntry {
pub fn line(&self) -> String {
if let Some(old_path) = &self.old_path {
format!(
"{}\t{}\t{}",
self.status.label(),
String::from_utf8_lossy(old_path.as_bytes()),
String::from_utf8_lossy(self.path.as_bytes())
)
} else {
format!(
"{}\t{}",
self.status.label(),
String::from_utf8_lossy(self.path.as_bytes())
)
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexGitlinkEntry {
pub path: BString,
pub oid: ObjectId,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexWorktreeDiff {
pub entries: Vec<NameStatusEntry>,
pub staged_gitlinks: Vec<IndexGitlinkEntry>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffNameStatusOptions {
pub detect_renames: bool,
pub detect_copies: bool,
pub find_copies_harder: bool,
pub rename_empty: bool,
}
impl Default for DiffNameStatusOptions {
fn default() -> Self {
Self {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
}
}
}
pub const DEFAULT_RENAME_THRESHOLD: u8 = 50;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RenameDetectionOptions {
pub base: DiffNameStatusOptions,
pub detect_inexact: bool,
pub rename_threshold: u8,
pub copy_threshold: u8,
}
impl Default for RenameDetectionOptions {
fn default() -> Self {
Self {
base: DiffNameStatusOptions::default(),
detect_inexact: false,
rename_threshold: DEFAULT_RENAME_THRESHOLD,
copy_threshold: DEFAULT_RENAME_THRESHOLD,
}
}
}
impl RenameDetectionOptions {
pub fn inexact(base: DiffNameStatusOptions) -> Self {
Self {
base,
detect_inexact: true,
..Self::default()
}
}
}
pub fn diff_name_status_head_worktree(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
) -> Result<Vec<NameStatusEntry>> {
diff_name_status_head_worktree_with_options(
worktree_root,
git_dir,
format,
DiffNameStatusOptions::default(),
)
}
pub fn diff_name_status_head_worktree_with_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let worktree_root = worktree_root.as_ref();
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let head = head_tree_entries(git_dir, format, &db)?;
let IndexSnapshot {
entries: index,
stat_cache,
} = read_index_snapshot(git_dir, format)?;
let index_gitlinks = index_gitlinks(&index);
let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
let worktree = worktree_entries_for_path_set(
worktree_root,
format,
&candidate_paths,
&index_gitlinks,
Some(&stat_cache),
)?;
let changes = diff_name_status_maps_for_path_set(&head, &worktree, &candidate_paths, options)?;
Ok(mark_unstaged_worktree_oids_unresolved(
changes, &index, &worktree,
))
}
pub fn diff_name_status_head_worktree_with_rename_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
let worktree_root = worktree_root.as_ref();
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let head = head_tree_entries(git_dir, format, &db)?;
let IndexSnapshot {
entries: index,
stat_cache,
} = read_index_snapshot(git_dir, format)?;
let index_gitlinks = index_gitlinks(&index);
let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
let worktree = worktree_entries_for_path_set(
worktree_root,
format,
&candidate_paths,
&index_gitlinks,
Some(&stat_cache),
)?;
let cache = worktree_blob_cache_for_path_set(
worktree_root,
&head,
&worktree,
&candidate_paths,
options,
)?;
let changes = diff_name_status_maps_with_renames_for_path_set(
&head,
&worktree,
&candidate_paths,
options,
|oid| cache_or_odb_blob(&cache, &db, oid),
)?;
Ok(mark_unstaged_worktree_oids_unresolved(
changes, &index, &worktree,
))
}
pub fn diff_name_status_head_index(
git_dir: impl AsRef<Path>,
format: ObjectFormat,
) -> Result<Vec<NameStatusEntry>> {
diff_name_status_head_index_with_options(git_dir, format, DiffNameStatusOptions::default())
}
pub fn diff_name_status_head_index_with_options(
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let head = head_tree_entries(git_dir, format, &db)?;
let index = read_index_entries(git_dir, format)?;
diff_name_status_maps(&head, &index, head.keys().chain(index.keys()), options)
}
pub fn diff_name_status_head_index_with_rename_options(
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let head = head_tree_entries(git_dir, format, &db)?;
let index = read_index_entries(git_dir, format)?;
diff_name_status_maps_with_renames(
&head,
&index,
head.keys().chain(index.keys()),
options,
|oid| read_blob_bytes(&db, oid),
)
}
fn tree_entries(
tree_oid: &ObjectId,
format: ObjectFormat,
db: &FileObjectDatabase,
) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
let mut entries = BTreeMap::new();
if *tree_oid == empty_tree_oid(format)? {
return Ok(entries);
}
collect_tree_entries(db, format, tree_oid, Vec::new(), &mut entries)?;
Ok(entries)
}
fn empty_tree_oid(format: ObjectFormat) -> Result<ObjectId> {
object_id_for_bytes(format, "tree", b"")
}
pub fn diff_name_status_tree_index_with_options(
git_dir: impl AsRef<Path>,
format: ObjectFormat,
tree_oid: &ObjectId,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let tree = tree_entries(tree_oid, format, &db)?;
let index = read_index_entries(git_dir, format)?;
diff_name_status_maps(&tree, &index, tree.keys().chain(index.keys()), options)
}
pub fn diff_name_status_tree_index_with_rename_options(
git_dir: impl AsRef<Path>,
format: ObjectFormat,
tree_oid: &ObjectId,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let tree = tree_entries(tree_oid, format, &db)?;
let index = read_index_entries(git_dir, format)?;
diff_name_status_maps_with_renames(
&tree,
&index,
tree.keys().chain(index.keys()),
options,
|oid| read_blob_bytes(&db, oid),
)
}
pub fn diff_name_status_tree_worktree_with_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
tree_oid: &ObjectId,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let worktree_root = worktree_root.as_ref();
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let tree = tree_entries(tree_oid, format, &db)?;
let IndexSnapshot {
entries: index,
stat_cache,
} = read_index_snapshot(git_dir, format)?;
let index_gitlinks = index_gitlinks(&index);
let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
let worktree = worktree_entries_for_path_set(
worktree_root,
format,
&candidate_paths,
&index_gitlinks,
Some(&stat_cache),
)?;
let changes = diff_name_status_maps_for_path_set(&tree, &worktree, &candidate_paths, options)?;
Ok(mark_unstaged_worktree_oids_unresolved(
changes, &index, &worktree,
))
}
pub fn diff_name_status_tree_worktree_with_rename_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
tree_oid: &ObjectId,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
let worktree_root = worktree_root.as_ref();
let git_dir = git_dir.as_ref();
let db = FileObjectDatabase::from_git_dir(git_dir, format);
let tree = tree_entries(tree_oid, format, &db)?;
let IndexSnapshot {
entries: index,
stat_cache,
} = read_index_snapshot(git_dir, format)?;
let index_gitlinks = index_gitlinks(&index);
let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
let worktree = worktree_entries_for_path_set(
worktree_root,
format,
&candidate_paths,
&index_gitlinks,
Some(&stat_cache),
)?;
let cache = worktree_blob_cache_for_path_set(
worktree_root,
&tree,
&worktree,
&candidate_paths,
options,
)?;
let changes = diff_name_status_maps_with_renames_for_path_set(
&tree,
&worktree,
&candidate_paths,
options,
|oid| cache_or_odb_blob(&cache, &db, oid),
)?;
Ok(mark_unstaged_worktree_oids_unresolved(
changes, &index, &worktree,
))
}
pub fn diff_name_status_index_worktree(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
) -> Result<Vec<NameStatusEntry>> {
diff_name_status_index_worktree_with_options(
worktree_root,
git_dir,
format,
DiffNameStatusOptions::default(),
)
}
pub fn diff_name_status_index_worktree_with_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
Ok(diff_name_status_index_worktree_with_options_and_gitlinks(
worktree_root,
git_dir,
format,
options,
)?
.entries)
}
pub fn diff_name_status_index_worktree_with_options_and_gitlinks(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: DiffNameStatusOptions,
) -> Result<IndexWorktreeDiff> {
let IndexWorktreeDiff {
entries,
staged_gitlinks,
} = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
let entries = apply_name_status_options_to_index_worktree_changes(entries, options)?;
Ok(IndexWorktreeDiff {
entries,
staged_gitlinks,
})
}
pub fn diff_name_status_index_worktree_with_rename_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
Ok(
diff_name_status_index_worktree_with_rename_options_and_gitlinks(
worktree_root,
git_dir,
format,
options,
)?
.entries,
)
}
pub fn diff_name_status_index_worktree_with_rename_options_and_gitlinks(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: RenameDetectionOptions,
) -> Result<IndexWorktreeDiff> {
let IndexWorktreeDiff {
entries,
staged_gitlinks,
} = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
let entries = apply_name_status_options_to_index_worktree_changes(entries, options.base)?;
Ok(IndexWorktreeDiff {
entries,
staged_gitlinks,
})
}
fn diff_name_status_index_worktree_changes(
worktree_root: &Path,
git_dir: &Path,
format: ObjectFormat,
) -> Result<IndexWorktreeDiff> {
let index_path = sley_index::repository_index_path(git_dir);
let index_metadata = match fs::metadata(&index_path) {
Ok(metadata) => metadata,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Ok(IndexWorktreeDiff {
entries: Vec::new(),
staged_gitlinks: Vec::new(),
});
}
Err(err) => return Err(err.into()),
};
let index_bytes = fs::read(&index_path)?;
if let Ok(index) = BorrowedIndex::parse(&index_bytes, format) {
let (has_non_normal_stage, staged_gitlinks) =
index_worktree_metadata_for_entries(&index.entries);
if has_non_normal_stage {
return diff_name_status_index_worktree_changes_from_snapshot(
worktree_root,
git_dir,
format,
);
}
let stat_cache =
IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
let entries = diff_name_status_index_worktree_changes_for_borrowed_entries(
worktree_root,
format,
&index.entries,
&stat_cache,
)?;
return Ok(IndexWorktreeDiff {
entries,
staged_gitlinks,
});
}
let index = Index::parse(&index_bytes, format)?;
let (has_non_normal_stage, staged_gitlinks) =
index_worktree_metadata_for_entries(&index.entries);
if has_non_normal_stage {
return diff_name_status_index_worktree_changes_from_snapshot(
worktree_root,
git_dir,
format,
);
}
let stat_cache =
IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
let entries = diff_name_status_index_worktree_changes_for_entries(
worktree_root,
format,
&index.entries,
&stat_cache,
)?;
Ok(IndexWorktreeDiff {
entries,
staged_gitlinks,
})
}
fn diff_name_status_index_worktree_changes_for_borrowed_entries(
worktree_root: &Path,
format: ObjectFormat,
entries: &[sley_index::IndexEntryRef<'_>],
stat_cache: &IndexStatCache,
) -> Result<Vec<NameStatusEntry>> {
const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
let workers = std::thread::available_parallelism()
.map(|count| count.get())
.unwrap_or(1)
.min(8);
if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
return diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
worktree_root,
format,
entries,
stat_cache,
);
}
let chunk_size = entries.len().div_ceil(workers);
std::thread::scope(|scope| {
let mut handles = Vec::new();
for chunk in entries.chunks(chunk_size) {
handles.push(scope.spawn(move || {
diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
worktree_root,
format,
chunk,
stat_cache,
)
}));
}
let mut changes = Vec::new();
for handle in handles {
let chunk_changes = handle
.join()
.map_err(|_| GitError::Command("diff worker panicked".into()))??;
changes.extend(chunk_changes);
}
Ok(changes)
})
}
fn diff_name_status_index_worktree_changes_for_entries(
worktree_root: &Path,
format: ObjectFormat,
entries: &[sley_index::IndexEntry],
stat_cache: &IndexStatCache,
) -> Result<Vec<NameStatusEntry>> {
const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
let workers = std::thread::available_parallelism()
.map(|count| count.get())
.unwrap_or(1)
.min(8);
if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
return diff_name_status_index_worktree_changes_for_entry_chunk(
worktree_root,
format,
entries,
stat_cache,
);
}
let chunk_size = entries.len().div_ceil(workers);
std::thread::scope(|scope| {
let mut handles = Vec::new();
for chunk in entries.chunks(chunk_size) {
handles.push(scope.spawn(move || {
diff_name_status_index_worktree_changes_for_entry_chunk(
worktree_root,
format,
chunk,
stat_cache,
)
}));
}
let mut changes = Vec::new();
for handle in handles {
let chunk_changes = handle
.join()
.map_err(|_| GitError::Command("diff worker panicked".into()))??;
changes.extend(chunk_changes);
}
Ok(changes)
})
}
fn diff_name_status_index_worktree_changes_for_entry_chunk(
worktree_root: &Path,
format: ObjectFormat,
entries: &[sley_index::IndexEntry],
stat_cache: &IndexStatCache,
) -> Result<Vec<NameStatusEntry>> {
let mut changes = Vec::new();
let mut path = PathBuf::from(worktree_root);
for entry in entries {
worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path.as_bytes());
if let Some(change) = index_worktree_change_for_entry(&path, format, entry, &stat_cache)? {
changes.push(change);
}
}
Ok(changes)
}
fn diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
worktree_root: &Path,
format: ObjectFormat,
entries: &[sley_index::IndexEntryRef<'_>],
stat_cache: &IndexStatCache,
) -> Result<Vec<NameStatusEntry>> {
let mut changes = Vec::new();
let mut path = PathBuf::from(worktree_root);
for entry in entries {
worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path);
if let Some(change) = index_worktree_change_for_entry(&path, format, entry, &stat_cache)? {
changes.push(change);
}
}
Ok(changes)
}
fn index_worktree_metadata_for_entries(
entries: &[impl WorktreeIndexEntry],
) -> (bool, Vec<IndexGitlinkEntry>) {
let mut needs_snapshot = false;
let mut staged_gitlinks = Vec::new();
for entry in entries {
if entry.stage() != sley_index::Stage::Normal {
needs_snapshot = true;
}
if entry.is_intent_to_add() {
needs_snapshot = true;
}
if sley_index::is_gitlink(entry.mode()) {
staged_gitlinks.push(IndexGitlinkEntry {
path: BString::from_bytes(entry.git_path()),
oid: entry.oid(),
});
}
}
(needs_snapshot, staged_gitlinks)
}
fn diff_name_status_index_worktree_changes_from_snapshot(
worktree_root: &Path,
git_dir: &Path,
format: ObjectFormat,
) -> Result<IndexWorktreeDiff> {
let IndexSnapshot {
entries: index,
stat_cache,
} = read_index_snapshot(git_dir, format)?;
let intent_to_add_paths = read_intent_to_add_paths(git_dir, format)?;
let unmerged = read_unmerged_stages(git_dir, format)?;
let index_gitlinks = index_gitlinks(&index);
let staged_gitlinks = index_gitlinks
.iter()
.map(|(path, oid)| IndexGitlinkEntry {
path: BString::from_bytes(path),
oid: *oid,
})
.collect();
let mut changes = Vec::new();
for (git_path, left) in &index {
let conflict_stages = unmerged.get(git_path);
let right = worktree_entry_for_path(
worktree_root,
format,
git_path,
&index_gitlinks,
Some(&stat_cache),
)?;
if conflict_stages.is_some() {
changes.push(NameStatusEntry {
status: NameStatus::Unmerged,
path: git_path.clone().into(),
old_path: None,
old_mode: None,
new_mode: right.as_ref().map(|entry| entry.mode),
old_oid: None,
new_oid: None,
});
}
let left = match conflict_stages {
Some(stages) => match stages.ours.as_ref() {
Some(ours) => ours,
None => continue,
},
None => left,
};
if intent_to_add_paths.contains(git_path.as_slice()) {
if let Some(right) = right {
changes.push(NameStatusEntry {
status: NameStatus::Added,
path: git_path.clone().into(),
old_path: None,
old_mode: None,
new_mode: Some(right.mode),
old_oid: None,
new_oid: Some(right.oid),
});
}
continue;
}
let Some(right) = right else {
changes.push(NameStatusEntry {
status: NameStatus::Deleted,
path: git_path.clone().into(),
old_path: None,
old_mode: Some(left.mode),
new_mode: None,
old_oid: Some(left.oid),
new_oid: None,
});
continue;
};
if right != *left {
changes.push(NameStatusEntry {
status: NameStatus::Modified,
path: git_path.clone().into(),
old_path: None,
old_mode: Some(left.mode),
new_mode: Some(right.mode),
old_oid: Some(left.oid),
new_oid: Some(right.oid),
});
}
}
Ok(IndexWorktreeDiff {
entries: changes,
staged_gitlinks,
})
}
struct ConflictStages {
ours: Option<TrackedEntry>,
}
fn read_unmerged_stages(
git_dir: &Path,
format: ObjectFormat,
) -> Result<BTreeMap<Vec<u8>, ConflictStages>> {
let index_path = sley_index::repository_index_path(git_dir);
let index_bytes = match fs::read(&index_path) {
Ok(bytes) => bytes,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(BTreeMap::new()),
Err(err) => return Err(err.into()),
};
let index = sley_index::Index::parse(&index_bytes, format)?;
let mut out: BTreeMap<Vec<u8>, ConflictStages> = BTreeMap::new();
for entry in &index.entries {
let stage = entry.stage();
if stage == sley_index::Stage::Normal {
continue;
}
let path = entry.path.clone().into_bytes();
let slot = out.entry(path).or_insert(ConflictStages { ours: None });
if stage == sley_index::Stage::Ours {
slot.ours = Some(TrackedEntry {
mode: entry.mode,
oid: entry.oid,
});
}
}
Ok(out)
}
fn apply_name_status_options_to_index_worktree_changes(
mut changes: Vec<NameStatusEntry>,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
if options.detect_renames || options.detect_copies {
changes.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
}
Ok(changes)
}
pub fn diff_name_status_index_worktree_for_diff_files_with_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let worktree_root = worktree_root.as_ref();
let git_dir = git_dir.as_ref();
let changes =
diff_name_status_index_worktree_with_options(worktree_root, git_dir, format, options)?;
augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
}
pub fn diff_name_status_index_worktree_for_diff_files_with_rename_options(
worktree_root: impl AsRef<Path>,
git_dir: impl AsRef<Path>,
format: ObjectFormat,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
let worktree_root = worktree_root.as_ref();
let git_dir = git_dir.as_ref();
let changes = diff_name_status_index_worktree_with_rename_options(
worktree_root,
git_dir,
format,
options,
)?;
augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
}
fn augment_with_stat_dirty_entries(
worktree_root: &Path,
git_dir: &Path,
format: ObjectFormat,
mut content_changes: Vec<NameStatusEntry>,
) -> Result<Vec<NameStatusEntry>> {
let IndexSnapshot {
entries: index,
stat_cache,
} = read_index_snapshot(git_dir, format)?;
let already_reported: BTreeSet<&[u8]> = content_changes
.iter()
.map(|entry| entry.path.as_bytes())
.collect();
let mut extras = Vec::new();
for (git_path, tracked) in &index {
if already_reported.contains(git_path.as_slice()) {
continue;
}
let Some(cached) = stat_cache.entry_for_git_path(git_path) else {
continue;
};
if sley_index::is_gitlink(tracked.mode) {
continue;
}
let path = worktree_path_for_repo_path(worktree_root, git_path);
let Ok(metadata) = fs::symlink_metadata(&path) else {
continue;
};
if !(metadata.is_file() || metadata.file_type().is_symlink()) {
continue;
}
match stat_cache.index_entry_worktree_stat_verdict(cached, &metadata) {
sley_index::StatVerdict::Clean => continue,
sley_index::StatVerdict::Dirty => {}
sley_index::StatVerdict::RacyNeedsContentCheck => {
if worktree_oid_matches_index(worktree_root, git_path, &metadata, tracked, format)? {
continue;
}
}
}
extras.push(NameStatusEntry {
status: NameStatus::Modified,
path: git_path.clone().into(),
old_path: None,
old_mode: Some(tracked.mode),
new_mode: Some(tracked.mode),
old_oid: Some(tracked.oid),
new_oid: None,
});
}
if !extras.is_empty() {
content_changes.extend(extras);
content_changes
.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
}
Ok(content_changes)
}
fn worktree_oid_matches_index(
worktree_root: &Path,
git_path: &[u8],
metadata: &fs::Metadata,
index_entry: &TrackedEntry,
format: ObjectFormat,
) -> Result<bool> {
let file_type = metadata.file_type();
let path = worktree_path_for_repo_path(worktree_root, git_path);
let body = if file_type.is_symlink() {
symlink_target_bytes(&path)?
} else {
fs::read(&path)?
};
let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
let mode = if file_type.is_symlink() {
0o120000
} else {
file_mode(metadata)
};
Ok(oid == index_entry.oid && mode == index_entry.mode)
}
pub fn diff_name_status_trees_with_options(
db: &FileObjectDatabase,
format: ObjectFormat,
left_tree: &ObjectId,
right_tree: &ObjectId,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let needs_full_maps = options.detect_copies && options.find_copies_harder;
let (left_entries, right_entries) = if needs_full_maps {
collect_full_tree_pair(db, format, left_tree, right_tree)?
} else {
changed_tree_entries(db, format, left_tree, right_tree)?
};
diff_name_status_maps(
&left_entries,
&right_entries,
left_entries.keys().chain(right_entries.keys()),
options,
)
}
pub fn diff_name_status_empty_tree_with_options(
db: &FileObjectDatabase,
format: ObjectFormat,
right_tree: &ObjectId,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let left_entries = BTreeMap::new();
let mut right_entries = BTreeMap::new();
collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
diff_name_status_maps(&left_entries, &right_entries, right_entries.keys(), options)
}
pub fn diff_name_status_trees_with_rename_options(
db: &FileObjectDatabase,
format: ObjectFormat,
left_tree: &ObjectId,
right_tree: &ObjectId,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
let needs_full_maps = options.base.detect_copies && options.base.find_copies_harder;
let (left_entries, right_entries) = if needs_full_maps {
collect_full_tree_pair(db, format, left_tree, right_tree)?
} else {
changed_tree_entries(db, format, left_tree, right_tree)?
};
diff_name_status_maps_with_renames(
&left_entries,
&right_entries,
left_entries.keys().chain(right_entries.keys()),
options,
|oid| read_blob_bytes(db, oid),
)
}
pub fn diff_name_status_empty_tree_with_rename_options(
db: &FileObjectDatabase,
format: ObjectFormat,
right_tree: &ObjectId,
options: RenameDetectionOptions,
) -> Result<Vec<NameStatusEntry>> {
let left_entries = BTreeMap::new();
let mut right_entries = BTreeMap::new();
collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
diff_name_status_maps_with_renames(
&left_entries,
&right_entries,
right_entries.keys(),
options,
|oid| read_blob_bytes(db, oid),
)
}
fn read_blob_bytes(db: &FileObjectDatabase, oid: &ObjectId) -> Option<Vec<u8>> {
match db.read_object(oid) {
Ok(object) if object.object_type == ObjectType::Blob => Some(object.body.clone()),
_ => None,
}
}
fn raw_name_status_changes_for_unique_paths<'a>(
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
paths: impl Iterator<Item = &'a Vec<u8>>,
) -> Vec<NameStatusEntry> {
let mut changes = Vec::new();
for path in paths {
let left = left_entries.get(path);
let right = right_entries.get(path);
let status = match (left, right) {
(None, Some(_)) => Some(NameStatus::Added),
(Some(_), None) => Some(NameStatus::Deleted),
(Some(left), Some(right)) if left != right => Some(NameStatus::Modified),
_ => None,
};
if let Some(status) = status {
changes.push(NameStatusEntry {
status,
path: path.clone().into(),
old_path: None,
old_mode: left.map(|entry| entry.mode),
new_mode: right.map(|entry| entry.mode),
old_oid: left.map(|entry| entry.oid),
new_oid: right.map(|entry| entry.oid),
});
}
}
changes
}
fn diff_name_status_maps<'a>(
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let paths = candidate_path_set(candidate_paths);
diff_name_status_maps_for_path_set(left_entries, right_entries, &paths, options)
}
fn diff_name_status_maps_for_path_set(
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: &BTreeSet<Vec<u8>>,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
diff_name_status_maps_for_unique_paths(
left_entries,
right_entries,
candidate_paths.iter(),
options,
)
}
fn diff_name_status_maps_for_unique_paths<'a>(
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
options: DiffNameStatusOptions,
) -> Result<Vec<NameStatusEntry>> {
let mut changes =
raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
if options.detect_renames {
changes = detect_exact_renames(changes, left_entries, right_entries, options.rename_empty);
}
if options.detect_copies {
changes = detect_exact_copies(
changes,
left_entries,
right_entries,
options.find_copies_harder,
options.rename_empty,
);
}
Ok(changes)
}
fn diff_name_status_maps_with_renames<'a>(
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
options: RenameDetectionOptions,
fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
) -> Result<Vec<NameStatusEntry>> {
let paths = candidate_path_set(candidate_paths);
diff_name_status_maps_with_renames_for_path_set(
left_entries,
right_entries,
&paths,
options,
fetch_blob,
)
}
fn diff_name_status_maps_with_renames_for_path_set(
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: &BTreeSet<Vec<u8>>,
options: RenameDetectionOptions,
fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
) -> Result<Vec<NameStatusEntry>> {
diff_name_status_maps_with_renames_for_unique_paths(
left_entries,
right_entries,
candidate_paths.iter(),
options,
fetch_blob,
)
}
fn diff_name_status_maps_with_renames_for_unique_paths<'a>(
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
options: RenameDetectionOptions,
fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
) -> Result<Vec<NameStatusEntry>> {
let base = options.base;
let mut changes =
raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
if base.detect_renames {
changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
}
if base.detect_renames && options.detect_inexact {
changes = detect_inexact_renames(changes, &options, &fetch_blob);
}
if base.detect_copies {
changes = detect_exact_copies(
changes,
left_entries,
right_entries,
base.find_copies_harder,
base.rename_empty,
);
}
if base.detect_copies && options.detect_inexact {
changes = detect_inexact_copies(changes, left_entries, &options, &fetch_blob);
}
Ok(changes)
}
fn detect_exact_renames(
changes: Vec<NameStatusEntry>,
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
rename_empty: bool,
) -> Vec<NameStatusEntry> {
let added = changes
.iter()
.enumerate()
.filter(|(_, entry)| entry.status == NameStatus::Added)
.map(|(idx, entry)| (idx, entry.path.clone()))
.collect::<Vec<_>>();
let deleted = changes
.iter()
.filter(|entry| entry.status == NameStatus::Deleted)
.map(|entry| entry.path.clone())
.collect::<Vec<_>>();
let mut consumed = BTreeSet::new();
let mut renamed_old_paths = BTreeSet::new();
let mut result = Vec::new();
for old_path in deleted {
let Some(left) = left_entries.get(old_path.as_bytes()) else {
continue;
};
if let Some((idx, new_path)) = added.iter().find(|(idx, new_path)| {
!consumed.contains(idx)
&& right_entries.get(new_path.as_bytes()).is_some_and(|right| {
right.oid == left.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
})
}) {
consumed.insert(*idx);
renamed_old_paths.insert(old_path.clone());
let right = right_entries.get(new_path.as_bytes());
result.push(NameStatusEntry {
status: NameStatus::Renamed(100),
path: new_path.clone(),
old_path: Some(old_path),
old_mode: Some(left.mode),
new_mode: right.map(|entry| entry.mode),
old_oid: Some(left.oid),
new_oid: right.map(|entry| entry.oid),
});
}
}
for (idx, entry) in changes.into_iter().enumerate() {
if entry.status == NameStatus::Added && consumed.contains(&idx) {
continue;
}
if entry.status == NameStatus::Deleted && renamed_old_paths.contains(&entry.path) {
continue;
}
result.push(entry);
}
result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
result
}
fn detect_exact_copies(
changes: Vec<NameStatusEntry>,
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
find_copies_harder: bool,
rename_empty: bool,
) -> Vec<NameStatusEntry> {
let changed_sources = changes
.iter()
.filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
.map(|entry| entry.path.clone())
.collect::<BTreeSet<_>>();
let source_paths = left_entries
.keys()
.filter(|path| find_copies_harder || changed_sources.contains(path.as_slice()))
.cloned()
.collect::<Vec<_>>();
let mut result = Vec::new();
for entry in changes {
if entry.status != NameStatus::Added {
result.push(entry);
continue;
}
let Some(right) = right_entries.get(entry.path.as_bytes()) else {
result.push(entry);
continue;
};
if let Some(old_path) = source_paths.iter().find(|old_path| {
old_path.as_slice() != entry.path.as_bytes()
&& left_entries.get(*old_path).is_some_and(|left| {
left.oid == right.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
})
}) {
result.push(NameStatusEntry {
status: NameStatus::Copied(100),
path: entry.path,
old_path: Some(old_path.clone().into()),
old_mode: left_entries
.get(old_path.as_slice())
.map(|entry| entry.mode),
new_mode: entry.new_mode,
old_oid: left_entries.get(old_path.as_slice()).map(|entry| entry.oid),
new_oid: entry.new_oid,
});
} else {
result.push(entry);
}
}
result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
result
}
#[derive(Debug, Clone)]
struct RenameSourceMeta {
path: BString,
mode: Option<u32>,
oid: Option<ObjectId>,
}
struct ScoredPair {
src: usize,
dst: usize,
score: u8,
}
fn detect_inexact_renames(
changes: Vec<NameStatusEntry>,
options: &RenameDetectionOptions,
fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
) -> Vec<NameStatusEntry> {
let threshold = options.rename_threshold;
if threshold > 100 {
return changes;
}
let mut deleted: Vec<(usize, Vec<u8>)> = Vec::new();
let mut added: Vec<(usize, Vec<u8>)> = Vec::new();
for (idx, entry) in changes.iter().enumerate() {
match entry.status {
NameStatus::Deleted => {
let Some(oid) = entry.old_oid.as_ref() else {
continue;
};
if !options.base.rename_empty && is_empty_blob_oid(oid) {
continue;
}
if let Some(bytes) = fetch_blob(oid) {
deleted.push((idx, bytes));
}
}
NameStatus::Added => {
let Some(oid) = entry.new_oid.as_ref() else {
continue;
};
if !options.base.rename_empty && is_empty_blob_oid(oid) {
continue;
}
if let Some(bytes) = fetch_blob(oid) {
added.push((idx, bytes));
}
}
_ => {}
}
}
if deleted.is_empty() || added.is_empty() {
return changes;
}
let mut pairs: Vec<ScoredPair> = Vec::new();
for (si, (_, src_bytes)) in deleted.iter().enumerate() {
for (di, (_, dst_bytes)) in added.iter().enumerate() {
let score = blob_similarity(src_bytes, dst_bytes);
if score >= threshold {
pairs.push(ScoredPair {
src: si,
dst: di,
score,
});
}
}
}
pairs.sort_by(|a, b| {
b.score
.cmp(&a.score)
.then_with(|| a.src.cmp(&b.src))
.then_with(|| a.dst.cmp(&b.dst))
});
let mut src_used = vec![false; deleted.len()];
let mut dst_used = vec![false; added.len()];
let mut rename_of: BTreeMap<usize, (usize, u8)> = BTreeMap::new();
for pair in pairs {
if src_used[pair.src] || dst_used[pair.dst] {
continue;
}
src_used[pair.src] = true;
dst_used[pair.dst] = true;
let src_change_idx = deleted[pair.src].0;
let dst_change_idx = added[pair.dst].0;
rename_of.insert(dst_change_idx, (src_change_idx, pair.score));
}
if rename_of.is_empty() {
return changes;
}
let consumed_sources: BTreeSet<usize> =
rename_of.values().map(|(src_idx, _)| *src_idx).collect();
let source_meta: BTreeMap<usize, RenameSourceMeta> = consumed_sources
.iter()
.map(|&src_idx| {
let src = &changes[src_idx];
(
src_idx,
RenameSourceMeta {
path: src.path.clone(),
mode: src.old_mode,
oid: src.old_oid,
},
)
})
.collect();
let mut result = Vec::with_capacity(changes.len());
for (idx, entry) in changes.into_iter().enumerate() {
if consumed_sources.contains(&idx) {
continue;
}
if let Some((src_idx, score)) = rename_of.get(&idx) {
let meta = source_meta
.get(src_idx)
.cloned()
.unwrap_or(RenameSourceMeta {
path: BString::default(),
mode: None,
oid: None,
});
result.push(NameStatusEntry {
status: NameStatus::Renamed(*score),
path: entry.path,
old_path: Some(meta.path),
old_mode: meta.mode,
new_mode: entry.new_mode,
old_oid: meta.oid,
new_oid: entry.new_oid,
});
continue;
}
result.push(entry);
}
result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
result
}
fn detect_inexact_copies(
changes: Vec<NameStatusEntry>,
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
options: &RenameDetectionOptions,
fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
) -> Vec<NameStatusEntry> {
let threshold = options.copy_threshold;
if threshold > 100 {
return changes;
}
let changed_sources = changes
.iter()
.filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
.map(|entry| entry.path.clone())
.collect::<BTreeSet<_>>();
let mut sources: Vec<(Vec<u8>, &TrackedEntry, Vec<u8>)> = Vec::new();
for (path, tracked) in left_entries {
if !(options.base.find_copies_harder || changed_sources.contains(path.as_slice())) {
continue;
}
if !options.base.rename_empty && is_empty_blob_oid(&tracked.oid) {
continue;
}
if let Some(bytes) = fetch_blob(&tracked.oid) {
sources.push((path.clone(), tracked, bytes));
}
}
if sources.is_empty() {
return changes;
}
let mut result = Vec::with_capacity(changes.len());
for entry in changes {
if entry.status != NameStatus::Added {
result.push(entry);
continue;
}
let Some(new_oid) = entry.new_oid.as_ref() else {
result.push(entry);
continue;
};
let Some(dst_bytes) = fetch_blob(new_oid) else {
result.push(entry);
continue;
};
let mut best: Option<(usize, u8)> = None;
for (i, (src_path, _, src_bytes)) in sources.iter().enumerate() {
if src_path.as_slice() == entry.path.as_bytes() {
continue;
}
let score = blob_similarity(src_bytes, &dst_bytes);
if score < threshold {
continue;
}
match best {
Some((_, best_score)) if best_score >= score => {}
_ => best = Some((i, score)),
}
}
if let Some((src_idx, score)) = best {
let (src_path, src_tracked, _) = &sources[src_idx];
result.push(NameStatusEntry {
status: NameStatus::Copied(score),
path: entry.path,
old_path: Some(src_path.clone().into()),
old_mode: Some(src_tracked.mode),
new_mode: entry.new_mode,
old_oid: Some(src_tracked.oid),
new_oid: entry.new_oid,
});
} else {
result.push(entry);
}
}
result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
result
}
fn is_empty_blob_oid(oid: &ObjectId) -> bool {
object_id_for_bytes(oid.format(), "blob", b"").is_ok_and(|empty| empty == *oid)
}
const MAX_SPAN_BYTES: usize = 64;
pub fn blob_similarity(a: &[u8], b: &[u8]) -> u8 {
if a == b {
return 100;
}
let max_size = a.len().max(b.len());
if max_size == 0 {
return 100;
}
let src = span_hash_counts(a);
let dst = span_hash_counts(b);
let common = common_span_bytes(&src, &dst);
const MAX_SCORE: u64 = 60000;
let internal = (common as u64 * MAX_SCORE) / max_size as u64;
let score = internal * 100 / MAX_SCORE;
score.min(100) as u8
}
fn span_hash_counts(data: &[u8]) -> BTreeMap<u64, usize> {
let mut counts: BTreeMap<u64, usize> = BTreeMap::new();
let mut idx = 0usize;
let len = data.len();
while idx < len {
let mut accum1: u32 = 0;
let mut accum2: u32 = 0;
let mut span_len = 0usize;
loop {
let c = data[idx] as u32;
idx += 1;
span_len += 1;
accum1 = (accum1 << 7) ^ (accum2 >> 25);
accum2 = (accum2 << 7) ^ (accum1 >> 25);
accum1 = accum1.wrapping_add(c);
let newline = c == u32::from(b'\n');
if span_len >= MAX_SPAN_BYTES || newline || idx >= len {
break;
}
}
let hash = ((accum1 as u64) << 32) ^ (accum2 as u64) ^ ((span_len as u64) << 1);
*counts.entry(hash).or_insert(0) += span_len;
}
counts
}
pub fn count_changes(src: &[u8], dst: &[u8]) -> (usize, usize) {
let src_counts = span_hash_counts(src);
let dst_counts = span_hash_counts(dst);
let copied = common_span_bytes(&src_counts, &dst_counts);
(copied, dst.len() - copied)
}
fn common_span_bytes(src: &BTreeMap<u64, usize>, dst: &BTreeMap<u64, usize>) -> usize {
let mut common = 0usize;
let (small, large) = if src.len() <= dst.len() {
(src, dst)
} else {
(dst, src)
};
for (hash, small_bytes) in small {
if let Some(large_bytes) = large.get(hash) {
common += (*small_bytes).min(*large_bytes);
}
}
common
}
fn diff_entry_sort_path(entry: &NameStatusEntry) -> &[u8] {
entry.path.as_bytes()
}
fn mark_unstaged_worktree_oids_unresolved(
changes: Vec<NameStatusEntry>,
index_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
worktree_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
) -> Vec<NameStatusEntry> {
changes
.into_iter()
.map(|mut entry| {
let worktree_entry = worktree_entries.get(entry.path.as_bytes());
if worktree_entry != index_entries.get(entry.path.as_bytes()) {
entry.new_oid = None;
}
entry
})
.collect()
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
mode: u32,
oid: ObjectId,
}
type TrackedEntryMap = BTreeMap<Vec<u8>, TrackedEntry>;
type TrackedEntryPair = (TrackedEntryMap, TrackedEntryMap);
struct IndexSnapshot {
entries: BTreeMap<Vec<u8>, TrackedEntry>,
stat_cache: IndexStatCache,
}
fn read_index_entries(
git_dir: &Path,
format: ObjectFormat,
) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
Ok(read_index_snapshot(git_dir, format)?.entries)
}
fn read_intent_to_add_paths(
git_dir: &Path,
format: ObjectFormat,
) -> Result<std::collections::HashSet<Vec<u8>>> {
let index_path = sley_index::repository_index_path(git_dir);
if !index_path.exists() {
return Ok(std::collections::HashSet::new());
}
let index = Index::parse(&fs::read(&index_path)?, format)?;
Ok(index
.entries
.iter()
.filter(|entry| entry.stage() == sley_index::Stage::Normal && entry.is_intent_to_add())
.map(|entry| entry.path.as_bytes().to_vec())
.collect())
}
fn read_index_snapshot(git_dir: &Path, format: ObjectFormat) -> Result<IndexSnapshot> {
let index_path = sley_index::repository_index_path(git_dir);
let index_metadata = match fs::metadata(&index_path) {
Ok(metadata) => metadata,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Ok(IndexSnapshot {
entries: BTreeMap::new(),
stat_cache: IndexStatCache::default(),
});
}
Err(err) => return Err(err.into()),
};
let index = Index::parse(&fs::read(&index_path)?, format)?;
let stat_cache =
IndexStatCache::from_index_mtime(&index, sley_index::file_mtime_parts(&index_metadata));
let entries = index
.entries
.into_iter()
.map(|entry| {
(
entry.path.into_bytes(),
TrackedEntry {
mode: entry.mode,
oid: entry.oid,
},
)
})
.collect();
Ok(IndexSnapshot {
entries,
stat_cache,
})
}
trait WorktreeIndexEntry {
fn git_path(&self) -> &[u8];
fn stage(&self) -> sley_index::Stage;
fn mode(&self) -> u32;
fn oid(&self) -> ObjectId;
fn is_intent_to_add(&self) -> bool;
fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool;
}
impl WorktreeIndexEntry for sley_index::IndexEntry {
fn git_path(&self) -> &[u8] {
self.path.as_bytes()
}
fn stage(&self) -> sley_index::Stage {
sley_index::IndexEntry::stage(self)
}
fn mode(&self) -> u32 {
self.mode
}
fn oid(&self) -> ObjectId {
self.oid
}
fn is_intent_to_add(&self) -> bool {
sley_index::IndexEntry::is_intent_to_add(self)
}
fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
stat_cache.reusable_index_entry(self, metadata).is_some()
}
}
impl WorktreeIndexEntry for sley_index::IndexEntryRef<'_> {
fn git_path(&self) -> &[u8] {
self.path
}
fn stage(&self) -> sley_index::Stage {
sley_index::IndexEntryRef::stage(self)
}
fn mode(&self) -> u32 {
self.mode
}
fn oid(&self) -> ObjectId {
self.oid
}
fn is_intent_to_add(&self) -> bool {
sley_index::IndexEntryRef::is_intent_to_add(self)
}
fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
stat_cache.reusable_index_entry_ref(self, metadata)
}
}
fn tracked_entry_from_index(entry: &impl WorktreeIndexEntry) -> TrackedEntry {
TrackedEntry {
mode: entry.mode(),
oid: entry.oid(),
}
}
fn head_tree_entries(
git_dir: &Path,
format: ObjectFormat,
db: &FileObjectDatabase,
) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
let refs = FileRefStore::new(git_dir, format);
let Some(head) = refs.read_ref("HEAD")? else {
return Ok(BTreeMap::new());
};
let commit_oid = match head {
RefTarget::Direct(oid) => Some(oid),
RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
Some(RefTarget::Direct(oid)) => Some(oid),
_ => None,
},
};
let Some(commit_oid) = commit_oid else {
return Ok(BTreeMap::new());
};
let object = db.read_object(&commit_oid)?;
if object.object_type != ObjectType::Commit {
return Err(GitError::InvalidObject(format!(
"HEAD {commit_oid} is not a commit"
)));
}
let commit = Commit::parse_ref(format, &object.body)?;
let mut entries = BTreeMap::new();
collect_tree_entries(db, format, &commit.tree, Vec::new(), &mut entries)?;
Ok(entries)
}
fn collect_tree_entries(
db: &FileObjectDatabase,
format: ObjectFormat,
tree_oid: &ObjectId,
prefix: Vec<u8>,
entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
) -> Result<()> {
for (rel_path, (mode, oid)) in flatten_tree(db, format, tree_oid)? {
let path = join_tree_path(&prefix, &rel_path);
entries.insert(path, TrackedEntry { mode, oid });
}
Ok(())
}
const TREE_ENTRY_MODE: u32 = 0o040000;
fn read_tree_object(
db: &FileObjectDatabase,
format: ObjectFormat,
tree_oid: &ObjectId,
) -> Result<Tree> {
let object = db.read_object(tree_oid)?;
if object.object_type != ObjectType::Tree {
return Err(GitError::InvalidObject(format!(
"expected tree {tree_oid}, found {}",
object.object_type.as_str()
)));
}
Tree::parse(format, &object.body)
}
fn join_tree_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
let mut path = Vec::with_capacity(prefix.len() + 1 + name.len());
path.extend_from_slice(prefix);
if !path.is_empty() {
path.push(b'/');
}
path.extend_from_slice(name);
path
}
fn collect_full_tree_pair(
db: &FileObjectDatabase,
format: ObjectFormat,
left_tree: &ObjectId,
right_tree: &ObjectId,
) -> Result<TrackedEntryPair> {
let mut left = BTreeMap::new();
collect_tree_entries(db, format, left_tree, Vec::new(), &mut left)?;
let mut right = BTreeMap::new();
collect_tree_entries(db, format, right_tree, Vec::new(), &mut right)?;
Ok((left, right))
}
fn changed_tree_entries(
db: &FileObjectDatabase,
format: ObjectFormat,
left_tree: &ObjectId,
right_tree: &ObjectId,
) -> Result<TrackedEntryPair> {
let mut left = BTreeMap::new();
let mut right = BTreeMap::new();
if left_tree != right_tree {
diff_tree_pair(
db,
format,
left_tree,
right_tree,
&[],
&mut left,
&mut right,
)?;
}
Ok((left, right))
}
fn diff_tree_pair(
db: &FileObjectDatabase,
format: ObjectFormat,
left_tree: &ObjectId,
right_tree: &ObjectId,
prefix: &[u8],
left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
) -> Result<()> {
let left_entries = read_tree_object(db, format, left_tree)?.entries;
let right_entries = read_tree_object(db, format, right_tree)?.entries;
let mut right_by_name: HashMap<&[u8], &TreeEntry> = HashMap::with_capacity(right_entries.len());
for entry in &right_entries {
right_by_name.insert(entry.name.as_bytes(), entry);
}
for left_entry in &left_entries {
match right_by_name.remove(left_entry.name.as_bytes()) {
Some(right_entry) => {
merge_tree_entry(
db,
format,
prefix,
Some(left_entry),
Some(right_entry),
left,
right,
)?;
}
None => {
merge_tree_entry(db, format, prefix, Some(left_entry), None, left, right)?;
}
}
}
for right_entry in &right_entries {
if right_by_name.contains_key(right_entry.name.as_bytes()) {
merge_tree_entry(db, format, prefix, None, Some(right_entry), left, right)?;
}
}
Ok(())
}
fn merge_tree_entry(
db: &FileObjectDatabase,
format: ObjectFormat,
prefix: &[u8],
left_entry: Option<&TreeEntry>,
right_entry: Option<&TreeEntry>,
left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
) -> Result<()> {
let left_is_tree = left_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
let right_is_tree = right_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
if let (Some(left_entry), Some(right_entry)) = (left_entry, right_entry) {
if left_is_tree && right_is_tree {
if left_entry.oid == right_entry.oid {
return Ok(());
}
let path = join_tree_path(prefix, left_entry.name.as_bytes());
return diff_tree_pair(
db,
format,
&left_entry.oid,
&right_entry.oid,
&path,
left,
right,
);
}
if !left_is_tree && !right_is_tree {
if left_entry.mode == right_entry.mode && left_entry.oid == right_entry.oid {
return Ok(());
}
let path = join_tree_path(prefix, left_entry.name.as_bytes());
left.insert(
path.clone(),
TrackedEntry {
mode: left_entry.mode,
oid: left_entry.oid,
},
);
right.insert(
path,
TrackedEntry {
mode: right_entry.mode,
oid: right_entry.oid,
},
);
return Ok(());
}
}
if let Some(left_entry) = left_entry {
let path = join_tree_path(prefix, left_entry.name.as_bytes());
if left_is_tree {
collect_tree_entries(db, format, &left_entry.oid, path, left)?;
} else {
left.insert(
path,
TrackedEntry {
mode: left_entry.mode,
oid: left_entry.oid,
},
);
}
}
if let Some(right_entry) = right_entry {
let path = join_tree_path(prefix, right_entry.name.as_bytes());
if right_is_tree {
collect_tree_entries(db, format, &right_entry.oid, path, right)?;
} else {
right.insert(
path,
TrackedEntry {
mode: right_entry.mode,
oid: right_entry.oid,
},
);
}
}
Ok(())
}
fn index_gitlinks(index: &BTreeMap<Vec<u8>, TrackedEntry>) -> BTreeMap<Vec<u8>, ObjectId> {
index
.iter()
.filter(|(_, entry)| sley_index::is_gitlink(entry.mode))
.map(|(path, entry)| (path.clone(), entry.oid))
.collect()
}
fn candidate_path_set<'a>(candidate_paths: impl Iterator<Item = &'a Vec<u8>>) -> BTreeSet<Vec<u8>> {
candidate_paths.cloned().collect()
}
fn worktree_entries_for_path_set(
worktree_root: &Path,
format: ObjectFormat,
candidates: &BTreeSet<Vec<u8>>,
index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
stat_cache: Option<&IndexStatCache>,
) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
worktree_entries_for_unique_paths(
worktree_root,
format,
candidates.iter(),
index_gitlinks,
stat_cache,
)
}
fn worktree_entries_for_unique_paths<'a>(
worktree_root: &Path,
format: ObjectFormat,
candidates: impl Iterator<Item = &'a Vec<u8>>,
index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
stat_cache: Option<&IndexStatCache>,
) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
let mut entries = BTreeMap::new();
for git_path in candidates {
if let Some(entry) =
worktree_entry_for_path(worktree_root, format, &git_path, index_gitlinks, stat_cache)?
{
entries.insert(git_path.clone(), entry);
}
}
Ok(entries)
}
fn worktree_entry_for_path(
worktree_root: &Path,
format: ObjectFormat,
git_path: &[u8],
index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
stat_cache: Option<&IndexStatCache>,
) -> Result<Option<TrackedEntry>> {
let path = worktree_path_for_repo_path(worktree_root, git_path);
let metadata = match fs::symlink_metadata(&path) {
Ok(metadata) => metadata,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(GitError::Io(err.to_string())),
};
let file_type = metadata.file_type();
if let Some(staged_oid) = index_gitlinks.get(git_path)
&& metadata.is_dir()
{
let oid = gitlink_head_oid(&path, format).unwrap_or(*staged_oid);
return Ok(Some(TrackedEntry {
mode: sley_index::GITLINK_MODE,
oid,
}));
}
if metadata.is_dir() {
if let Some(oid) = gitlink_head_oid(&path, format) {
return Ok(Some(TrackedEntry {
mode: sley_index::GITLINK_MODE,
oid,
}));
}
return Ok(None);
}
if !(metadata.is_file() || file_type.is_symlink()) {
return Ok(None);
}
if let Some(entry) = stat_cache.and_then(|cache| cache.reusable_entry(git_path, &metadata)) {
return Ok(Some(tracked_entry_from_index(entry)));
}
let body = if file_type.is_symlink() {
symlink_target_bytes(&path)?
} else {
fs::read(&path)?
};
let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
let mode = if file_type.is_symlink() {
0o120000
} else {
file_mode(&metadata)
};
Ok(Some(TrackedEntry { mode, oid }))
}
fn index_worktree_change_for_entry(
path: &Path,
format: ObjectFormat,
index_entry: &impl WorktreeIndexEntry,
stat_cache: &IndexStatCache,
) -> Result<Option<NameStatusEntry>> {
let git_path = index_entry.git_path();
let metadata = match fs::symlink_metadata(path) {
Ok(metadata) => metadata,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Ok(Some(index_worktree_deleted_entry(index_entry)));
}
Err(err) => return Err(GitError::Io(err.to_string())),
};
let file_type = metadata.file_type();
let right = if metadata.is_dir() {
if sley_index::is_gitlink(index_entry.mode()) {
let oid = gitlink_head_oid(path, format).unwrap_or(index_entry.oid());
Some(TrackedEntry {
mode: sley_index::GITLINK_MODE,
oid,
})
} else if let Some(oid) = gitlink_head_oid(path, format) {
Some(TrackedEntry {
mode: sley_index::GITLINK_MODE,
oid,
})
} else {
None
}
} else if metadata.is_file() || file_type.is_symlink() {
if index_entry.reusable_with(stat_cache, &metadata) {
return Ok(None);
}
let body = if file_type.is_symlink() {
symlink_target_bytes(path)?
} else {
fs::read(path)?
};
let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
let mode = if file_type.is_symlink() {
0o120000
} else {
file_mode(&metadata)
};
Some(TrackedEntry { mode, oid })
} else {
None
};
let Some(right) = right else {
return Ok(Some(index_worktree_deleted_entry(index_entry)));
};
let left = tracked_entry_from_index(index_entry);
if right == left {
return Ok(None);
}
Ok(Some(NameStatusEntry {
status: NameStatus::Modified,
path: git_path.to_vec().into(),
old_path: None,
old_mode: Some(left.mode),
new_mode: Some(right.mode),
old_oid: Some(left.oid),
new_oid: Some(right.oid),
}))
}
fn index_worktree_deleted_entry(index_entry: &impl WorktreeIndexEntry) -> NameStatusEntry {
NameStatusEntry {
status: NameStatus::Deleted,
path: index_entry.git_path().to_vec().into(),
old_path: None,
old_mode: Some(index_entry.mode()),
new_mode: None,
old_oid: Some(index_entry.oid()),
new_oid: None,
}
}
fn worktree_blob_cache_for_path_set(
worktree_root: &Path,
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: &BTreeSet<Vec<u8>>,
options: RenameDetectionOptions,
) -> Result<HashMap<ObjectId, Vec<u8>>> {
worktree_blob_cache_for_unique_paths(
worktree_root,
left_entries,
right_entries,
candidate_paths.iter(),
options,
)
}
fn worktree_blob_cache_for_unique_paths<'a>(
worktree_root: &Path,
left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
options: RenameDetectionOptions,
) -> Result<HashMap<ObjectId, Vec<u8>>> {
if !options.detect_inexact || !(options.base.detect_renames || options.base.detect_copies) {
return Ok(HashMap::new());
}
let base = options.base;
let mut changes =
raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
if base.detect_renames {
changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
}
if base.detect_copies {
changes = detect_exact_copies(
changes,
left_entries,
right_entries,
base.find_copies_harder,
base.rename_empty,
);
}
let has_rename_source = base.detect_renames
&& changes.iter().any(|entry| {
entry.status == NameStatus::Deleted
&& entry
.old_oid
.as_ref()
.is_some_and(|oid| base.rename_empty || !is_empty_blob_oid(oid))
});
let has_copy_source = base.detect_copies
&& (base.find_copies_harder
|| changes
.iter()
.any(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified)));
if !has_rename_source && !has_copy_source {
return Ok(HashMap::new());
}
let candidate_oids = changes
.iter()
.filter(|entry| entry.status == NameStatus::Added)
.filter_map(|entry| entry.new_oid)
.filter(|oid| base.rename_empty || !is_empty_blob_oid(oid))
.collect::<BTreeSet<_>>();
if candidate_oids.is_empty() {
return Ok(HashMap::new());
}
let mut cache = HashMap::new();
for (git_path, entry) in right_entries {
if sley_index::is_gitlink(entry.mode) || !candidate_oids.contains(&entry.oid) {
continue;
}
let path = worktree_path_for_repo_path(worktree_root, git_path);
let body = if entry.mode == 0o120000 {
symlink_target_bytes(&path)?
} else {
fs::read(&path)?
};
cache.entry(entry.oid).or_insert(body);
}
Ok(cache)
}
fn cache_or_odb_blob(
cache: &HashMap<ObjectId, Vec<u8>>,
db: &FileObjectDatabase,
oid: &ObjectId,
) -> Option<Vec<u8>> {
if let Some(bytes) = cache.get(oid) {
return Some(bytes.clone());
}
read_blob_bytes(db, oid)
}
#[cfg(unix)]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let mut out = PathBuf::from(worktree_root);
out.push(OsStr::from_bytes(path));
out
}
#[cfg(unix)]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
out.clear();
out.push(worktree_root);
out.push(OsStr::from_bytes(path));
}
#[cfg(not(unix))]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
worktree_root.join(repo_path_to_path(path))
}
#[cfg(not(unix))]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
out.clear();
out.push(worktree_root);
out.push(repo_path_to_path(path));
}
#[cfg(not(unix))]
fn repo_path_to_path(path: &[u8]) -> PathBuf {
let mut out = PathBuf::new();
for component in String::from_utf8_lossy(path).split('/') {
if !component.is_empty() {
out.push(component);
}
}
out
}
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
use std::os::unix::fs::PermissionsExt;
if metadata.permissions().mode() & 0o111 != 0 {
0o100755
} else {
0o100644
}
}
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
0o100644
}
#[cfg(unix)]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
use std::os::unix::ffi::OsStrExt;
let target = fs::read_link(path)?;
Ok(target.as_os_str().as_bytes().to_vec())
}
#[cfg(not(unix))]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
let target = fs::read_link(path)?;
Ok(target.to_string_lossy().replace('\\', "/").into_bytes())
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HunkLine {
Context(Vec<u8>),
Insert(Vec<u8>),
Delete(Vec<u8>),
}
impl HunkLine {
pub fn content(&self) -> &[u8] {
match self {
Self::Context(bytes) | Self::Insert(bytes) | Self::Delete(bytes) => bytes,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
pub old_start: usize,
pub old_len: usize,
pub new_start: usize,
pub new_len: usize,
pub lines: Vec<HunkLine>,
pub old_no_newline: bool,
pub new_no_newline: bool,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
pub old_path: Option<Vec<u8>>,
pub new_path: Option<Vec<u8>>,
pub old_mode: Option<u32>,
pub new_mode: Option<u32>,
pub hunks: Vec<Hunk>,
pub is_new: bool,
pub is_delete: bool,
pub is_rename: bool,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApplyOutcome {
Applied(Vec<u8>),
Rejected,
}
const MIN_FUZZ_CONTEXT: usize = usize::MAX;
pub fn parse_unified_patch(input: &[u8]) -> Result<Vec<FilePatch>> {
let lines = split_patch_lines(input);
let mut parser = PatchParser {
lines: &lines,
index: 0,
};
parser.parse()
}
pub fn apply_file_patch(base: &[u8], patch: &FilePatch) -> ApplyOutcome {
if patch.is_delete && patch.hunks.is_empty() {
return ApplyOutcome::Applied(Vec::new());
}
let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
let mut image = split_blob_lines(base_for_match);
let mut running_offset: isize = 0;
for hunk in &patch.hunks {
match apply_one_hunk(&mut image, hunk, running_offset) {
Some(drift) => running_offset += drift,
None => return ApplyOutcome::Rejected,
}
}
ApplyOutcome::Applied(join_lines(&image))
}
fn apply_one_hunk(image: &mut Vec<Line>, hunk: &Hunk, running_offset: isize) -> Option<isize> {
let mut preimage: Vec<Line> = Vec::new();
let mut postimage: Vec<Line> = Vec::new();
let mut leading = 0usize; let mut trailing = 0usize; let mut seen_change = false;
for hl in &hunk.lines {
match hl {
HunkLine::Context(bytes) => {
preimage.push(Line {
content: bytes.clone(),
no_newline: false,
});
postimage.push(Line {
content: bytes.clone(),
no_newline: false,
});
if !seen_change {
leading += 1;
}
trailing += 1;
}
HunkLine::Delete(bytes) => {
preimage.push(Line {
content: bytes.clone(),
no_newline: false,
});
seen_change = true;
trailing = 0;
}
HunkLine::Insert(bytes) => {
postimage.push(Line {
content: bytes.clone(),
no_newline: false,
});
seen_change = true;
trailing = 0;
}
}
}
if hunk.old_no_newline && let Some(last) = preimage.last_mut() {
last.no_newline = true;
}
if hunk.new_no_newline && let Some(last) = postimage.last_mut() {
last.no_newline = true;
}
let mut match_beginning = hunk.old_start <= 1;
let mut match_end = trailing == 0;
let mut expected = expected_position(hunk, running_offset);
let hunk_expected = expected;
loop {
if let Some(pos) = find_hunk_pos(image, &preimage, expected, match_beginning, match_end) {
let take = preimage.len();
let replacement: Vec<Line> = postimage.clone();
image.splice(pos..pos + take, replacement);
return Some(pos as isize - hunk_expected);
}
#[allow(clippy::absurd_extreme_comparisons)]
if leading <= MIN_FUZZ_CONTEXT && trailing <= MIN_FUZZ_CONTEXT {
return None;
}
if match_beginning || match_end {
match_beginning = false;
match_end = false;
continue;
}
if leading >= trailing {
preimage.remove(0);
postimage.remove(0);
expected -= 1;
leading -= 1;
}
if trailing > leading {
preimage.pop();
postimage.pop();
trailing -= 1;
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct Line {
content: Vec<u8>,
no_newline: bool,
}
fn split_blob_lines(data: &[u8]) -> Vec<Line> {
let mut lines = Vec::new();
let mut start = 0usize;
while start < data.len() {
match data[start..].iter().position(|&b| b == b'\n') {
Some(rel) => {
let end = start + rel;
lines.push(Line {
content: data[start..end].to_vec(),
no_newline: false,
});
start = end + 1;
}
None => {
lines.push(Line {
content: data[start..].to_vec(),
no_newline: true,
});
start = data.len();
}
}
}
lines
}
fn join_lines(lines: &[Line]) -> Vec<u8> {
let mut out = Vec::new();
for line in lines {
out.extend_from_slice(&line.content);
if !line.no_newline {
out.push(b'\n');
}
}
out
}
fn expected_position(hunk: &Hunk, running_offset: isize) -> isize {
let base = if hunk.old_start == 0 {
0
} else {
hunk.old_start as isize - 1
};
base + running_offset
}
fn find_hunk_pos(
image: &[Line],
preimage: &[Line],
expected: isize,
match_beginning: bool,
match_end: bool,
) -> Option<usize> {
let line_nr = image.len();
let pre_nr = preimage.len();
let mut line: isize = if match_beginning {
0
} else if match_end {
line_nr as isize - pre_nr as isize
} else {
expected
};
if line < 0 {
line = 0;
}
if line as usize > line_nr {
line = line_nr as isize;
}
let start = line as usize;
let mut backwards = start;
let mut forwards = start;
let mut current = start;
let mut i: u64 = 0;
loop {
if preimage_matches_at(image, preimage, current, match_beginning, match_end) {
return Some(current);
}
loop {
if backwards == 0 && forwards == line_nr {
return None;
}
if i & 1 == 1 {
if backwards == 0 {
i += 1;
continue;
}
backwards -= 1;
current = backwards;
} else {
if forwards == line_nr {
i += 1;
continue;
}
forwards += 1;
current = forwards;
}
break;
}
i += 1;
}
}
fn preimage_matches_at(
image: &[Line],
preimage: &[Line],
pos: usize,
match_beginning: bool,
match_end: bool,
) -> bool {
if match_beginning && pos != 0 {
return false;
}
if pos + preimage.len() > image.len() {
return false;
}
if match_end && pos + preimage.len() != image.len() {
return false;
}
for (i, pre) in preimage.iter().enumerate() {
let img = &image[pos + i];
if img.content != pre.content {
return false;
}
if pre.no_newline != img.no_newline {
return false;
}
}
true
}
fn split_patch_lines(input: &[u8]) -> Vec<&[u8]> {
let mut lines = Vec::new();
let mut start = 0usize;
while start < input.len() {
match input[start..].iter().position(|&b| b == b'\n') {
Some(rel) => {
let end = start + rel;
lines.push(&input[start..end]);
start = end + 1;
}
None => {
lines.push(&input[start..]);
start = input.len();
}
}
}
lines
}
struct PatchParser<'a> {
lines: &'a [&'a [u8]],
index: usize,
}
impl<'a> PatchParser<'a> {
fn parse(&mut self) -> Result<Vec<FilePatch>> {
let mut patches = Vec::new();
while self.index < self.lines.len() {
let line = self.lines[self.index];
if line.starts_with(b"diff --git ") {
patches.push(self.parse_file(Some(line))?);
} else if line.starts_with(b"--- ") {
patches.push(self.parse_file(None)?);
} else if line.starts_with(b"@@ ") {
return Err(GitError::InvalidFormat(
"hunk header encountered before any file header".to_string(),
));
} else {
self.index += 1;
}
}
Ok(patches)
}
fn parse_file(&mut self, diff_line: Option<&[u8]>) -> Result<FilePatch> {
let mut patch = FilePatch {
old_path: None,
new_path: None,
old_mode: None,
new_mode: None,
hunks: Vec::new(),
is_new: false,
is_delete: false,
is_rename: false,
};
if let Some(diff_line) = diff_line {
if let Some((a, b)) = parse_diff_git_paths(diff_line) {
patch.old_path = Some(a);
patch.new_path = Some(b);
}
self.index += 1;
}
while self.index < self.lines.len() {
let line = self.lines[self.index];
if line.starts_with(b"--- ") {
self.parse_old_file_header(line, &mut patch);
self.index += 1;
break;
} else if line.starts_with(b"@@ ") {
break;
} else if line.starts_with(b"diff --git ") {
return Ok(patch);
} else if let Some(rest) = strip_prefix(line, b"old mode ") {
patch.old_mode = parse_octal(rest);
} else if let Some(rest) = strip_prefix(line, b"new mode ") {
patch.new_mode = parse_octal(rest);
} else if let Some(rest) = strip_prefix(line, b"new file mode ") {
patch.is_new = true;
patch.new_mode = parse_octal(rest);
} else if let Some(rest) = strip_prefix(line, b"deleted file mode ") {
patch.is_delete = true;
patch.old_mode = parse_octal(rest);
} else if let Some(rest) = strip_prefix(line, b"rename from ") {
patch.is_rename = true;
patch.old_path = Some(rest.to_vec());
} else if let Some(rest) = strip_prefix(line, b"rename to ") {
patch.is_rename = true;
patch.new_path = Some(rest.to_vec());
} else {
self.index += 1;
continue;
}
self.index += 1;
}
if self.index < self.lines.len() && self.lines[self.index].starts_with(b"+++ ") {
self.parse_new_file_header(self.lines[self.index], &mut patch);
self.index += 1;
}
while self.index < self.lines.len() {
let line = self.lines[self.index];
if line.starts_with(b"@@ ") {
let hunk = self.parse_hunk()?;
patch.hunks.push(hunk);
} else if line.starts_with(b"diff --git ") {
break;
} else if line.starts_with(b"--- ") {
break;
} else {
self.index += 1;
}
}
Ok(patch)
}
fn parse_old_file_header(&self, line: &[u8], patch: &mut FilePatch) {
let rest = strip_prefix(line, b"--- ").unwrap_or(line);
let path = strip_header_path(rest);
match path {
HeaderPath::DevNull => {
patch.is_new = true;
patch.old_path = None;
}
HeaderPath::Path(p) => {
if patch.old_path.is_none() || !patch.is_rename {
patch.old_path = Some(p);
}
}
}
}
fn parse_new_file_header(&self, line: &[u8], patch: &mut FilePatch) {
let rest = strip_prefix(line, b"+++ ").unwrap_or(line);
let path = strip_header_path(rest);
match path {
HeaderPath::DevNull => {
patch.is_delete = true;
patch.new_path = None;
}
HeaderPath::Path(p) => {
if patch.new_path.is_none() || !patch.is_rename {
patch.new_path = Some(p);
}
}
}
}
fn parse_hunk(&mut self) -> Result<Hunk> {
let header = self.lines[self.index];
let (old_start, old_len, new_start, new_len) = parse_hunk_header(header)?;
self.index += 1;
let mut hunk = Hunk {
old_start,
old_len,
new_start,
new_len,
lines: Vec::new(),
old_no_newline: false,
new_no_newline: false,
};
let mut old_seen = 0usize;
let mut new_seen = 0usize;
while self.index < self.lines.len() {
if old_seen >= old_len && new_seen >= new_len {
break;
}
let line = self.lines[self.index];
if line.is_empty() {
hunk.lines.push(HunkLine::Context(Vec::new()));
old_seen += 1;
new_seen += 1;
self.index += 1;
continue;
}
match line[0] {
b' ' => {
hunk.lines.push(HunkLine::Context(line[1..].to_vec()));
old_seen += 1;
new_seen += 1;
}
b'+' => {
hunk.lines.push(HunkLine::Insert(line[1..].to_vec()));
new_seen += 1;
}
b'-' => {
hunk.lines.push(HunkLine::Delete(line[1..].to_vec()));
old_seen += 1;
}
b'\\' => {
self.mark_no_newline(&mut hunk);
self.index += 1;
continue;
}
_ => {
break;
}
}
self.index += 1;
}
if self.index < self.lines.len() && self.lines[self.index].starts_with(b"\\") {
self.mark_no_newline(&mut hunk);
self.index += 1;
}
if old_seen != old_len || new_seen != new_len {
return Err(GitError::InvalidFormat(format!(
"hunk body line counts mismatch: header declared -{old_len},+{new_len} \
but body had -{old_seen},+{new_seen}"
)));
}
Ok(hunk)
}
fn mark_no_newline(&self, hunk: &mut Hunk) {
match hunk.lines.last() {
Some(HunkLine::Context(_)) => {
hunk.old_no_newline = true;
hunk.new_no_newline = true;
}
Some(HunkLine::Insert(_)) => hunk.new_no_newline = true,
Some(HunkLine::Delete(_)) => hunk.old_no_newline = true,
None => {}
}
}
}
enum HeaderPath {
DevNull,
Path(Vec<u8>),
}
fn strip_header_path(rest: &[u8]) -> HeaderPath {
let path = match rest.iter().position(|&b| b == b'\t') {
Some(tab) => &rest[..tab],
None => rest,
};
let path = trim_ascii_end(path);
if path == b"/dev/null" {
return HeaderPath::DevNull;
}
let stripped = if path.starts_with(b"a/") || path.starts_with(b"b/") {
&path[2..]
} else {
path
};
HeaderPath::Path(stripped.to_vec())
}
fn parse_diff_git_paths(line: &[u8]) -> Option<(Vec<u8>, Vec<u8>)> {
let rest = strip_prefix(line, b"diff --git ")?;
if rest.first() == Some(&b'"') {
return None;
}
if !rest.starts_with(b"a/") {
return None;
}
let sep = find_subslice(rest, b" b/")?;
let a = &rest[2..sep];
let b = &rest[sep + 3..];
Some((a.to_vec(), b.to_vec()))
}
fn parse_hunk_header(line: &[u8]) -> Result<(usize, usize, usize, usize)> {
let err = || GitError::InvalidFormat(format!("malformed hunk header: {}", lossy(line)));
let rest = strip_prefix(line, b"@@ ").ok_or_else(err)?;
let close = find_subslice(rest, b" @@").ok_or_else(err)?;
let ranges = &rest[..close];
let mut parts = ranges.split(|&b| b == b' ').filter(|p| !p.is_empty());
let old = parts.next().ok_or_else(err)?;
let new = parts.next().ok_or_else(err)?;
let old = strip_prefix(old, b"-").ok_or_else(err)?;
let new = strip_prefix(new, b"+").ok_or_else(err)?;
let (old_start, old_len) = parse_range(old).ok_or_else(err)?;
let (new_start, new_len) = parse_range(new).ok_or_else(err)?;
Ok((old_start, old_len, new_start, new_len))
}
fn parse_range(range: &[u8]) -> Option<(usize, usize)> {
match range.iter().position(|&b| b == b',') {
Some(comma) => {
let start = parse_usize(&range[..comma])?;
let len = parse_usize(&range[comma + 1..])?;
Some((start, len))
}
None => Some((parse_usize(range)?, 1)),
}
}
fn parse_usize(bytes: &[u8]) -> Option<usize> {
if bytes.is_empty() {
return None;
}
let mut value: usize = 0;
for &b in bytes {
if !b.is_ascii_digit() {
return None;
}
value = value.checked_mul(10)?.checked_add((b - b'0') as usize)?;
}
Some(value)
}
fn parse_octal(bytes: &[u8]) -> Option<u32> {
let trimmed = trim_ascii_end(bytes);
if trimmed.is_empty() {
return None;
}
let mut value: u32 = 0;
for &b in trimmed {
if !(b'0'..=b'7').contains(&b) {
return None;
}
value = value.checked_mul(8)?.checked_add((b - b'0') as u32)?;
}
Some(value)
}
fn strip_prefix<'b>(line: &'b [u8], prefix: &[u8]) -> Option<&'b [u8]> {
if line.starts_with(prefix) {
Some(&line[prefix.len()..])
} else {
None
}
}
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
if needle.is_empty() || needle.len() > haystack.len() {
return None;
}
haystack
.windows(needle.len())
.position(|window| window == needle)
}
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
let mut end = bytes.len();
while end > 0 && (bytes[end - 1] == b' ' || bytes[end - 1] == b'\r') {
end -= 1;
}
&bytes[..end]
}
fn lossy(bytes: &[u8]) -> String {
String::from_utf8_lossy(bytes).into_owned()
}
pub type MergeEntryMap = BTreeMap<Vec<u8>, (u32, ObjectId)>;
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MergeFavor {
None,
Ours,
Theirs,
}
pub struct MergeTreesOptions<'a> {
pub ours_label: &'a str,
pub theirs_label: &'a str,
pub ancestor_label: &'a str,
pub favor: MergeFavor,
pub detect_renames: bool,
pub rename_threshold: u8,
pub directory_renames: DirectoryRenames,
pub style: ConflictStyle,
}
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
pub enum DirectoryRenames {
#[default]
False,
True,
Conflict,
}
impl Default for MergeTreesOptions<'_> {
fn default() -> Self {
Self {
ours_label: "ours",
theirs_label: "theirs",
ancestor_label: "merged common ancestors",
favor: MergeFavor::None,
detect_renames: false,
rename_threshold: DEFAULT_RENAME_THRESHOLD,
directory_renames: DirectoryRenames::False,
style: ConflictStyle::Merge,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeConflictKind {
Content { add_add: bool },
ModifyDelete {
deleted_in: String,
modified_in: String,
},
RenameContent {
old_path: Vec<u8>,
},
RenameDelete {
old_path: Vec<u8>,
renamed_in: String,
deleted_in: String,
},
FileDirectory {
original_path: Vec<u8>,
moved_from: String,
},
DirRenameLocation {
old_path: Vec<u8>,
renamed_from: Option<Vec<u8>>,
added_in: String,
dir_renamed_in: String,
},
DirRenameImplicitCollision {
sources: Vec<Vec<u8>>,
},
}
#[derive(Debug, Clone)]
pub struct MergedPath {
pub path: Vec<u8>,
pub stages: MergeStages,
pub result: Option<(u32, ObjectId)>,
pub worktree: Option<(u32, Vec<u8>)>,
pub conflict: Option<MergeConflictKind>,
pub auto_merged: bool,
}
impl MergedPath {
pub fn is_clean(&self) -> bool {
self.conflict.is_none()
}
}
#[derive(Debug, Clone, Default)]
pub struct MergeStages {
pub base: Option<(u32, ObjectId)>,
pub ours: Option<(u32, ObjectId)>,
pub theirs: Option<(u32, ObjectId)>,
}
#[derive(Debug, Clone)]
pub struct MergeTreesResult {
pub tree: ObjectId,
pub paths: Vec<MergedPath>,
pub clean: bool,
}
impl MergeTreesResult {
pub fn conflicts(&self) -> impl Iterator<Item = &MergedPath> {
self.paths.iter().filter(|entry| entry.conflict.is_some())
}
}
pub fn flatten_tree(
reader: &impl ObjectReader,
format: ObjectFormat,
tree_oid: &ObjectId,
) -> Result<MergeEntryMap> {
let mut entries = BTreeMap::new();
if *tree_oid == empty_tree_oid(format)? {
return Ok(entries);
}
collect_flat_tree(reader, format, tree_oid, Vec::new(), &mut entries)?;
Ok(entries)
}
fn collect_flat_tree(
reader: &impl ObjectReader,
format: ObjectFormat,
tree_oid: &ObjectId,
prefix: Vec<u8>,
entries: &mut MergeEntryMap,
) -> Result<()> {
let object = reader.read_object(tree_oid)?;
if object.object_type != ObjectType::Tree {
return Err(GitError::InvalidObject(format!(
"expected tree {}, found {}",
tree_oid,
object.object_type.as_str()
)));
}
for entry in TreeEntries::new(format, &object.body) {
let entry = entry?;
let mut path = prefix.clone();
if !path.is_empty() {
path.push(b'/');
}
path.extend_from_slice(entry.name);
if entry.mode == 0o040000 {
collect_flat_tree(reader, format, &entry.oid, path, entries)?;
} else {
entries.insert(path, (entry.mode, entry.oid));
}
}
Ok(())
}
pub fn is_mergeable_file_mode(mode: u32) -> bool {
mode == 0o100644 || mode == 0o100755
}
pub fn merge_trees(
db: &FileObjectDatabase,
format: ObjectFormat,
base: Option<&ObjectId>,
ours: &ObjectId,
theirs: &ObjectId,
options: &MergeTreesOptions<'_>,
) -> Result<MergeTreesResult> {
let base_map = match base {
Some(tree) => flatten_tree(db, format, tree)?,
None => MergeEntryMap::new(),
};
let ours_map = flatten_tree(db, format, ours)?;
let theirs_map = flatten_tree(db, format, theirs)?;
merge_entry_maps(db, format, &base_map, &ours_map, &theirs_map, options)
}
pub fn merge_entry_maps(
db: &FileObjectDatabase,
format: ObjectFormat,
base_map: &MergeEntryMap,
ours_map: &MergeEntryMap,
theirs_map: &MergeEntryMap,
options: &MergeTreesOptions<'_>,
) -> Result<MergeTreesResult> {
let (renames, side_renames) = if options.detect_renames {
let (renames, ours_side, theirs_side) =
detect_merge_renames(db, format, base_map, ours_map, theirs_map, options)?;
(renames, Some((ours_side, theirs_side)))
} else {
(MergeRenames::default(), None)
};
let (mut eff_base, mut eff_ours, mut eff_theirs) =
apply_merge_renames(base_map, ours_map, theirs_map, &renames);
let mut dir_rename_dirty = false;
let mut rehomed_paths: BTreeMap<Vec<u8>, RehomeInfo> = BTreeMap::new();
let mut dir_rename_collisions: Vec<DirRenameCollision> = Vec::new();
if options.directory_renames != DirectoryRenames::False
&& let Some((ours_side, theirs_side)) = &side_renames
{
let dir_renames =
compute_directory_renames(base_map, ours_map, theirs_map, ours_side, theirs_side);
let outcome = apply_directory_renames(
base_map,
&eff_base,
&eff_ours,
&eff_theirs,
ours_side,
theirs_side,
&dir_renames,
);
eff_base = outcome.base;
eff_ours = outcome.ours;
eff_theirs = outcome.theirs;
rehomed_paths = outcome.rehomed;
dir_rename_collisions = outcome.collisions;
dir_rename_dirty = outcome.dirty;
}
let dir_rename_conflict_paths: BTreeMap<Vec<u8>, RehomeInfo> =
if options.directory_renames == DirectoryRenames::Conflict {
rehomed_paths
} else {
BTreeMap::new()
};
let mut all_paths = BTreeSet::new();
all_paths.extend(eff_base.keys().cloned());
all_paths.extend(eff_ours.keys().cloned());
all_paths.extend(eff_theirs.keys().cloned());
let mut paths: Vec<MergedPath> = Vec::new();
let mut leaves: MergeEntryMap = BTreeMap::new();
let mut clean = true;
for path in all_paths {
let base = eff_base.get(&path).cloned();
let ours = eff_ours.get(&path).cloned();
let theirs = eff_theirs.get(&path).cloned();
let rename = renames.dest_to_source.get(&path);
let old_path = rename.map(|r| r.source.clone());
if ours == theirs {
if let Some(entry) = ours {
leaves.insert(path.clone(), entry);
}
paths.push(clean_path(path, ours));
continue;
}
if ours == base {
if let Some(entry) = &theirs {
leaves.insert(path.clone(), *entry);
}
paths.push(clean_path(path, theirs));
continue;
}
if theirs == base {
if let Some(entry) = &ours {
leaves.insert(path.clone(), *entry);
}
paths.push(clean_path(path, ours));
continue;
}
let content_mergeable = matches!(&ours, Some((mode, _)) if is_mergeable_file_mode(*mode))
&& matches!(&theirs, Some((mode, _)) if is_mergeable_file_mode(*mode))
&& match &base {
Some((mode, _)) => is_mergeable_file_mode(*mode),
None => true,
};
if let (true, Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
(content_mergeable, &ours, &theirs)
{
let add_add = base.is_none();
let base_bytes = match &base {
Some((_, oid)) => merge_blob_bytes(db, oid)?,
None => Vec::new(),
};
let ours_bytes = merge_blob_bytes(db, ours_oid)?;
let theirs_bytes = merge_blob_bytes(db, theirs_oid)?;
let (ours_label, theirs_label) = match rename {
Some(MergeRename { source, side }) => {
let (ours_path, theirs_path) = match side {
RenameSide::Theirs => (source.as_slice(), path.as_slice()),
RenameSide::Ours => (path.as_slice(), source.as_slice()),
};
(
qualify_label(options.ours_label, ours_path),
qualify_label(options.theirs_label, theirs_path),
)
}
None => (
options.ours_label.to_string(),
options.theirs_label.to_string(),
),
};
let result = merge_blobs(
&base_bytes,
&ours_bytes,
&theirs_bytes,
&MergeBlobOptions {
ours_label: &ours_label,
theirs_label: &theirs_label,
base_label: options.ancestor_label,
style: options.style,
},
);
let base_mode = base.as_ref().map(|(mode, _)| *mode);
let (resolved_mode, mode_conflict) =
merge_file_modes(base_mode, *ours_mode, *theirs_mode);
if !result.conflicted && !mode_conflict {
let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content))?;
leaves.insert(path.clone(), (resolved_mode, oid));
paths.push(clean_path_auto(path, Some((resolved_mode, oid)), true));
} else if options.favor != MergeFavor::None && !mode_conflict {
let chosen = if options.favor == MergeFavor::Ours {
ours
} else {
theirs
};
if let Some(entry) = chosen {
leaves.insert(path.clone(), entry);
}
paths.push(clean_path_auto(path, chosen, true));
} else {
clean = false;
let oid =
db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
leaves.insert(path.clone(), (resolved_mode, oid));
let worktree_mode = if *ours_mode == *theirs_mode {
*ours_mode
} else {
0o100644
};
let conflict = match &old_path {
Some(old) => MergeConflictKind::RenameContent {
old_path: old.clone(),
},
None => MergeConflictKind::Content { add_add },
};
paths.push(MergedPath {
path: path.clone(),
stages: stages_for(&base, &ours, &theirs),
result: Some((resolved_mode, oid)),
worktree: Some((worktree_mode, result.content)),
conflict: Some(conflict),
auto_merged: true,
});
}
} else if base.is_some() && (ours.is_none() || theirs.is_none()) {
clean = false;
let (deleted_in, modified_in, surviving) = if ours.is_none() {
(
options.ours_label.to_string(),
options.theirs_label.to_string(),
theirs,
)
} else {
(
options.theirs_label.to_string(),
options.ours_label.to_string(),
ours,
)
};
let worktree = match &surviving {
Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
None => None,
};
if let Some(entry) = surviving {
leaves.insert(path.clone(), entry);
}
paths.push(MergedPath {
path: path.clone(),
stages: stages_for(&base, &ours, &theirs),
result: surviving,
worktree,
conflict: Some(MergeConflictKind::ModifyDelete {
deleted_in,
modified_in,
}),
auto_merged: false,
});
} else {
clean = false;
let add_add = base.is_none();
let surviving = ours.or(theirs);
let worktree = match &surviving {
Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
None => None,
};
if let Some(entry) = surviving {
leaves.insert(path.clone(), entry);
}
paths.push(MergedPath {
path: path.clone(),
stages: stages_for(&base, &ours, &theirs),
result: surviving,
worktree,
conflict: Some(MergeConflictKind::Content { add_add }),
auto_merged: false,
});
}
}
if !renames.rename_deletes.is_empty() {
for (dest, rd) in &renames.rename_deletes {
let Some(slot) = paths.iter_mut().find(|p| &p.path == dest) else {
continue;
};
if slot.conflict.is_some() {
continue;
}
let base_entry = base_map.get(&rd.source).copied();
let renamed_entry = slot.result;
let (ours_stage, theirs_stage) = match rd.side {
RenameSide::Ours => (renamed_entry, None),
RenameSide::Theirs => (None, renamed_entry),
};
let (renamed_in, deleted_in) = match rd.side {
RenameSide::Ours => (
options.ours_label.to_string(),
options.theirs_label.to_string(),
),
RenameSide::Theirs => (
options.theirs_label.to_string(),
options.ours_label.to_string(),
),
};
let worktree = match &renamed_entry {
Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
None => None,
};
slot.stages = MergeStages {
base: base_entry,
ours: ours_stage,
theirs: theirs_stage,
};
slot.worktree = worktree;
slot.conflict = Some(MergeConflictKind::RenameDelete {
old_path: rd.source.clone(),
renamed_in,
deleted_in,
});
clean = false;
}
}
if dir_rename_dirty {
clean = false;
}
for collision in &dir_rename_collisions {
clean = false;
if let Some(slot) = paths.iter_mut().find(|p| p.path == collision.dest) {
if slot.conflict.is_none() {
slot.conflict = Some(MergeConflictKind::DirRenameImplicitCollision {
sources: collision.sources.clone(),
});
}
}
}
if !dir_rename_conflict_paths.is_empty() {
clean = false;
for (dest, info) in &dir_rename_conflict_paths {
let (added_in, dir_renamed_in) = if info.added_on_ours {
(options.ours_label.to_string(), options.theirs_label.to_string())
} else {
(options.theirs_label.to_string(), options.ours_label.to_string())
};
if let Some(slot) = paths.iter_mut().find(|p| &p.path == dest)
&& slot.conflict.is_none()
{
slot.conflict = Some(MergeConflictKind::DirRenameLocation {
old_path: info.old_path.clone(),
renamed_from: info.renamed_from.clone(),
added_in,
dir_renamed_in,
});
}
}
}
resolve_directory_file_conflicts(
db,
&mut paths,
&mut leaves,
&mut clean,
&eff_ours,
&eff_theirs,
options,
)?;
let tree = write_merged_tree(db, &leaves)?;
Ok(MergeTreesResult { tree, paths, clean })
}
fn flatten_branch_label(branch: &str) -> String {
branch.replace('/', "_")
}
fn unique_df_path(
path: &[u8],
branch: &str,
leaves: &MergeEntryMap,
paths: &[MergedPath],
) -> Vec<u8> {
let mut base = path.to_vec();
base.push(b'~');
base.extend_from_slice(flatten_branch_label(branch).as_bytes());
let taken = |candidate: &[u8]| {
leaves.contains_key(candidate) || paths.iter().any(|p| p.path == candidate)
};
if !taken(&base) {
return base;
}
let mut suffix = 0usize;
loop {
let mut candidate = base.clone();
candidate.push(b'_');
candidate.extend_from_slice(suffix.to_string().as_bytes());
if !taken(&candidate) {
return candidate;
}
suffix += 1;
}
}
fn resolve_directory_file_conflicts(
db: &FileObjectDatabase,
paths: &mut Vec<MergedPath>,
leaves: &mut MergeEntryMap,
clean: &mut bool,
eff_ours: &MergeEntryMap,
eff_theirs: &MergeEntryMap,
options: &MergeTreesOptions<'_>,
) -> Result<()> {
let mut directory_prefixes: BTreeSet<Vec<u8>> = BTreeSet::new();
for key in leaves.keys() {
let mut idx = 0;
while let Some(pos) = key[idx..].iter().position(|b| *b == b'/') {
let end = idx + pos;
directory_prefixes.insert(key[..end].to_vec());
idx = end + 1;
}
}
if directory_prefixes.is_empty() {
return Ok(());
}
let colliding: Vec<Vec<u8>> = leaves
.keys()
.filter(|key| directory_prefixes.contains(*key))
.cloned()
.collect();
for original in colliding {
let Some(entry) = leaves.remove(&original) else {
continue;
};
let moved_bytes = merge_blob_bytes(db, &entry.1)?;
let ours_has_file = eff_ours.contains_key(&original);
let theirs_has_file = eff_theirs.contains_key(&original);
let from_ours = ours_has_file || !theirs_has_file;
let branch = if from_ours {
options.ours_label
} else {
options.theirs_label
};
let new_path = unique_df_path(&original, branch, leaves, paths);
leaves.insert(new_path.clone(), entry);
*clean = false;
if let Some(slot) = paths.iter_mut().find(|p| p.path == original) {
slot.path = new_path.clone();
slot.result = Some(entry);
if slot.conflict.is_none() {
slot.stages = MergeStages {
base: None,
ours: if from_ours { Some(entry) } else { None },
theirs: if from_ours { None } else { Some(entry) },
};
}
slot.worktree = Some((entry.0, moved_bytes));
slot.conflict = Some(MergeConflictKind::FileDirectory {
original_path: original.clone(),
moved_from: branch.to_string(),
});
} else {
paths.push(MergedPath {
path: new_path.clone(),
stages: MergeStages {
base: None,
ours: if from_ours { Some(entry) } else { None },
theirs: if from_ours { None } else { Some(entry) },
},
result: Some(entry),
worktree: Some((entry.0, moved_bytes)),
conflict: Some(MergeConflictKind::FileDirectory {
original_path: original.clone(),
moved_from: branch.to_string(),
}),
auto_merged: false,
});
}
}
paths.sort_by(|a, b| a.path.cmp(&b.path));
Ok(())
}
fn clean_path(path: Vec<u8>, result: Option<(u32, ObjectId)>) -> MergedPath {
clean_path_auto(path, result, false)
}
fn clean_path_auto(
path: Vec<u8>,
result: Option<(u32, ObjectId)>,
auto_merged: bool,
) -> MergedPath {
MergedPath {
path,
stages: MergeStages::default(),
result,
worktree: None,
conflict: None,
auto_merged,
}
}
fn stages_for(
base: &Option<(u32, ObjectId)>,
ours: &Option<(u32, ObjectId)>,
theirs: &Option<(u32, ObjectId)>,
) -> MergeStages {
MergeStages {
base: *base,
ours: *ours,
theirs: *theirs,
}
}
fn merge_blob_bytes(reader: &impl ObjectReader, oid: &ObjectId) -> Result<Vec<u8>> {
let object = reader.read_object(oid)?;
if object.object_type != ObjectType::Blob {
return Err(GitError::InvalidObject(format!(
"expected blob {}, found {}",
oid,
object.object_type.as_str()
)));
}
Ok(object.body.clone())
}
fn merge_file_modes(base: Option<u32>, ours: u32, theirs: u32) -> (u32, bool) {
if ours == theirs {
return (ours, false);
}
match base {
Some(base) if ours == base => (theirs, false),
Some(base) if theirs == base => (ours, false),
_ => (ours, true),
}
}
fn write_merged_tree(db: &FileObjectDatabase, leaves: &MergeEntryMap) -> Result<ObjectId> {
let mut root = MergeTreeNode::default();
for (path, (mode, oid)) in leaves {
root.insert(path, *mode, *oid);
}
root.write(db)
}
#[derive(Default)]
struct MergeTreeNode {
blobs: BTreeMap<Vec<u8>, (u32, ObjectId)>,
subtrees: BTreeMap<Vec<u8>, MergeTreeNode>,
}
impl MergeTreeNode {
fn insert(&mut self, path: &[u8], mode: u32, oid: ObjectId) {
match path.iter().position(|byte| *byte == b'/') {
Some(slash) => {
let component = path[..slash].to_vec();
let rest = &path[slash + 1..];
self.subtrees
.entry(component)
.or_default()
.insert(rest, mode, oid);
}
None => {
self.blobs.insert(path.to_vec(), (mode, oid));
}
}
}
fn write(&self, db: &FileObjectDatabase) -> Result<ObjectId> {
let mut entries: Vec<TreeEntry> = Vec::new();
for (name, (mode, oid)) in &self.blobs {
entries.push(TreeEntry {
mode: *mode,
name: BString::from(name.clone()),
oid: *oid,
});
}
for (name, subtree) in &self.subtrees {
let oid = subtree.write(db)?;
entries.push(TreeEntry {
mode: 0o040000,
name: BString::from(name.clone()),
oid,
});
}
entries.sort_by_key(merge_tree_sort_key);
let tree = Tree { entries };
db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
}
}
fn merge_tree_sort_key(entry: &TreeEntry) -> Vec<u8> {
let mut key = entry.name.as_bytes().to_vec();
if entry.mode == 0o040000 {
key.push(b'/');
}
key
}
#[derive(Clone, Copy, PartialEq, Eq)]
enum RenameSide {
Ours,
Theirs,
}
#[derive(Clone)]
struct MergeRename {
source: Vec<u8>,
side: RenameSide,
}
#[derive(Clone)]
struct RenameDelete {
source: Vec<u8>,
side: RenameSide,
}
#[derive(Default)]
struct MergeRenames {
dest_to_source: BTreeMap<Vec<u8>, MergeRename>,
rename_deletes: BTreeMap<Vec<u8>, RenameDelete>,
}
struct SideRenames {
pairs: Vec<(Vec<u8>, Vec<u8>)>,
}
fn detect_merge_renames(
db: &FileObjectDatabase,
format: ObjectFormat,
base_map: &MergeEntryMap,
ours_map: &MergeEntryMap,
theirs_map: &MergeEntryMap,
options: &MergeTreesOptions<'_>,
) -> Result<(MergeRenames, SideRenames, SideRenames)> {
let mut renames = MergeRenames::default();
let ours_side = collect_side_renames(
db,
format,
base_map,
ours_map,
theirs_map,
RenameSide::Ours,
options.rename_threshold,
&mut renames,
)?;
let theirs_side = collect_side_renames(
db,
format,
base_map,
theirs_map,
ours_map,
RenameSide::Theirs,
options.rename_threshold,
&mut renames,
)?;
Ok((renames, ours_side, theirs_side))
}
#[allow(clippy::too_many_arguments)]
fn collect_side_renames(
db: &FileObjectDatabase,
format: ObjectFormat,
base_map: &MergeEntryMap,
side_map: &MergeEntryMap,
other_map: &MergeEntryMap,
side: RenameSide,
threshold: u8,
renames: &mut MergeRenames,
) -> Result<SideRenames> {
let base_tree = entry_map_as_tracked(base_map);
let side_tree = entry_map_as_tracked(side_map);
let options = RenameDetectionOptions {
base: DiffNameStatusOptions {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: false,
},
detect_inexact: true,
rename_threshold: threshold,
copy_threshold: threshold,
};
let changes = diff_name_status_maps_with_renames(
&base_tree,
&side_tree,
base_tree.keys().chain(side_tree.keys()),
options,
|oid| merge_blob_bytes(db, oid).ok(),
)?;
let mut pairs = Vec::new();
for change in changes {
let NameStatus::Renamed(_) = change.status else {
continue;
};
let Some(old_path) = change.old_path.as_ref() else {
continue;
};
let old = old_path.as_bytes().to_vec();
let new = change.path.as_bytes().to_vec();
pairs.push((old.clone(), new.clone()));
if !other_map.contains_key(&old) {
if base_map.contains_key(&old) && !other_map.contains_key(&new) {
renames
.rename_deletes
.entry(new.clone())
.or_insert(RenameDelete {
source: old.clone(),
side,
});
}
continue;
}
if other_map.contains_key(&new) {
continue;
}
renames
.dest_to_source
.entry(new)
.or_insert(MergeRename { source: old, side });
}
let _ = format;
Ok(SideRenames { pairs })
}
fn apply_merge_renames(
base_map: &MergeEntryMap,
ours_map: &MergeEntryMap,
theirs_map: &MergeEntryMap,
renames: &MergeRenames,
) -> (MergeEntryMap, MergeEntryMap, MergeEntryMap) {
if renames.dest_to_source.is_empty() {
return (base_map.clone(), ours_map.clone(), theirs_map.clone());
}
let mut base = base_map.clone();
let mut ours = ours_map.clone();
let mut theirs = theirs_map.clone();
for (new, rename) in &renames.dest_to_source {
let old = &rename.source;
if let Some(entry) = base.remove(old) {
base.entry(new.clone()).or_insert(entry);
}
for side in [&mut ours, &mut theirs] {
if let Some(entry) = side.remove(old) {
side.entry(new.clone()).or_insert(entry);
}
}
}
(base, ours, theirs)
}
fn parent_dir(path: &[u8]) -> Option<&[u8]> {
path.iter().rposition(|b| *b == b'/').map(|i| &path[..i])
}
fn apply_dir_rename(old_dir: &[u8], new_dir: &[u8], path: &[u8]) -> Vec<u8> {
let rest_start = if new_dir.is_empty() {
old_dir.len() + 1
} else {
old_dir.len()
};
let mut out = new_dir.to_vec();
out.extend_from_slice(&path[rest_start..]);
out
}
fn check_dir_renamed<'a>(
path: &[u8],
dir_renames: &'a BTreeMap<Vec<u8>, Vec<u8>>,
) -> Option<(&'a [u8], &'a [u8])> {
let mut cur = parent_dir(path);
while let Some(dir) = cur {
if let Some((old_dir, new_dir)) = dir_renames.get_key_value(dir) {
return Some((old_dir.as_slice(), new_dir.as_slice()));
}
cur = parent_dir(dir);
}
None
}
struct DirectoryRenameMaps {
ours: BTreeMap<Vec<u8>, Vec<u8>>,
theirs: BTreeMap<Vec<u8>, Vec<u8>>,
split_dirs: BTreeSet<Vec<u8>>,
}
fn compute_directory_renames(
base_map: &MergeEntryMap,
ours_map: &MergeEntryMap,
theirs_map: &MergeEntryMap,
ours_side: &SideRenames,
theirs_side: &SideRenames,
) -> DirectoryRenameMaps {
let ours = compute_side_dir_renames(&ours_side.pairs, base_map, ours_map);
let theirs = compute_side_dir_renames(&theirs_side.pairs, base_map, theirs_map);
let mut split_dirs = BTreeSet::new();
split_dirs.extend(ours.split.iter().cloned());
split_dirs.extend(theirs.split.iter().cloned());
let mut ours_map_out = ours.renames;
let mut theirs_map_out = theirs.renames;
let dup: Vec<Vec<u8>> = ours_map_out
.keys()
.filter(|k| theirs_map_out.contains_key(*k))
.cloned()
.collect();
for k in dup {
ours_map_out.remove(&k);
theirs_map_out.remove(&k);
}
DirectoryRenameMaps {
ours: ours_map_out,
theirs: theirs_map_out,
split_dirs,
}
}
struct SideDirRenames {
renames: BTreeMap<Vec<u8>, Vec<u8>>,
split: BTreeSet<Vec<u8>>,
}
fn compute_side_dir_renames(
pairs: &[(Vec<u8>, Vec<u8>)],
base_map: &MergeEntryMap,
side_map: &MergeEntryMap,
) -> SideDirRenames {
let mut counts: BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>> = BTreeMap::new();
for (old, new) in pairs {
update_dir_rename_counts(&mut counts, old, new);
}
let mut renames = BTreeMap::new();
let mut split = BTreeSet::new();
for (old_dir, targets) in counts {
let mut max = 0usize;
let mut bad_max = 0usize;
let mut best: Option<Vec<u8>> = None;
for (target, count) in &targets {
if *count == max {
bad_max = max;
} else if *count > max {
max = *count;
best = Some(target.clone());
}
}
if max == 0 {
continue;
}
if bad_max == max {
split.insert(old_dir);
continue;
}
if let Some(best) = best
&& directory_fully_removed(&old_dir, base_map, side_map)
{
renames.insert(old_dir, best);
}
}
SideDirRenames { renames, split }
}
fn update_dir_rename_counts(
counts: &mut BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>>,
old: &[u8],
new: &[u8],
) {
let mut old_dir = old.to_vec();
let mut new_dir = new.to_vec();
let mut first = true;
loop {
let old_has = dir_munge(&mut old_dir);
let new_has = dir_munge(&mut new_dir);
if !first {
let old_sub = trailing_component(old, &old_dir);
let new_sub = trailing_component(new, &new_dir);
if old_sub != new_sub {
break;
}
}
if old_dir == new_dir {
break;
}
*counts
.entry(old_dir.clone())
.or_default()
.entry(new_dir.clone())
.or_default() += 1;
first = false;
if old_dir.is_empty() || new_dir.is_empty() {
break;
}
if !old_has || !new_has {
break;
}
}
}
fn dir_munge(buf: &mut Vec<u8>) -> bool {
match buf.iter().rposition(|b| *b == b'/') {
Some(i) => {
buf.truncate(i);
true
}
None => {
buf.clear();
false
}
}
}
fn trailing_component<'a>(full: &'a [u8], dir: &[u8]) -> &'a [u8] {
if dir.is_empty() {
full
} else {
&full[dir.len() + 1..]
}
}
fn directory_fully_removed(dir: &[u8], base_map: &MergeEntryMap, side_map: &MergeEntryMap) -> bool {
let mut prefix = dir.to_vec();
prefix.push(b'/');
for path in base_map.keys() {
if path.starts_with(&prefix) && side_map.contains_key(path) {
return false;
}
}
true
}
struct DirRenameMove {
from: Vec<u8>,
to: Vec<u8>,
renamed_from: Option<Vec<u8>>,
}
struct RehomeInfo {
old_path: Vec<u8>,
renamed_from: Option<Vec<u8>>,
added_on_ours: bool,
}
struct DirRenameCollision {
dest: Vec<u8>,
sources: Vec<Vec<u8>>,
}
struct DirRenameOutcome {
base: MergeEntryMap,
ours: MergeEntryMap,
theirs: MergeEntryMap,
rehomed: BTreeMap<Vec<u8>, RehomeInfo>,
collisions: Vec<DirRenameCollision>,
dirty: bool,
}
fn apply_directory_renames(
base_map: &MergeEntryMap,
eff_base: &MergeEntryMap,
eff_ours: &MergeEntryMap,
eff_theirs: &MergeEntryMap,
ours_side: &SideRenames,
theirs_side: &SideRenames,
dir_renames: &DirectoryRenameMaps,
) -> DirRenameOutcome {
let mut base = eff_base.clone();
let mut ours = eff_ours.clone();
let mut theirs = eff_theirs.clone();
let mut rehomed = BTreeMap::new();
let mut collisions = Vec::new();
let mut dirty = false;
let ours_excl = exclusion_dirs(&dir_renames.ours);
let theirs_excl = exclusion_dirs(&dir_renames.theirs);
let ours_moves = plan_rehome(
base_map,
&ours,
ours_side,
&dir_renames.theirs,
&ours_excl,
&dir_renames.split_dirs,
&mut collisions,
&mut dirty,
);
let theirs_moves = plan_rehome(
base_map,
&theirs,
theirs_side,
&dir_renames.ours,
&theirs_excl,
&dir_renames.split_dirs,
&mut collisions,
&mut dirty,
);
apply_rehome_moves(
&mut base,
&mut ours,
&mut theirs,
ours_moves,
true,
&mut rehomed,
&mut collisions,
&mut dirty,
);
apply_rehome_moves(
&mut base,
&mut ours,
&mut theirs,
theirs_moves,
false,
&mut rehomed,
&mut collisions,
&mut dirty,
);
DirRenameOutcome {
base,
ours,
theirs,
rehomed,
collisions,
dirty,
}
}
fn exclusion_dirs(side_dir_renames: &BTreeMap<Vec<u8>, Vec<u8>>) -> BTreeSet<Vec<u8>> {
side_dir_renames.values().cloned().collect()
}
#[allow(clippy::too_many_arguments)]
fn plan_rehome(
base_map: &MergeEntryMap,
side: &MergeEntryMap,
side_renames: &SideRenames,
renamer_dirs: &BTreeMap<Vec<u8>, Vec<u8>>,
exclusions: &BTreeSet<Vec<u8>>,
split_dirs: &BTreeSet<Vec<u8>>,
collisions: &mut Vec<DirRenameCollision>,
dirty: &mut bool,
) -> Vec<DirRenameMove> {
if renamer_dirs.is_empty() {
return Vec::new();
}
let side_rename_src: BTreeMap<&[u8], &[u8]> = side_renames
.pairs
.iter()
.map(|(o, n)| (n.as_slice(), o.as_slice()))
.collect();
let candidates: Vec<Vec<u8>> = side
.keys()
.filter(|p| !base_map.contains_key(*p) || side_rename_src.contains_key(p.as_slice()))
.cloned()
.collect();
let mut planned: BTreeMap<Vec<u8>, Vec<DirRenameMove>> = BTreeMap::new();
for path in candidates {
let Some((old_dir, new_dir)) = check_dir_renamed(&path, renamer_dirs) else {
continue;
};
if split_dirs.contains(old_dir) {
*dirty = true;
continue;
}
if exclusions.contains(new_dir) {
continue;
}
let dest = apply_dir_rename(old_dir, new_dir, &path);
if dest == path {
continue;
}
let renamed_from = side_rename_src.get(path.as_slice()).map(|s| s.to_vec());
planned.entry(dest.clone()).or_default().push(DirRenameMove {
from: path,
to: dest,
renamed_from,
});
}
let mut moves = Vec::new();
for (dest, group) in planned {
if group.len() > 1 {
*dirty = true;
collisions.push(DirRenameCollision {
dest,
sources: group.into_iter().map(|m| m.from).collect(),
});
continue;
}
moves.push(group.into_iter().next().expect("non-empty"));
}
moves
}
#[allow(clippy::too_many_arguments)]
fn apply_rehome_moves(
base: &mut MergeEntryMap,
ours: &mut MergeEntryMap,
theirs: &mut MergeEntryMap,
moves: Vec<DirRenameMove>,
side_is_ours: bool,
rehomed: &mut BTreeMap<Vec<u8>, RehomeInfo>,
collisions: &mut Vec<DirRenameCollision>,
dirty: &mut bool,
) {
for mv in moves {
let occupied = |m: &MergeEntryMap| m.contains_key(&mv.to);
if (occupied(base) || occupied(ours) || occupied(theirs)) && mv.to != mv.from {
*dirty = true;
collisions.push(DirRenameCollision {
dest: mv.to.clone(),
sources: vec![mv.from.clone()],
});
continue;
}
let mut moved = false;
for m in [&mut *base, &mut *ours, &mut *theirs] {
if let Some(entry) = m.remove(&mv.from) {
m.insert(mv.to.clone(), entry);
moved = true;
}
}
if moved {
rehomed.insert(
mv.to.clone(),
RehomeInfo {
old_path: mv.from.clone(),
renamed_from: mv.renamed_from.clone(),
added_on_ours: side_is_ours,
},
);
}
}
}
fn qualify_label(label: &str, path: &[u8]) -> String {
format!("{label}:{}", String::from_utf8_lossy(path))
}
fn entry_map_as_tracked(map: &MergeEntryMap) -> BTreeMap<Vec<u8>, TrackedEntry> {
map.iter()
.map(|(path, (mode, oid))| {
(
path.clone(),
TrackedEntry {
mode: *mode,
oid: *oid,
},
)
})
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
use sley_formats::RepositoryLayout;
use sley_object::TreeEntry;
use sley_odb::ObjectWriter;
use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
#[test]
fn name_status_reports_added_from_index() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let oid = db
.write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
.expect("test operation should succeed");
let index = Index {
version: 2,
entries: vec![sley_index::IndexEntry {
ctime_seconds: 0,
ctime_nanoseconds: 0,
mtime_seconds: 0,
mtime_nanoseconds: 0,
dev: 0,
ino: 0,
mode: 0o100644,
uid: 0,
gid: 0,
size: 6,
oid,
flags: "hello.txt".len() as u16,
flags_extended: 0,
path: BString::from(b"hello.txt"),
}],
extensions: Vec::new(),
checksum: None,
};
fs::write(
layout.git_dir.join("index"),
index
.write_v2_sha1()
.expect("test operation should succeed"),
)
.expect("test operation should succeed");
fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
let changes = diff_name_status_head_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
.expect("test operation should succeed");
assert_eq!(changes[0].line(), "A\thello.txt");
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn index_worktree_diff_returns_staged_gitlinks() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let oid = ObjectId::from_hex(
ObjectFormat::Sha1,
"1111111111111111111111111111111111111111",
)
.expect("test operation should succeed");
let index = Index {
version: 2,
entries: vec![sley_index::IndexEntry {
ctime_seconds: 0,
ctime_nanoseconds: 0,
mtime_seconds: 0,
mtime_nanoseconds: 0,
dev: 0,
ino: 0,
mode: sley_index::GITLINK_MODE,
uid: 0,
gid: 0,
size: 0,
oid,
flags: "deps/sub".len() as u16,
flags_extended: 0,
path: BString::from(b"deps/sub"),
}],
extensions: Vec::new(),
checksum: None,
};
fs::write(
layout.git_dir.join("index"),
index
.write_v2_sha1()
.expect("test operation should succeed"),
)
.expect("test operation should succeed");
let diff = diff_name_status_index_worktree_with_options_and_gitlinks(
&root,
&layout.git_dir,
ObjectFormat::Sha1,
DiffNameStatusOptions::default(),
)
.expect("test operation should succeed");
assert_eq!(diff.entries.len(), 1);
let gitlinks = diff.staged_gitlinks;
assert_eq!(gitlinks.len(), 1);
assert_eq!(gitlinks[0].path.as_bytes(), b"deps/sub");
assert_eq!(gitlinks[0].oid, oid);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[cfg(unix)]
#[test]
fn index_worktree_diff_ignores_untracked_dangling_symlink() {
use std::os::unix::fs::symlink;
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let oid = db
.write_object(EncodedObject::new(ObjectType::Blob, b"clean\n".to_vec()))
.expect("test operation should succeed");
let index = Index {
version: 2,
entries: vec![sley_index::IndexEntry {
ctime_seconds: 0,
ctime_nanoseconds: 0,
mtime_seconds: 0,
mtime_nanoseconds: 0,
dev: 0,
ino: 0,
mode: 0o100644,
uid: 0,
gid: 0,
size: 6,
oid,
flags: "tracked.txt".len() as u16,
flags_extended: 0,
path: BString::from(b"tracked.txt"),
}],
extensions: Vec::new(),
checksum: None,
};
fs::write(
layout.git_dir.join("index"),
index
.write_v2_sha1()
.expect("test operation should succeed"),
)
.expect("test operation should succeed");
fs::write(root.join("tracked.txt"), b"clean\n").expect("test operation should succeed");
symlink("missing-target", root.join("untracked-link"))
.expect("test operation should succeed");
let changes = diff_name_status_index_worktree_with_options(
&root,
&layout.git_dir,
ObjectFormat::Sha1,
DiffNameStatusOptions {
detect_renames: false,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
)
.expect("untracked dangling symlink should be ignored");
assert!(changes.is_empty());
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn index_worktree_diff_trusts_non_racy_stat_cache() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let worktree_path = root.join("tracked.txt");
fs::write(&worktree_path, b"clean\n").expect("test operation should succeed");
let metadata = fs::symlink_metadata(&worktree_path).expect("test operation should succeed");
let (mtime_seconds, mtime_nanoseconds) =
sley_index::file_mtime_parts(&metadata).expect("test operation should succeed");
let bogus_oid = ObjectId::from_hex(
ObjectFormat::Sha1,
"1111111111111111111111111111111111111111",
)
.expect("test operation should succeed");
let index = Index {
version: 2,
entries: vec![sley_index::IndexEntry {
ctime_seconds: 0,
ctime_nanoseconds: 0,
mtime_seconds: mtime_seconds as u32,
mtime_nanoseconds: mtime_nanoseconds as u32,
dev: 0,
ino: 0,
mode: sley_index::worktree_metadata_mode(&metadata),
uid: 0,
gid: 0,
size: metadata.len() as u32,
oid: bogus_oid,
flags: "tracked.txt".len() as u16,
flags_extended: 0,
path: BString::from(b"tracked.txt"),
}],
extensions: Vec::new(),
checksum: None,
};
std::thread::sleep(std::time::Duration::from_millis(1100));
fs::write(
layout.git_dir.join("index"),
index
.write_v2_sha1()
.expect("test operation should succeed"),
)
.expect("test operation should succeed");
let changes = diff_name_status_index_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
.expect("test operation should succeed");
assert!(
changes.is_empty(),
"a clean non-racy stat match must reuse the cached index oid"
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
fn temp_root() -> PathBuf {
let path = std::env::temp_dir().join(format!(
"sley-diff-{}-{}",
std::process::id(),
TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
));
fs::create_dir_all(&path).expect("test operation should succeed");
path
}
fn merge_opts() -> MergeBlobOptions<'static> {
MergeBlobOptions {
ours_label: "ours",
theirs_label: "theirs",
base_label: "base",
style: ConflictStyle::Merge,
}
}
#[test]
fn split_lines_preserves_content_and_newlines() {
let lines = split_lines(b"a\nb\nc\n");
assert_eq!(lines.len(), 3);
assert_eq!(lines[0].content, b"a\n");
assert!(lines[0].has_newline);
assert_eq!(lines[2].content, b"c\n");
assert!(lines[2].has_newline);
assert!(split_lines(b"").is_empty());
}
#[test]
fn split_lines_tracks_missing_final_newline() {
let lines = split_lines(b"a\nb");
assert_eq!(lines.len(), 2);
assert!(lines[0].has_newline);
assert!(!lines[1].has_newline);
assert_eq!(lines[1].content, b"b");
assert_eq!(lines[1].bytes_without_newline(), b"b");
let with_nl = split_lines(b"b\n");
assert_ne!(lines[1], with_nl[0]);
}
#[test]
fn myers_replace_single_line() {
let old = split_lines(b"a\nb\nc\n");
let new = split_lines(b"a\nx\nc\n");
assert_eq!(
myers_diff_lines(&old, &new),
vec![
DiffOp::Equal(1),
DiffOp::Delete(1),
DiffOp::Insert(1),
DiffOp::Equal(1),
]
);
}
#[test]
fn myers_identical_is_single_equal() {
let old = split_lines(b"a\nb\nc\n");
let new = split_lines(b"a\nb\nc\n");
assert_eq!(myers_diff_lines(&old, &new), vec![DiffOp::Equal(3)]);
}
#[test]
fn myers_pure_insert_and_delete() {
let empty = split_lines(b"");
let two = split_lines(b"a\nb\n");
assert_eq!(myers_diff_lines(&empty, &two), vec![DiffOp::Insert(2)]);
assert_eq!(myers_diff_lines(&two, &empty), vec![DiffOp::Delete(2)]);
let old = split_lines(b"a\nb\nc\nd\n");
let new = split_lines(b"a\nc\nd\n");
assert_eq!(
myers_diff_lines(&old, &new),
vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)]
);
}
#[test]
fn myers_reconstructs_new_and_is_minimal() {
let old = split_lines(b"the\nquick\nbrown\nfox\n");
let new = split_lines(b"the\nlazy\nbrown\ncat\n");
let ops = myers_diff_lines(&old, &new);
let mut oi = 0usize;
let mut ni = 0usize;
let mut edits = 0usize;
let mut rebuilt: Vec<u8> = Vec::new();
for op in &ops {
match *op {
DiffOp::Equal(n) => {
for _ in 0..n {
assert_eq!(old[oi], new[ni]);
rebuilt.extend_from_slice(old[oi].content);
oi += 1;
ni += 1;
}
}
DiffOp::Delete(n) => {
oi += n;
edits += n;
}
DiffOp::Insert(n) => {
for _ in 0..n {
rebuilt.extend_from_slice(new[ni].content);
ni += 1;
}
edits += n;
}
}
}
assert_eq!(rebuilt, b"the\nlazy\nbrown\ncat\n");
assert_eq!(edits, 4);
}
#[test]
fn merge_non_overlapping_changes_is_clean() {
let base = b"a\nb\nc\nd\ne\n";
let ours = b"A\nb\nc\nd\ne\n";
let theirs = b"a\nb\nc\nd\nE\n";
let result = merge_blobs(base, ours, theirs, &merge_opts());
assert!(!result.conflicted);
assert_eq!(result.content, b"A\nb\nc\nd\nE\n");
}
#[test]
fn merge_identical_changes_no_conflict() {
let base = b"a\nb\nc\n";
let ours = b"a\nX\nc\n";
let theirs = b"a\nX\nc\n";
let result = merge_blobs(base, ours, theirs, &merge_opts());
assert!(!result.conflicted);
assert_eq!(result.content, b"a\nX\nc\n");
}
#[test]
fn merge_overlapping_change_emits_exact_markers() {
let base = b"a\nb\nc\n";
let ours = b"a\nOURS\nc\n";
let theirs = b"a\nTHEIRS\nc\n";
let result = merge_blobs(base, ours, theirs, &merge_opts());
assert!(result.conflicted);
assert_eq!(
result.content,
b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\nc\n".to_vec(),
);
}
#[test]
fn merge_diff3_style_includes_base_section() {
let base = b"a\nb\nc\n";
let ours = b"a\nOURS\nc\n";
let theirs = b"a\nTHEIRS\nc\n";
let options = MergeBlobOptions {
style: ConflictStyle::Diff3,
..merge_opts()
};
let result = merge_blobs(base, ours, theirs, &options);
assert!(result.conflicted);
assert_eq!(
result.content,
b"a\n<<<<<<< ours\nOURS\n||||||| base\nb\n=======\nTHEIRS\n>>>>>>> theirs\nc\n"
.to_vec(),
);
}
#[test]
fn merge_empty_label_omits_trailing_space() {
let base = b"a\nb\nc\n";
let ours = b"a\nOURS\nc\n";
let theirs = b"a\nTHEIRS\nc\n";
let options = MergeBlobOptions {
ours_label: "",
theirs_label: "",
base_label: "",
style: ConflictStyle::Merge,
};
let result = merge_blobs(base, ours, theirs, &options);
assert!(result.conflicted);
assert_eq!(
result.content,
b"a\n<<<<<<<\nOURS\n=======\nTHEIRS\n>>>>>>>\nc\n".to_vec(),
);
}
#[test]
fn merge_add_add_empty_base_conflicts() {
let result = merge_blobs(b"", b"x\ny\n", b"p\nq\n", &merge_opts());
assert!(result.conflicted);
assert_eq!(
result.content,
b"<<<<<<< ours\nx\ny\n=======\np\nq\n>>>>>>> theirs\n".to_vec(),
);
}
#[test]
fn merge_add_add_empty_base_identical_is_clean() {
let result = merge_blobs(b"", b"x\ny\n", b"x\ny\n", &merge_opts());
assert!(!result.conflicted);
assert_eq!(result.content, b"x\ny\n");
}
#[test]
fn merge_deletion_one_side_takes_deletion() {
let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nb\nc\n", &merge_opts());
assert!(!result.conflicted);
assert_eq!(result.content, b"a\nc\n");
}
#[test]
fn merge_deletion_vs_modification_conflicts() {
let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nB!\nc\n", &merge_opts());
assert!(result.conflicted);
assert_eq!(
result.content,
b"a\n<<<<<<< ours\n=======\nB!\n>>>>>>> theirs\nc\n".to_vec(),
);
}
#[test]
fn merge_missing_final_newline_marker_starts_on_own_line() {
let base = b"a\nb";
let ours = b"a\nOURS";
let theirs = b"a\nTHEIRS";
let result = merge_blobs(base, ours, theirs, &merge_opts());
assert!(result.conflicted);
assert_eq!(
result.content,
b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\n".to_vec(),
);
}
#[test]
fn merge_clean_preserves_missing_final_newline() {
let result = merge_blobs(b"a\nb\n", b"a\nb", b"a\nb\n", &merge_opts());
assert!(!result.conflicted);
assert_eq!(result.content, b"a\nb");
}
#[test]
fn merge_both_append_identical_tail_is_clean() {
let result = merge_blobs(b"a\n", b"a\nz\n", b"a\nz\n", &merge_opts());
assert!(!result.conflicted);
assert_eq!(result.content, b"a\nz\n");
}
#[test]
fn merge_when_ours_equals_base_yields_theirs() {
let base = b"b\na\n";
let theirs = b"b\nb\nc\na\nc\n";
let result = merge_blobs(base, base, theirs, &merge_opts());
assert!(!result.conflicted);
assert_eq!(result.content, theirs.to_vec());
}
fn applied(outcome: ApplyOutcome) -> Vec<u8> {
match outcome {
ApplyOutcome::Applied(bytes) => bytes,
ApplyOutcome::Rejected => panic!("expected Applied, got Rejected"),
}
}
#[test]
fn parse_multi_file_patch() {
let patch = b"\
diff --git a/one.txt b/one.txt
index aaaaaaa..bbbbbbb 100644
--- a/one.txt
+++ b/one.txt
@@ -1,3 +1,3 @@
alpha
-beta
+BETA
gamma
diff --git a/two.txt b/two.txt
index ccccccc..ddddddd 100644
--- a/two.txt
+++ b/two.txt
@@ -1,2 +1,3 @@
first
+inserted
second
";
let patches = parse_unified_patch(patch).expect("test operation should succeed");
assert_eq!(patches.len(), 2);
assert_eq!(patches[0].old_path.as_deref(), Some(b"one.txt".as_slice()));
assert_eq!(patches[0].new_path.as_deref(), Some(b"one.txt".as_slice()));
assert_eq!(patches[0].old_mode, None);
assert_eq!(patches[0].hunks.len(), 1);
let h = &patches[0].hunks[0];
assert_eq!(
(h.old_start, h.old_len, h.new_start, h.new_len),
(1, 3, 1, 3)
);
assert_eq!(
h.lines,
vec![
HunkLine::Context(b"alpha".to_vec()),
HunkLine::Delete(b"beta".to_vec()),
HunkLine::Insert(b"BETA".to_vec()),
HunkLine::Context(b"gamma".to_vec()),
]
);
assert_eq!(patches[1].new_path.as_deref(), Some(b"two.txt".as_slice()));
assert_eq!(patches[1].hunks[0].new_len, 3);
}
#[test]
fn parse_default_hunk_range_length() {
let patch = b"\
--- a/x
+++ b/x
@@ -1 +1,2 @@
line
+added
";
let patches = parse_unified_patch(patch).expect("test operation should succeed");
let h = &patches[0].hunks[0];
assert_eq!(
(h.old_start, h.old_len, h.new_start, h.new_len),
(1, 1, 1, 2)
);
}
#[test]
fn parse_hunk_header_before_file_errors() {
let patch = b"@@ -1,1 +1,1 @@\n context\n";
assert!(parse_unified_patch(patch).is_err());
}
#[test]
fn parse_mismatched_counts_errors() {
let patch = b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n only\n+new\n";
assert!(parse_unified_patch(patch).is_err());
}
#[test]
fn apply_clean_hunk() {
let base = b"alpha\nbeta\ngamma\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
)
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"alpha\nBETA\ngamma\n");
}
#[test]
fn apply_with_line_offset() {
let base = b"pre1\npre2\npre3\nalpha\nbeta\ngamma\ntail\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -2,4 +2,4 @@\n alpha\n-beta\n+BETA\n gamma\n tail\n",
)
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"pre1\npre2\npre3\nalpha\nBETA\ngamma\ntail\n");
}
#[test]
fn apply_with_negative_line_offset() {
let base = b"alpha\nbeta\ngamma\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -50,3 +50,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
)
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"alpha\nBETA\ngamma\n");
}
#[test]
fn apply_multiple_hunks() {
let base = b"a\nb\nc\nd\ne\nf\ng\nh\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n\
@@ -1,3 +1,3 @@\n a\n-b\n+B\n c\n\
@@ -6,3 +6,3 @@\n f\n-g\n+G\n h\n",
)
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"a\nB\nc\nd\ne\nf\nG\nh\n");
}
#[test]
fn reject_on_context_mismatch() {
let base = b"alpha\nDIFFERENT\ngamma\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
)
.expect("test operation should succeed");
assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
}
#[test]
fn reject_when_match_end_required_but_not_at_eof() {
let base = b"one\ntwo\nanchor\nalready\nappended\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n",
)
.expect("test operation should succeed");
assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
}
#[test]
fn append_at_eof_matches_when_context_reaches_end() {
let base = b"one\ntwo\nanchor\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n",
)
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"one\ntwo\nanchor\nadded1\nadded2\n");
}
#[test]
fn reject_when_match_beginning_required_but_not_at_start() {
let base = b"junk\nalpha\nbeta\ngamma\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n alpha\n+INSERT\n beta\n",
)
.expect("test operation should succeed");
assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
}
#[test]
fn no_default_fuzz_rejects_on_trailing_context_mismatch() {
let base = b"alpha\nbeta\nDIVERGED\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
)
.expect("test operation should succeed");
assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
}
#[test]
fn parse_and_apply_new_file() {
let patch = parse_unified_patch(
b"\
diff --git a/new.txt b/new.txt
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/new.txt
@@ -0,0 +1,2 @@
+hello
+world
",
)
.expect("test operation should succeed");
assert!(patches_first_is_new(&patch));
assert_eq!(patch[0].old_path, None);
assert_eq!(patch[0].new_path.as_deref(), Some(b"new.txt".as_slice()));
assert_eq!(patch[0].new_mode, Some(0o100644));
let out = applied(apply_file_patch(b"garbage that is ignored", &patch[0]));
assert_eq!(out, b"hello\nworld\n");
}
fn patches_first_is_new(patches: &[FilePatch]) -> bool {
patches.first().map(|p| p.is_new).unwrap_or(false)
}
#[test]
fn parse_and_apply_delete_file() {
let patch = parse_unified_patch(
b"\
diff --git a/gone.txt b/gone.txt
deleted file mode 100644
index 1111111..0000000
--- a/gone.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-hello
-world
",
)
.expect("test operation should succeed");
assert!(patch[0].is_delete);
assert_eq!(patch[0].old_path.as_deref(), Some(b"gone.txt".as_slice()));
assert_eq!(patch[0].new_path, None);
assert_eq!(patch[0].old_mode, Some(0o100644));
let out = applied(apply_file_patch(b"hello\nworld\n", &patch[0]));
assert_eq!(out, b"");
}
#[test]
fn parse_rename_headers() {
let patch = parse_unified_patch(
b"\
diff --git a/old/name.txt b/new/name.txt
similarity index 100%
rename from old/name.txt
rename to new/name.txt
",
)
.expect("test operation should succeed");
assert!(patch[0].is_rename);
assert_eq!(
patch[0].old_path.as_deref(),
Some(b"old/name.txt".as_slice())
);
assert_eq!(
patch[0].new_path.as_deref(),
Some(b"new/name.txt".as_slice())
);
assert!(patch[0].hunks.is_empty());
}
#[test]
fn parse_mode_change_headers() {
let patch = parse_unified_patch(
b"\
diff --git a/script.sh b/script.sh
old mode 100644
new mode 100755
",
)
.expect("test operation should succeed");
assert_eq!(patch[0].old_mode, Some(0o100644));
assert_eq!(patch[0].new_mode, Some(0o100755));
assert!(!patch[0].is_new);
assert!(!patch[0].is_delete);
}
#[test]
fn no_final_newline_base_preserved_when_untouched() {
let base = b"alpha\nbeta\nnotail"; let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n-alpha\n+ALPHA\n beta\n notail\n\\ No newline at end of file\n",
)
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"ALPHA\nbeta\nnotail");
}
#[test]
fn no_final_newline_added_by_patch() {
let base = b"alpha\nbeta\n";
let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n",
)
.expect("test operation should succeed");
assert!(patch[0].hunks[0].new_no_newline);
assert!(!patch[0].hunks[0].old_no_newline);
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"alpha\nbeta-notail");
}
#[test]
fn no_final_newline_in_base_matched_and_kept() {
let base = b"alpha\nbeta"; let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n-alpha\n+ALPHA\n beta\n\\ No newline at end of file\n",
)
.expect("test operation should succeed");
assert!(patch[0].hunks[0].old_no_newline);
assert!(patch[0].hunks[0].new_no_newline);
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"ALPHA\nbeta");
}
#[test]
fn no_final_newline_mismatch_rejected() {
let base = b"alpha\nbeta\n"; let patch = parse_unified_patch(
b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n\\ No newline at end of file\n+beta2\n",
)
.expect("test operation should succeed");
assert!(patch[0].hunks[0].old_no_newline);
assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
}
#[test]
fn delete_with_no_final_newline() {
let base = b"only line no newline";
let patch = parse_unified_patch(
b"--- a/x\n+++ /dev/null\n@@ -1,1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n",
)
.expect("test operation should succeed");
assert!(patch[0].is_delete);
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"");
}
#[test]
fn apply_pure_insertion_hunk() {
let base = b"first\nsecond\n";
let patch =
parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n first\n+middle\n second\n")
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"first\nmiddle\nsecond\n");
}
#[test]
fn apply_pure_deletion_hunk() {
let base = b"first\nmiddle\nsecond\n";
let patch =
parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,2 @@\n first\n-middle\n second\n")
.expect("test operation should succeed");
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"first\nsecond\n");
}
#[test]
fn apply_then_reparse_round_trip() {
let base = b"l1\nl2\nl3\nl4\nl5\n";
let text = b"--- a/f\n+++ b/f\n@@ -2,3 +2,4 @@\n l2\n-l3\n+L3\n+L3b\n l4\n";
let p1 = parse_unified_patch(text).expect("test operation should succeed");
let p2 = parse_unified_patch(text).expect("test operation should succeed");
assert_eq!(p1, p2);
let out = applied(apply_file_patch(base, &p1[0]));
assert_eq!(out, b"l1\nl2\nL3\nL3b\nl4\nl5\n");
}
#[test]
fn empty_context_line_without_trailing_space() {
let base = b"a\n\nb\n";
let patch = parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n a\n\n-b\n+B\n")
.expect("test operation should succeed");
assert_eq!(patch[0].hunks[0].lines[1], HunkLine::Context(Vec::new()));
let out = applied(apply_file_patch(base, &patch[0]));
assert_eq!(out, b"a\n\nB\n");
}
#[test]
fn split_blob_lines_handles_edge_cases() {
assert!(split_blob_lines(b"").is_empty());
let single = split_blob_lines(b"abc");
assert_eq!(single.len(), 1);
assert!(single[0].no_newline);
let terminated = split_blob_lines(b"abc\n");
assert_eq!(terminated.len(), 1);
assert!(!terminated[0].no_newline);
let blank_then_eof = split_blob_lines(b"x\n");
assert_eq!(blank_then_eof.len(), 1);
}
#[test]
fn similarity_identical_and_empty_conventions() {
assert_eq!(blob_similarity(b"hello\nworld\n", b"hello\nworld\n"), 100);
assert_eq!(blob_similarity(b"", b""), 100);
assert_eq!(blob_similarity(b"", b"hello\n"), 0);
assert_eq!(blob_similarity(b"hello\n", b""), 0);
}
#[test]
fn similarity_one_changed_line_is_75_and_symmetric() {
let a = b"one\ntwo\nthree\nfour\nfive\n";
let b = b"one\ntwo\nTHREE\nfour\nfive\n";
assert_eq!(blob_similarity(a, b), 75);
assert_eq!(blob_similarity(b, a), 75);
}
#[test]
fn similarity_one_edited_line_of_three_is_66_not_67() {
assert_eq!(blob_similarity(b"a\nb\nc\n", b"a\nB\nc\n"), 66);
assert_eq!(blob_similarity(b"a\nB\nc\n", b"a\nb\nc\n"), 66);
}
#[test]
fn similarity_small_append_is_88() {
let a = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n";
let b = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n";
assert_eq!(blob_similarity(a, b), 88);
}
#[test]
fn similarity_half_rewrite_is_50() {
let a = b"l1\nl2\nl3\nl4\nl5\nl6\n";
let b = b"l1\nl2\nl3\nX4\nX5\nX6\n";
assert_eq!(blob_similarity(a, b), 50);
}
fn write_blob(db: &mut FileObjectDatabase, bytes: &[u8]) -> ObjectId {
db.write_object(EncodedObject::new(ObjectType::Blob, bytes.to_vec()))
.expect("test operation should succeed")
}
fn write_tree(db: &mut FileObjectDatabase, entries: &[(&[u8], u32, ObjectId)]) -> ObjectId {
let mut tree_entries: Vec<TreeEntry> = entries
.iter()
.map(|(name, mode, oid)| TreeEntry {
mode: *mode,
name: BString::from(*name),
oid: *oid,
})
.collect();
tree_entries.sort_by(|a, b| a.name.cmp(&b.name));
let tree = Tree {
entries: tree_entries,
};
db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
.expect("test operation should succeed")
}
#[test]
fn inexact_rename_detected_with_plausible_score() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
let opts = RenameDetectionOptions {
base: DiffNameStatusOptions {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
detect_inexact: true,
rename_threshold: DEFAULT_RENAME_THRESHOLD,
copy_threshold: DEFAULT_RENAME_THRESHOLD,
};
let entries = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
opts,
)
.expect("test operation should succeed");
assert_eq!(
entries.len(),
1,
"expected a single rename entry: {entries:?}"
);
assert_eq!(entries[0].status, NameStatus::Renamed(75));
assert_eq!(
entries[0].old_path.as_ref().map(|p| p.as_bytes()),
Some(b"a.txt".as_slice())
);
assert_eq!(entries[0].path, b"b.txt");
assert_eq!(entries[0].line(), "R075\ta.txt\tb.txt");
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn inexact_rename_below_threshold_not_detected() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let old = write_blob(&mut db, b"l1\nl2\nl3\nl4\nl5\nl6\n");
let new = write_blob(&mut db, b"l1\nl2\nl3\nX4\nX5\nX6\n");
let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
let opts = RenameDetectionOptions {
base: DiffNameStatusOptions {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
detect_inexact: true,
rename_threshold: 60,
copy_threshold: 60,
};
let entries = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
opts,
)
.expect("test operation should succeed");
let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
assert!(
statuses.contains(&NameStatus::Added) && statuses.contains(&NameStatus::Deleted),
"expected separate add/delete below threshold, got {entries:?}"
);
assert!(
!statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))),
"no rename should be reported below threshold: {entries:?}"
);
let opts_low = RenameDetectionOptions {
rename_threshold: 50,
..opts
};
let entries_low = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
opts_low,
)
.expect("test operation should succeed");
assert_eq!(entries_low.len(), 1);
assert_eq!(entries_low[0].status, NameStatus::Renamed(50));
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn exact_rename_scores_100_and_takes_priority() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let oid = write_blob(&mut db, b"identical\ncontent\nhere\n");
let left = write_tree(&mut db, &[(b"old.txt", 0o100644, oid)]);
let right = write_tree(&mut db, &[(b"new.txt", 0o100644, oid)]);
for inexact in [false, true] {
let opts = RenameDetectionOptions {
base: DiffNameStatusOptions {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
detect_inexact: inexact,
rename_threshold: DEFAULT_RENAME_THRESHOLD,
copy_threshold: DEFAULT_RENAME_THRESHOLD,
};
let entries = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
opts,
)
.expect("test operation should succeed");
assert_eq!(entries.len(), 1, "inexact={inexact}: {entries:?}");
assert_eq!(entries[0].status, NameStatus::Renamed(100));
assert_eq!(
entries[0].old_path.as_ref().map(|p| p.as_bytes()),
Some(b"old.txt".as_slice())
);
assert_eq!(entries[0].path, b"new.txt");
}
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn inexact_copy_detected_with_score() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let orig = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\neee\n");
let copy = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\nEEE\n");
let left = write_tree(&mut db, &[(b"orig.txt", 0o100644, orig.clone())]);
let right = write_tree(
&mut db,
&[(b"orig.txt", 0o100644, orig), (b"copy.txt", 0o100644, copy)],
);
let opts = RenameDetectionOptions {
base: DiffNameStatusOptions {
detect_renames: true,
detect_copies: true,
find_copies_harder: true,
rename_empty: true,
},
detect_inexact: true,
rename_threshold: DEFAULT_RENAME_THRESHOLD,
copy_threshold: DEFAULT_RENAME_THRESHOLD,
};
let entries = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
opts,
)
.expect("test operation should succeed");
let copy_entry = entries
.iter()
.find(|e| e.path == b"copy.txt")
.unwrap_or_else(|| panic!("no copy.txt entry: {entries:?}"));
assert_eq!(copy_entry.status, NameStatus::Copied(80));
assert_eq!(
copy_entry.old_path.as_ref().map(|p| p.as_bytes()),
Some(b"orig.txt".as_slice())
);
assert!(
entries.iter().all(|e| e.status != NameStatus::Deleted),
"copy must not delete the source: {entries:?}"
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn inexact_rename_with_small_edit_scores_88() {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let old = write_blob(
&mut db,
b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n",
);
let new = write_blob(
&mut db,
b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n",
);
let left = write_tree(&mut db, &[(b"src.txt", 0o100644, old)]);
let right = write_tree(&mut db, &[(b"dst.txt", 0o100644, new)]);
let opts = RenameDetectionOptions::inexact(DiffNameStatusOptions {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
});
let entries = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
opts,
)
.expect("test operation should succeed");
assert_eq!(entries.len(), 1, "{entries:?}");
assert_eq!(entries[0].status, NameStatus::Renamed(88));
assert_eq!(
entries[0].old_path.as_ref().map(|p| p.as_bytes()),
Some(b"src.txt".as_slice())
);
assert_eq!(entries[0].path, b"dst.txt");
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn inexact_disabled_default_preserves_exact_only_behavior() {
assert!(!RenameDetectionOptions::default().detect_inexact);
assert_eq!(
RenameDetectionOptions::default().rename_threshold,
DEFAULT_RENAME_THRESHOLD
);
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
let entries = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
RenameDetectionOptions::default(),
)
.expect("test operation should succeed");
let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
assert!(statuses.contains(&NameStatus::Added));
assert!(statuses.contains(&NameStatus::Deleted));
assert!(!statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))));
fs::remove_dir_all(root).expect("test operation should succeed");
}
fn apply_ops(old: &[DiffLine<'_>], new: &[DiffLine<'_>], ops: &[DiffOp]) -> Vec<u8> {
let mut oi = 0usize;
let mut ni = 0usize;
let mut rebuilt: Vec<u8> = Vec::new();
for op in ops {
match *op {
DiffOp::Equal(n) => {
for _ in 0..n {
assert_eq!(old[oi], new[ni], "Equal op covered unequal lines");
rebuilt.extend_from_slice(old[oi].content);
oi += 1;
ni += 1;
}
}
DiffOp::Delete(n) => oi += n,
DiffOp::Insert(n) => {
for _ in 0..n {
rebuilt.extend_from_slice(new[ni].content);
ni += 1;
}
}
}
}
assert_eq!(oi, old.len(), "script did not consume all of old");
assert_eq!(ni, new.len(), "script did not consume all of new");
rebuilt
}
fn assert_valid_script(old_bytes: &[u8], new_bytes: &[u8], ops: &[DiffOp]) {
let old = split_lines(old_bytes);
let new = split_lines(new_bytes);
let rebuilt = apply_ops(&old, &new, ops);
assert_eq!(rebuilt, new_bytes, "script did not rebuild new");
for pair in ops.windows(2) {
let same_kind = matches!(
(pair[0], pair[1]),
(DiffOp::Equal(_), DiffOp::Equal(_))
| (DiffOp::Delete(_), DiffOp::Delete(_))
| (DiffOp::Insert(_), DiffOp::Insert(_))
);
assert!(!same_kind, "ops not coalesced: {:?}", ops);
}
}
fn check_all_algorithms(old_bytes: &[u8], new_bytes: &[u8]) {
let old = split_lines(old_bytes);
let new = split_lines(new_bytes);
for algo in [
DiffAlgorithm::Myers,
DiffAlgorithm::Minimal,
DiffAlgorithm::Patience,
DiffAlgorithm::Histogram,
] {
let ops = diff_lines_with_algorithm(&old, &new, algo);
assert_valid_script(old_bytes, new_bytes, &ops);
}
}
#[test]
fn patience_and_histogram_match_myers_on_simple_cases() {
let cases: &[(&[u8], &[u8], Vec<DiffOp>)] = &[
(
b"a\nb\nc\n",
b"a\nx\nc\n",
vec![
DiffOp::Equal(1),
DiffOp::Delete(1),
DiffOp::Insert(1),
DiffOp::Equal(1),
],
),
(b"a\nb\nc\n", b"a\nb\nc\n", vec![DiffOp::Equal(3)]),
(b"", b"a\nb\n", vec![DiffOp::Insert(2)]),
(b"a\nb\n", b"", vec![DiffOp::Delete(2)]),
(
b"a\nb\nc\nd\n",
b"a\nc\nd\n",
vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)],
),
];
for (old_bytes, new_bytes, expected) in cases {
let old = split_lines(old_bytes);
let new = split_lines(new_bytes);
assert_eq!(&patience_diff_lines(&old, &new), expected);
assert_eq!(&histogram_diff_lines(&old, &new), expected);
assert_eq!(&myers_diff_lines(&old, &new), expected);
}
}
#[test]
fn patience_handles_both_empty() {
let empty = split_lines(b"");
assert!(patience_diff_lines(&empty, &empty).is_empty());
assert!(histogram_diff_lines(&empty, &empty).is_empty());
}
#[test]
fn patience_aligns_unique_anchors_across_moved_block() {
check_all_algorithms(
b"alpha\nbeta\ngamma\ndelta\n",
b"gamma\ndelta\nalpha\nbeta\n",
);
}
#[test]
fn histogram_differs_from_myers_keeping_block_contiguous() {
let old = b"b\na\n";
let new = b"a\nb\nb\na\nb\n";
let old_l = split_lines(old);
let new_l = split_lines(new);
let myers = myers_diff_lines(&old_l, &new_l);
let histogram = histogram_diff_lines(&old_l, &new_l);
assert_valid_script(old, new, &myers);
assert_valid_script(old, new, &histogram);
assert_eq!(
myers,
vec![
DiffOp::Insert(1),
DiffOp::Equal(1),
DiffOp::Insert(1),
DiffOp::Equal(1),
DiffOp::Insert(1),
]
);
assert_eq!(
histogram,
vec![DiffOp::Insert(2), DiffOp::Equal(2), DiffOp::Insert(1)]
);
assert_ne!(myers, histogram);
}
#[test]
fn patience_differs_from_myers_on_repeated_lines() {
let old = b"b\na\n";
let new = b"a\na\nb\n";
let old_l = split_lines(old);
let new_l = split_lines(new);
let myers = myers_diff_lines(&old_l, &new_l);
let patience = patience_diff_lines(&old_l, &new_l);
assert_valid_script(old, new, &myers);
assert_valid_script(old, new, &patience);
assert_eq!(
myers,
vec![DiffOp::Delete(1), DiffOp::Equal(1), DiffOp::Insert(2)]
);
assert_eq!(
patience,
vec![DiffOp::Insert(2), DiffOp::Equal(1), DiffOp::Delete(1)]
);
assert_ne!(myers, patience);
}
#[test]
fn realistic_function_insertion_all_valid() {
let old = b"int f() {\n return 1;\n}\n";
let new = b"int g() {\n return 2;\n}\n\nint f() {\n return 1;\n}\n";
check_all_algorithms(old, new);
}
#[test]
fn histogram_anchors_on_rare_line_when_no_unique_line_exists() {
check_all_algorithms(b"x\nx\nmid\nx\nx\n", b"x\nmid\nx\nx\nx\n");
check_all_algorithms(
b"dup\ndup\nrare\ndup\ndup\n",
b"dup\nrare\ndup\ndup\ndup\ndup\n",
);
}
#[test]
fn all_algorithms_treat_missing_final_newline_as_change() {
let old = split_lines(b"a\nb");
let new = split_lines(b"a\nb\n");
for algo in [
DiffAlgorithm::Myers,
DiffAlgorithm::Minimal,
DiffAlgorithm::Patience,
DiffAlgorithm::Histogram,
] {
let ops = diff_lines_with_algorithm(&old, &new, algo);
assert_eq!(
ops,
vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Insert(1)],
"algorithm {:?} mishandled missing final newline",
algo
);
}
}
#[test]
fn dispatcher_routes_each_variant() {
let old = split_lines(b"a\nb\nc\n");
let new = split_lines(b"a\nx\nc\n");
assert_eq!(
diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Myers),
myers_diff_lines(&old, &new)
);
assert_eq!(
diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Minimal),
myers_diff_lines(&old, &new)
);
assert_eq!(
diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Patience),
patience_diff_lines(&old, &new)
);
assert_eq!(
diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Histogram),
histogram_diff_lines(&old, &new)
);
}
#[test]
fn patience_recurses_into_gaps_between_anchors() {
let old = b"head\nmid1\nmid2\ntail\n";
let new = b"head\nMID\nmid2\ntail\n";
let old_l = split_lines(old);
let new_l = split_lines(new);
let ops = patience_diff_lines(&old_l, &new_l);
assert_eq!(
ops,
vec![
DiffOp::Equal(1),
DiffOp::Delete(1),
DiffOp::Insert(1),
DiffOp::Equal(2),
]
);
assert_valid_script(old, new, &ops);
}
#[test]
fn patience_falls_back_to_myers_with_no_unique_lines() {
let old = b"a\na\nb\nb\n";
let new = b"a\na\na\nb\n";
let old_l = split_lines(old);
let new_l = split_lines(new);
let ops = patience_diff_lines(&old_l, &new_l);
assert_valid_script(old, new, &ops);
}
#[test]
fn algorithms_agree_with_myers_when_all_lines_distinct() {
let cases: &[(&[u8], &[u8])] = &[
(b"a\nb\nc\nd\ne\n", b"a\nc\nd\nf\ne\n"),
(b"1\n2\n3\n4\n5\n6\n", b"1\n3\n2\n4\n6\n5\n"),
(b"q\nw\ne\nr\nt\ny\n", b"q\nw\nx\nr\nt\nz\n"),
];
for (old_bytes, new_bytes) in cases {
let old = split_lines(old_bytes);
let new = split_lines(new_bytes);
let myers = myers_diff_lines(&old, &new);
assert_eq!(
patience_diff_lines(&old, &new),
myers,
"patience must equal Myers when all lines are distinct: {:?}",
old_bytes
);
assert_eq!(
histogram_diff_lines(&old, &new),
myers,
"histogram must equal Myers when all lines are distinct: {:?}",
old_bytes
);
}
}
#[test]
fn fuzz_all_algorithms_reconstruct_new() {
let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
let mut next = || {
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
(state >> 33) as u32
};
let alphabet = [b"a\n", b"b\n", b"c\n", b"d\n"];
let build = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
let len = (rng() % 9) as usize; let mut buf = Vec::new();
for _ in 0..len {
let pick = (rng() % alphabet.len() as u32) as usize;
buf.extend_from_slice(alphabet[pick]);
}
if !buf.is_empty() && rng().is_multiple_of(4) {
buf.pop();
}
buf
};
for _ in 0..400 {
let old_bytes = build(&mut next);
let new_bytes = build(&mut next);
check_all_algorithms(&old_bytes, &new_bytes);
}
}
#[test]
fn exhaustive_small_inputs_all_algorithms_reconstruct() {
let syms = [b"a\n".to_vec(), b"b\n".to_vec(), b"c\n".to_vec()];
let make = |n: usize, mut code: usize| -> Vec<u8> {
let mut v = Vec::new();
for _ in 0..n {
v.extend_from_slice(&syms[code % 3]);
code /= 3;
}
v
};
for la in 0..=5usize {
for lb in 0..=5usize {
for ca in 0..3usize.pow(la as u32) {
for cb in 0..3usize.pow(lb as u32) {
let ob = make(la, ca);
let nb = make(lb, cb);
let ol = split_lines(&ob);
let nl = split_lines(&nb);
assert_eq!(apply_ops(&ol, &nl, &myers_diff_lines(&ol, &nl)), nb);
assert_eq!(apply_ops(&ol, &nl, &patience_diff_lines(&ol, &nl)), nb);
assert_eq!(apply_ops(&ol, &nl, &histogram_diff_lines(&ol, &nl)), nb);
}
}
}
}
}
#[test]
fn fuzz_distinct_lines_patience_histogram_equal_myers() {
let mut state: u64 = 0x1234_5678_9ABC_DEF0;
let mut next = || {
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
(state >> 33) as u32
};
for _ in 0..200 {
let pick_subseq = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
let mut buf = Vec::new();
for t in 0..10u32 {
if rng().is_multiple_of(2) {
buf.extend_from_slice(format!("{t}\n").as_bytes());
}
}
buf
};
let old_bytes = pick_subseq(&mut next);
let new_bytes = pick_subseq(&mut next);
let old = split_lines(&old_bytes);
let new = split_lines(&new_bytes);
let myers = myers_diff_lines(&old, &new);
assert_eq!(patience_diff_lines(&old, &new), myers);
assert_eq!(histogram_diff_lines(&old, &new), myers);
}
}
fn status_lines(entries: &[NameStatusEntry]) -> Vec<String> {
entries.iter().map(|entry| entry.line()).collect()
}
fn assert_tree_diff_matches_full(
db: &FileObjectDatabase,
left: &ObjectId,
right: &ObjectId,
options: DiffNameStatusOptions,
) {
let (full_left, full_right) = collect_full_tree_pair(db, ObjectFormat::Sha1, left, right)
.expect("test operation should succeed");
let reference = diff_name_status_maps(
&full_left,
&full_right,
full_left.keys().chain(full_right.keys()),
options,
)
.expect("test operation should succeed");
let (pruned_left, pruned_right) = changed_tree_entries(db, ObjectFormat::Sha1, left, right)
.expect("test operation should succeed");
let pruned = diff_name_status_maps(
&pruned_left,
&pruned_right,
pruned_left.keys().chain(pruned_right.keys()),
options,
)
.expect("test operation should succeed");
assert_eq!(
status_lines(&reference),
status_lines(&pruned),
"pruned map diff diverged from full map diff for {options:?}"
);
let public =
diff_name_status_trees_with_options(db, ObjectFormat::Sha1, left, right, options)
.expect("test operation should succeed");
assert_eq!(
status_lines(&reference),
status_lines(&public),
"public tree diff diverged from full map diff for {options:?}"
);
for (path, tracked) in &pruned_left {
assert_eq!(
full_left.get(path),
Some(tracked),
"pruned left entry not present (or differs) in full left map: {:?}",
String::from_utf8_lossy(path)
);
}
for (path, tracked) in &pruned_right {
assert_eq!(
full_right.get(path),
Some(tracked),
"pruned right entry not present (or differs) in full right map: {:?}",
String::from_utf8_lossy(path)
);
}
for entry in &reference {
let path = entry.path.as_bytes();
match entry.status {
NameStatus::Added => assert!(
pruned_right.contains_key(path),
"added path dropped by pruning: {:?}",
String::from_utf8_lossy(path)
),
NameStatus::Deleted => assert!(
pruned_left.contains_key(path),
"deleted path dropped by pruning: {:?}",
String::from_utf8_lossy(path)
),
NameStatus::Modified => {
assert!(
pruned_left.contains_key(path) && pruned_right.contains_key(path),
"modified path dropped by pruning: {:?}",
String::from_utf8_lossy(path)
);
}
_ => {}
}
}
}
fn assert_tree_diff_matches_full_all_modes(
db: &FileObjectDatabase,
left: &ObjectId,
right: &ObjectId,
) {
for detect_renames in [false, true] {
for detect_copies in [false, true] {
let options = DiffNameStatusOptions {
detect_renames,
detect_copies,
find_copies_harder: false,
rename_empty: true,
};
assert_tree_diff_matches_full(db, left, right, options);
}
}
}
fn structural_db() -> (PathBuf, FileObjectDatabase) {
let root = temp_root();
let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
(root, db)
}
#[test]
fn pruned_walk_skips_identical_subtree_and_matches_full() {
let (root, mut db) = structural_db();
let s1 = write_blob(&mut db, b"shared one\n");
let s2 = write_blob(&mut db, b"shared two\n");
let s3 = write_blob(&mut db, b"deep nested\n");
let shared_inner = write_tree(&mut db, &[(b"c.txt", 0o100644, s3.clone())]);
let shared = write_tree(
&mut db,
&[
(b"a.txt", 0o100644, s1.clone()),
(b"b.txt", 0o100644, s2.clone()),
(b"inner", 0o040000, shared_inner.clone()),
],
);
let app_old = write_blob(&mut db, b"version 1\n");
let app_new = write_blob(&mut db, b"version 2\n");
let app_left = write_tree(&mut db, &[(b"main.rs", 0o100644, app_old)]);
let app_right = write_tree(&mut db, &[(b"main.rs", 0o100644, app_new)]);
let left = write_tree(
&mut db,
&[
(b"app", 0o040000, app_left),
(b"shared", 0o040000, shared.clone()),
],
);
let right = write_tree(
&mut db,
&[(b"app", 0o040000, app_right), (b"shared", 0o040000, shared)],
);
let (pruned_left, pruned_right) =
changed_tree_entries(&db, ObjectFormat::Sha1, &left, &right)
.expect("test operation should succeed");
assert_eq!(
pruned_left.keys().collect::<Vec<_>>(),
vec![&b"app/main.rs".to_vec()],
"pruning should leave only the changed path on the left"
);
assert_eq!(
pruned_right.keys().collect::<Vec<_>>(),
vec![&b"app/main.rs".to_vec()],
"pruning should leave only the changed path on the right"
);
assert!(
!pruned_left.contains_key(b"shared/a.txt".as_slice()),
"identical shared subtree must not appear in pruned maps"
);
assert_tree_diff_matches_full_all_modes(&db, &left, &right);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn pruned_walk_matches_full_for_add_delete_modify_nested() {
let (root, mut db) = structural_db();
let keep = write_blob(&mut db, b"unchanged\n");
let untouched_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, keep.clone())]);
let nested_old = write_blob(&mut db, b"nested old\n");
let nested_new = write_blob(&mut db, b"nested new\n");
let dir_left = write_tree(
&mut db,
&[
(b"changed.txt", 0o100644, nested_old),
(b"stable.txt", 0o100644, keep.clone()),
],
);
let dir_right = write_tree(
&mut db,
&[
(b"changed.txt", 0o100644, nested_new),
(b"stable.txt", 0o100644, keep.clone()),
],
);
let only_left = write_blob(&mut db, b"will be deleted\n");
let only_right = write_blob(&mut db, b"freshly added\n");
let left = write_tree(
&mut db,
&[
(b"dir", 0o040000, dir_left),
(b"gone.txt", 0o100644, only_left),
(b"untouched", 0o040000, untouched_dir.clone()),
],
);
let right = write_tree(
&mut db,
&[
(b"dir", 0o040000, dir_right),
(b"new.txt", 0o100644, only_right),
(b"untouched", 0o040000, untouched_dir),
],
);
let entries = diff_name_status_trees_with_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
DiffNameStatusOptions {
detect_renames: false,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
)
.expect("test operation should succeed");
assert_eq!(
status_lines(&entries),
vec![
"M\tdir/changed.txt".to_string(),
"D\tgone.txt".to_string(),
"A\tnew.txt".to_string(),
],
"unexpected raw status for mixed nested diff"
);
assert_tree_diff_matches_full_all_modes(&db, &left, &right);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn pruned_walk_matches_full_for_rename_across_dirs() {
let (root, mut db) = structural_db();
let moved = write_blob(&mut db, b"i get moved across directories\n");
let companion = write_blob(&mut db, b"i stay put\n");
let stable_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, companion.clone())]);
let src_dir = write_tree(&mut db, &[(b"file.txt", 0o100644, moved.clone())]);
let dst_dir = write_tree(&mut db, &[(b"renamed.txt", 0o100644, moved.clone())]);
let left = write_tree(
&mut db,
&[
(b"src", 0o040000, src_dir),
(b"stable", 0o040000, stable_dir.clone()),
],
);
let right = write_tree(
&mut db,
&[
(b"dst", 0o040000, dst_dir),
(b"stable", 0o040000, stable_dir),
],
);
let entries = diff_name_status_trees_with_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
DiffNameStatusOptions {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
)
.expect("test operation should succeed");
assert_eq!(
status_lines(&entries),
vec!["R100\tsrc/file.txt\tdst/renamed.txt".to_string()],
"rename across dirs should be detected on pruned set"
);
assert_tree_diff_matches_full_all_modes(&db, &left, &right);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn pruned_walk_matches_full_for_binary_and_mode_change() {
let (root, mut db) = structural_db();
let bin_old = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 255, 1, 2, 3]);
let bin_new = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 254, 9, 8, 7]);
let script = write_blob(&mut db, b"#!/bin/sh\necho hi\n");
let left = write_tree(
&mut db,
&[
(b"image.bin", 0o100644, bin_old),
(b"run.sh", 0o100644, script.clone()),
],
);
let right = write_tree(
&mut db,
&[
(b"image.bin", 0o100644, bin_new),
(b"run.sh", 0o100755, script),
],
);
let entries = diff_name_status_trees_with_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
DiffNameStatusOptions {
detect_renames: false,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
)
.expect("test operation should succeed");
assert_eq!(
status_lines(&entries),
vec!["M\timage.bin".to_string(), "M\trun.sh".to_string()],
"binary edit and mode-only change should both be Modify"
);
assert_tree_diff_matches_full_all_modes(&db, &left, &right);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn pruned_walk_matches_full_for_dir_replaced_by_file() {
let (root, mut db) = structural_db();
let inner_a = write_blob(&mut db, b"inner a\n");
let inner_b = write_blob(&mut db, b"inner b\n");
let thing_dir = write_tree(
&mut db,
&[(b"a.txt", 0o100644, inner_a), (b"b.txt", 0o100644, inner_b)],
);
let thing_file = write_blob(&mut db, b"now i am a file\n");
let other_file = write_blob(&mut db, b"i was a file\n");
let other_inner = write_blob(&mut db, b"now nested\n");
let other_dir = write_tree(&mut db, &[(b"x.txt", 0o100644, other_inner)]);
let left = write_tree(
&mut db,
&[
(b"other", 0o100644, other_file),
(b"thing", 0o040000, thing_dir),
],
);
let right = write_tree(
&mut db,
&[
(b"other", 0o040000, other_dir),
(b"thing", 0o100644, thing_file),
],
);
let entries = diff_name_status_trees_with_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
DiffNameStatusOptions {
detect_renames: false,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
)
.expect("test operation should succeed");
assert_eq!(
status_lines(&entries),
vec![
"D\tother".to_string(),
"A\tother/x.txt".to_string(),
"A\tthing".to_string(),
"D\tthing/a.txt".to_string(),
"D\tthing/b.txt".to_string(),
],
"dir<->file swap should flatten to independent adds/deletes"
);
assert_tree_diff_matches_full_all_modes(&db, &left, &right);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn pruned_walk_matches_full_for_identical_trees() {
let (root, mut db) = structural_db();
let blob = write_blob(&mut db, b"same\n");
let sub = write_tree(&mut db, &[(b"f.txt", 0o100644, blob.clone())]);
let tree = write_tree(
&mut db,
&[(b"sub", 0o040000, sub), (b"top.txt", 0o100644, blob)],
);
let (pruned_left, pruned_right) =
changed_tree_entries(&db, ObjectFormat::Sha1, &tree, &tree)
.expect("test operation should succeed");
assert!(
pruned_left.is_empty() && pruned_right.is_empty(),
"identical trees must produce no changed entries"
);
let entries = diff_name_status_trees_with_options(
&db,
ObjectFormat::Sha1,
&tree,
&tree,
DiffNameStatusOptions::default(),
)
.expect("test operation should succeed");
assert!(entries.is_empty(), "identical trees must produce no diff");
assert_tree_diff_matches_full_all_modes(&db, &tree, &tree);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn find_copies_harder_uses_full_left_map_and_finds_unchanged_source() {
let (root, mut db) = structural_db();
let template = write_blob(&mut db, b"reusable boilerplate content\n");
let lib_dir = write_tree(&mut db, &[(b"template.txt", 0o100644, template.clone())]);
let trigger_old = write_blob(&mut db, b"trigger old\n");
let trigger_new = write_blob(&mut db, b"trigger new\n");
let left = write_tree(
&mut db,
&[
(b"lib", 0o040000, lib_dir.clone()),
(b"trigger.txt", 0o100644, trigger_old),
],
);
let right = write_tree(
&mut db,
&[
(b"copy.txt", 0o100644, template.clone()),
(b"lib", 0o040000, lib_dir),
(b"trigger.txt", 0o100644, trigger_new),
],
);
let options = DiffNameStatusOptions {
detect_renames: true,
detect_copies: true,
find_copies_harder: true,
rename_empty: true,
};
let (full_left, full_right) =
collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
.expect("test operation should succeed");
let reference = diff_name_status_maps(
&full_left,
&full_right,
full_left.keys().chain(full_right.keys()),
options,
)
.expect("test operation should succeed");
let public =
diff_name_status_trees_with_options(&db, ObjectFormat::Sha1, &left, &right, options)
.expect("test operation should succeed");
assert_eq!(
status_lines(&reference),
status_lines(&public),
"find-copies-harder public diff must match full-map reference"
);
assert!(
public
.iter()
.any(|entry| matches!(entry.status, NameStatus::Copied(_))
&& entry.old_path.as_ref().map(|p| p.as_bytes())
== Some(b"lib/template.txt".as_slice())
&& entry.path == b"copy.txt"),
"copy from unchanged source must be found with find_copies_harder: {public:?}"
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
#[test]
fn pruned_walk_matches_full_with_inexact_rename_options() {
let (root, mut db) = structural_db();
let untouched = write_blob(&mut db, b"untouched file\n");
let untouched_dir = write_tree(&mut db, &[(b"u.txt", 0o100644, untouched.clone())]);
let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
let left = write_tree(
&mut db,
&[
(b"a.txt", 0o100644, old),
(b"keep", 0o040000, untouched_dir.clone()),
],
);
let right = write_tree(
&mut db,
&[
(b"b.txt", 0o100644, new),
(b"keep", 0o040000, untouched_dir),
],
);
let options = RenameDetectionOptions {
base: DiffNameStatusOptions {
detect_renames: true,
detect_copies: false,
find_copies_harder: false,
rename_empty: true,
},
detect_inexact: true,
rename_threshold: DEFAULT_RENAME_THRESHOLD,
copy_threshold: DEFAULT_RENAME_THRESHOLD,
};
let (full_left, full_right) =
collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
.expect("test operation should succeed");
let reference = diff_name_status_maps_with_renames(
&full_left,
&full_right,
full_left.keys().chain(full_right.keys()),
options,
|oid| read_blob_bytes(&db, oid),
)
.expect("test operation should succeed");
let public = diff_name_status_trees_with_rename_options(
&db,
ObjectFormat::Sha1,
&left,
&right,
options,
)
.expect("test operation should succeed");
assert_eq!(
status_lines(&reference),
status_lines(&public),
"inexact rename via pruned walk must match full-map reference"
);
assert_eq!(
status_lines(&public),
vec!["R075\ta.txt\tb.txt".to_string()],
"expected a 75% inexact rename"
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
}