1use sley_core::{GitError, ObjectFormat, ObjectId, RepoPath, Result, object_id_for_bytes};
2
3pub mod render;
4pub mod ws;
5
6pub use sley_core::BString;
7use sley_index::{BorrowedIndex, Index, IndexStatCache};
8use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntries, TreeEntry};
9use sley_odb::{FileObjectDatabase, ObjectReader, ObjectWriter};
10use sley_refs::{FileRefStore, RefTarget};
11use std::collections::{BTreeMap, BTreeSet, HashMap};
12use std::fs;
13use std::path::{Path, PathBuf};
14
15pub fn gitlink_git_dir(sub_root: &Path) -> Option<PathBuf> {
33 let dot_git = sub_root.join(".git");
34 let metadata = fs::symlink_metadata(&dot_git).ok()?;
35 if metadata.is_dir() {
36 return Some(dot_git);
37 }
38 if !metadata.is_file() {
39 return None;
40 }
41 let contents = fs::read_to_string(&dot_git).ok()?;
42 let target = contents.strip_prefix("gitdir:")?.trim();
43 if target.is_empty() {
44 return None;
45 }
46 let target = PathBuf::from(target);
47 let git_dir = if target.is_absolute() {
48 target
49 } else {
50 sub_root.join(target)
51 };
52 if git_dir.is_dir() {
53 Some(git_dir)
54 } else {
55 None
56 }
57}
58
59pub fn gitlink_head_oid(sub_root: &Path, format: ObjectFormat) -> Option<ObjectId> {
65 let git_dir = gitlink_git_dir(sub_root)?;
66 let store = FileRefStore::new(&git_dir, format);
67 let mut target = store.read_ref("HEAD").ok()??;
68 for _ in 0..10 {
70 match target {
71 RefTarget::Direct(oid) => return Some(oid),
72 RefTarget::Symbolic(name) => target = store.read_ref(&name).ok()??,
73 }
74 }
75 None
76}
77
78#[derive(Debug, Clone, Copy, PartialEq, Eq)]
95pub struct DiffLine<'a> {
96 pub content: &'a [u8],
98 pub has_newline: bool,
100}
101
102impl<'a> DiffLine<'a> {
103 pub fn bytes_without_newline(&self) -> &'a [u8] {
105 if self.has_newline {
106 self.content.strip_suffix(b"\n").unwrap_or(self.content)
107 } else {
108 self.content
109 }
110 }
111}
112
113pub fn split_lines(blob: &[u8]) -> Vec<DiffLine<'_>> {
120 let mut lines = Vec::new();
121 let mut start = 0usize;
122 let len = blob.len();
123 let mut idx = 0usize;
124 while idx < len {
125 if blob[idx] == b'\n' {
126 lines.push(DiffLine {
127 content: &blob[start..=idx],
128 has_newline: true,
129 });
130 idx += 1;
131 start = idx;
132 } else {
133 idx += 1;
134 }
135 }
136 if start < len {
137 lines.push(DiffLine {
138 content: &blob[start..len],
139 has_newline: false,
140 });
141 }
142 lines
143}
144
145#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub enum DiffOp {
156 Equal(usize),
158 Delete(usize),
160 Insert(usize),
162}
163
164pub fn myers_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
173 let n_total = old.len();
176 let m_total = new.len();
177 let mut prefix = 0usize;
178 while prefix < n_total && prefix < m_total && old[prefix] == new[prefix] {
179 prefix += 1;
180 }
181 let mut suffix = 0usize;
182 while suffix < n_total - prefix
183 && suffix < m_total - prefix
184 && old[n_total - 1 - suffix] == new[m_total - 1 - suffix]
185 {
186 suffix += 1;
187 }
188
189 let old_mid = &old[prefix..n_total - suffix];
190 let new_mid = &new[prefix..m_total - suffix];
191
192 let mut ops: Vec<DiffOp> = Vec::new();
193 if prefix > 0 {
194 ops.push(DiffOp::Equal(prefix));
195 }
196 myers_core(old_mid, new_mid, &mut ops);
197 if suffix > 0 {
198 ops.push(DiffOp::Equal(suffix));
199 }
200 coalesce_ops(ops)
201}
202
203fn myers_core(old: &[DiffLine<'_>], new: &[DiffLine<'_>], out: &mut Vec<DiffOp>) {
211 let n = old.len() as isize;
212 let m = new.len() as isize;
213 if n == 0 {
214 if m > 0 {
215 out.push(DiffOp::Insert(m as usize));
216 }
217 return;
218 }
219 if m == 0 {
220 out.push(DiffOp::Delete(n as usize));
221 return;
222 }
223
224 let max = (n + m) as usize;
225 let offset = max as isize; let width = 2 * max + 1;
227 let mut v = vec![0isize; width];
229 let mut trace: Vec<Vec<isize>> = Vec::new();
231
232 let mut found_d: Option<usize> = None;
233 'search: for d in 0..=(max as isize) {
234 trace.push(v.clone());
235 let mut k = -d;
236 while k <= d {
237 let kidx = (k + offset) as usize;
238 let mut x = if k == -d
241 || (k != d && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
242 {
243 v[(k + 1 + offset) as usize]
245 } else {
246 v[(k - 1 + offset) as usize] + 1
248 };
249 let mut y = x - k;
250 while x < n && y < m && old[x as usize] == new[y as usize] {
252 x += 1;
253 y += 1;
254 }
255 v[kidx] = x;
256 if x >= n && y >= m {
257 found_d = Some(d as usize);
258 break 'search;
259 }
260 k += 2;
261 }
262 }
263
264 let Some(d_end) = found_d else {
267 out.push(DiffOp::Delete(n as usize));
268 out.push(DiffOp::Insert(m as usize));
269 return;
270 };
271
272 backtrack(n, m, &trace, d_end, offset, out);
273}
274
275fn backtrack(
281 n: isize,
282 m: isize,
283 trace: &[Vec<isize>],
284 d_end: usize,
285 offset: isize,
286 out: &mut Vec<DiffOp>,
287) {
288 let mut x = n;
289 let mut y = m;
290 let mut rev: Vec<DiffOp> = Vec::new();
291
292 for d in (0..=d_end).rev() {
293 let v = &trace[d];
294 let k = x - y;
295 let prev_k = if k == -(d as isize)
297 || (k != d as isize && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
298 {
299 k + 1 } else {
301 k - 1 };
303 let prev_x = v[(prev_k + offset) as usize];
304 let prev_y = prev_x - prev_k;
305
306 while x > prev_x && y > prev_y {
308 rev.push(DiffOp::Equal(1));
309 x -= 1;
310 y -= 1;
311 }
312 if d > 0 {
313 if x == prev_x {
314 rev.push(DiffOp::Insert(1));
316 } else {
317 rev.push(DiffOp::Delete(1));
319 }
320 x = prev_x;
321 y = prev_y;
322 }
323 }
324
325 rev.reverse();
326 out.extend(rev);
327}
328
329fn coalesce_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
331 let mut out: Vec<DiffOp> = Vec::with_capacity(ops.len());
332 for op in ops {
333 match (out.last_mut(), op) {
334 (Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
335 (Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
336 (Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
337 _ => out.push(op),
338 }
339 }
340 out
341}
342
343type LineKey<'a> = (&'a [u8], bool);
369
370#[inline]
371fn line_key<'a>(line: &DiffLine<'a>) -> LineKey<'a> {
372 (line.content, line.has_newline)
373}
374
375pub fn patience_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
389 let mut ops: Vec<DiffOp> = Vec::new();
390 patience_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
391 coalesce_ops(ops)
392}
393
394pub fn histogram_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
406 let mut ops: Vec<DiffOp> = Vec::new();
407 histogram_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
408 coalesce_ops(ops)
409}
410
411pub fn diff_lines_with_algorithm(
424 old: &[DiffLine<'_>],
425 new: &[DiffLine<'_>],
426 algorithm: DiffAlgorithm,
427) -> Vec<DiffOp> {
428 match algorithm {
429 DiffAlgorithm::Myers | DiffAlgorithm::Minimal => myers_diff_lines(old, new),
430 DiffAlgorithm::Patience => patience_diff_lines(old, new),
431 DiffAlgorithm::Histogram => histogram_diff_lines(old, new),
432 }
433}
434
435fn emit_trivial_range(a0: usize, a1: usize, b0: usize, b1: usize, out: &mut Vec<DiffOp>) -> bool {
442 let old_len = a1 - a0;
443 let new_len = b1 - b0;
444 if old_len == 0 && new_len == 0 {
445 return true;
446 }
447 if old_len == 0 {
448 out.push(DiffOp::Insert(new_len));
449 return true;
450 }
451 if new_len == 0 {
452 out.push(DiffOp::Delete(old_len));
453 return true;
454 }
455 false
456}
457
458fn trim_common(
466 old: &[DiffLine<'_>],
467 new: &[DiffLine<'_>],
468 mut a0: usize,
469 mut a1: usize,
470 mut b0: usize,
471 mut b1: usize,
472 out: &mut Vec<DiffOp>,
473) -> (usize, usize, usize, usize, usize) {
474 let mut prefix = 0usize;
475 while a0 < a1 && b0 < b1 && old[a0] == new[b0] {
476 a0 += 1;
477 b0 += 1;
478 prefix += 1;
479 }
480 if prefix > 0 {
481 out.push(DiffOp::Equal(prefix));
482 }
483 let mut suffix = 0usize;
484 while a1 > a0 && b1 > b0 && old[a1 - 1] == new[b1 - 1] {
485 a1 -= 1;
486 b1 -= 1;
487 suffix += 1;
488 }
489 (a0, a1, b0, b1, suffix)
490}
491
492fn patience_recurse(
494 old: &[DiffLine<'_>],
495 new: &[DiffLine<'_>],
496 a0: usize,
497 a1: usize,
498 b0: usize,
499 b1: usize,
500 out: &mut Vec<DiffOp>,
501) {
502 if emit_trivial_range(a0, a1, b0, b1, out) {
503 return;
504 }
505 let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
506 if !emit_trivial_range(a0, a1, b0, b1, out) {
507 match patience_anchors(old, new, a0, a1, b0, b1) {
508 Some(anchors) => {
509 let mut cur_a = a0;
512 let mut cur_b = b0;
513 for (ai, bi) in anchors {
514 patience_recurse(old, new, cur_a, ai, cur_b, bi, out);
515 out.push(DiffOp::Equal(1));
516 cur_a = ai + 1;
517 cur_b = bi + 1;
518 }
519 patience_recurse(old, new, cur_a, a1, cur_b, b1, out);
521 }
522 None => myers_core(&old[a0..a1], &new[b0..b1], out),
525 }
526 }
527 if suffix > 0 {
528 out.push(DiffOp::Equal(suffix));
529 }
530}
531
532fn patience_anchors(
541 old: &[DiffLine<'_>],
542 new: &[DiffLine<'_>],
543 a0: usize,
544 a1: usize,
545 b0: usize,
546 b1: usize,
547) -> Option<Vec<(usize, usize)>> {
548 struct Occ {
551 count: usize,
552 pos: usize,
553 }
554 let mut in_old: HashMap<LineKey<'_>, Occ> = HashMap::new();
555 for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
556 in_old
557 .entry(line_key(line))
558 .and_modify(|o| o.count += 1)
559 .or_insert(Occ { count: 1, pos: i });
560 }
561 let mut in_new: HashMap<LineKey<'_>, Occ> = HashMap::new();
562 for (j, line) in new.iter().enumerate().take(b1).skip(b0) {
563 in_new
564 .entry(line_key(line))
565 .and_modify(|o| o.count += 1)
566 .or_insert(Occ { count: 1, pos: j });
567 }
568
569 let mut pairs: Vec<(usize, usize)> = Vec::new();
571 for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
572 let key = line_key(line);
573 let Some(o) = in_old.get(&key) else { continue };
574 if o.count != 1 || o.pos != i {
575 continue;
576 }
577 if let Some(n) = in_new.get(&key)
579 && n.count == 1
580 {
581 pairs.push((i, n.pos));
582 }
583 }
584 if pairs.is_empty() {
585 return None;
586 }
587
588 let lis = longest_increasing_by_new(&pairs);
592 if lis.is_empty() { None } else { Some(lis) }
593}
594
595fn longest_increasing_by_new(pairs: &[(usize, usize)]) -> Vec<(usize, usize)> {
603 if pairs.is_empty() {
604 return Vec::new();
605 }
606 let mut tails: Vec<usize> = Vec::new();
609 let mut prev: Vec<Option<usize>> = vec![None; pairs.len()];
611
612 for i in 0..pairs.len() {
613 let val = pairs[i].1;
614 let mut lo = 0usize;
616 let mut hi = tails.len();
617 while lo < hi {
618 let mid = lo + (hi - lo) / 2;
619 if pairs[tails[mid]].1 < val {
620 lo = mid + 1;
621 } else {
622 hi = mid;
623 }
624 }
625 if lo > 0 {
626 prev[i] = Some(tails[lo - 1]);
627 }
628 if lo == tails.len() {
629 tails.push(i);
630 } else {
631 tails[lo] = i;
632 }
633 }
634
635 let mut result: Vec<(usize, usize)> = Vec::with_capacity(tails.len());
637 let mut cur = tails.last().copied();
638 while let Some(i) = cur {
639 result.push(pairs[i]);
640 cur = prev[i];
641 }
642 result.reverse();
643 result
644}
645
646fn histogram_recurse(
648 old: &[DiffLine<'_>],
649 new: &[DiffLine<'_>],
650 a0: usize,
651 a1: usize,
652 b0: usize,
653 b1: usize,
654 out: &mut Vec<DiffOp>,
655) {
656 if emit_trivial_range(a0, a1, b0, b1, out) {
657 return;
658 }
659 let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
660 if !emit_trivial_range(a0, a1, b0, b1, out) {
661 match histogram_region(old, new, a0, a1, b0, b1) {
662 Some(region) => {
663 histogram_recurse(old, new, a0, region.old_start, b0, region.new_start, out);
666 out.push(DiffOp::Equal(region.len));
667 histogram_recurse(
668 old,
669 new,
670 region.old_start + region.len,
671 a1,
672 region.new_start + region.len,
673 b1,
674 out,
675 );
676 }
677 None => myers_core(&old[a0..a1], &new[b0..b1], out),
679 }
680 }
681 if suffix > 0 {
682 out.push(DiffOp::Equal(suffix));
683 }
684}
685
686struct HistogramRegion {
688 old_start: usize,
689 new_start: usize,
690 len: usize,
691}
692
693fn histogram_region(
703 old: &[DiffLine<'_>],
704 new: &[DiffLine<'_>],
705 a0: usize,
706 a1: usize,
707 b0: usize,
708 b1: usize,
709) -> Option<HistogramRegion> {
710 let mut buckets: HashMap<LineKey<'_>, Vec<usize>> = HashMap::new();
712 for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
713 buckets.entry(line_key(line)).or_default().push(i);
714 }
715
716 let mut best: Option<HistogramRegion> = None;
717 let mut best_count = usize::MAX;
719 let mut best_len = 0usize;
720
721 let mut bj = b0;
722 while bj < b1 {
723 let key = line_key(&new[bj]);
724 let Some(positions) = buckets.get(&key) else {
725 bj += 1;
726 continue;
727 };
728 let occ = positions.len();
729 let mut next_bj = bj + 1;
732 for &ai in positions {
733 let mut start_a = ai;
735 let mut start_b = bj;
736 while start_a > a0 && start_b > b0 && old[start_a - 1] == new[start_b - 1] {
737 start_a -= 1;
738 start_b -= 1;
739 }
740 let mut len = 0usize;
742 while start_a + len < a1
743 && start_b + len < b1
744 && old[start_a + len] == new[start_b + len]
745 {
746 len += 1;
747 }
748 let run_count = occ;
751 let better = run_count < best_count || (run_count == best_count && len > best_len);
752 if better && len > 0 {
753 best_count = run_count;
754 best_len = len;
755 best = Some(HistogramRegion {
756 old_start: start_a,
757 new_start: start_b,
758 len,
759 });
760 if start_b + len > next_bj {
763 next_bj = start_b + len;
764 }
765 }
766 }
767 bj = next_bj.max(bj + 1);
768 }
769
770 best
771}
772
773#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
775pub enum ConflictStyle {
776 #[default]
778 Merge,
779 Diff3,
782}
783
784#[derive(Debug, Clone, Copy)]
786pub struct MergeBlobOptions<'a> {
787 pub ours_label: &'a str,
789 pub theirs_label: &'a str,
791 pub base_label: &'a str,
793 pub style: ConflictStyle,
795}
796
797impl Default for MergeBlobOptions<'_> {
798 fn default() -> Self {
799 Self {
800 ours_label: "ours",
801 theirs_label: "theirs",
802 base_label: "base",
803 style: ConflictStyle::Merge,
804 }
805 }
806}
807
808#[derive(Debug, Clone, PartialEq, Eq)]
810pub struct MergeBlobResult {
811 pub content: Vec<u8>,
813 pub conflicted: bool,
815}
816
817pub fn merge_blobs(
832 base: &[u8],
833 ours: &[u8],
834 theirs: &[u8],
835 options: &MergeBlobOptions<'_>,
836) -> MergeBlobResult {
837 let base_lines = split_lines(base);
838 let ours_lines = split_lines(ours);
839 let theirs_lines = split_lines(theirs);
840
841 let ours_matches = matching_regions(&base_lines, &ours_lines);
844 let theirs_matches = matching_regions(&base_lines, &theirs_lines);
845
846 let stable = common_stable_segments(&ours_matches, &theirs_matches);
850
851 let mut writer = MergeWriter::new(options);
852 let mut base_idx = 0usize;
854 let mut our_idx = 0usize;
855 let mut their_idx = 0usize;
856
857 for seg in &stable {
858 let base_region = &base_lines[base_idx..seg.base_start];
860 let our_region = &ours_lines[our_idx..seg.ours_start];
861 let their_region = &theirs_lines[their_idx..seg.theirs_start];
862 emit_region(&mut writer, base_region, our_region, their_region);
863
864 writer.emit_lines(&base_lines[seg.base_start..seg.base_start + seg.len]);
866
867 base_idx = seg.base_start + seg.len;
868 our_idx = seg.ours_start + seg.len;
869 their_idx = seg.theirs_start + seg.len;
870 }
871
872 emit_region(
875 &mut writer,
876 &base_lines[base_idx..],
877 &ours_lines[our_idx..],
878 &theirs_lines[their_idx..],
879 );
880
881 writer.finish()
882}
883
884fn emit_region(
887 writer: &mut MergeWriter<'_>,
888 base_region: &[DiffLine<'_>],
889 our_region: &[DiffLine<'_>],
890 their_region: &[DiffLine<'_>],
891) {
892 if our_region.is_empty() && their_region.is_empty() {
893 return;
894 }
895 let our_changed = our_region != base_region;
896 let their_changed = their_region != base_region;
897 match (our_changed, their_changed) {
898 (false, false) => writer.emit_lines(base_region),
899 (true, false) => writer.emit_lines(our_region),
900 (false, true) => writer.emit_lines(their_region),
901 (true, true) => {
902 if our_region == their_region {
903 writer.emit_lines(our_region);
905 } else {
906 writer.emit_conflict(our_region, base_region, their_region);
907 }
908 }
909 }
910}
911
912#[derive(Debug, Clone, Copy)]
915struct MatchRegion {
916 base_start: usize,
917 side_start: usize,
918 len: usize,
919}
920
921#[derive(Debug, Clone, Copy)]
923struct StableSegment {
924 base_start: usize,
925 ours_start: usize,
926 theirs_start: usize,
927 len: usize,
928}
929
930fn matching_regions(base: &[DiffLine<'_>], side: &[DiffLine<'_>]) -> Vec<MatchRegion> {
936 let ops = myers_diff_lines(base, side);
937 let mut regions = Vec::new();
938 let mut base_idx = 0usize;
939 let mut side_idx = 0usize;
940 for op in ops {
941 match op {
942 DiffOp::Equal(n) => {
943 regions.push(MatchRegion {
944 base_start: base_idx,
945 side_start: side_idx,
946 len: n,
947 });
948 base_idx += n;
949 side_idx += n;
950 }
951 DiffOp::Delete(n) => base_idx += n,
952 DiffOp::Insert(n) => side_idx += n,
953 }
954 }
955 regions
956}
957
958fn common_stable_segments(ours: &[MatchRegion], theirs: &[MatchRegion]) -> Vec<StableSegment> {
966 let mut segments = Vec::new();
967 let mut oi = 0usize;
968 let mut ti = 0usize;
969 while oi < ours.len() && ti < theirs.len() {
970 let o = ours[oi];
971 let t = theirs[ti];
972 let o_end = o.base_start + o.len;
973 let t_end = t.base_start + t.len;
974 let lo = o.base_start.max(t.base_start);
975 let hi = o_end.min(t_end);
976 if lo < hi {
977 segments.push(StableSegment {
978 base_start: lo,
979 ours_start: o.side_start + (lo - o.base_start),
980 theirs_start: t.side_start + (lo - t.base_start),
981 len: hi - lo,
982 });
983 }
984 if o_end <= t_end {
986 oi += 1;
987 } else {
988 ti += 1;
989 }
990 }
991 segments
992}
993
994struct MergeWriter<'a> {
997 out: Vec<u8>,
998 conflicted: bool,
999 options: &'a MergeBlobOptions<'a>,
1000}
1001
1002impl<'a> MergeWriter<'a> {
1003 fn new(options: &'a MergeBlobOptions<'a>) -> Self {
1004 Self {
1005 out: Vec::new(),
1006 conflicted: false,
1007 options,
1008 }
1009 }
1010
1011 fn emit_lines(&mut self, lines: &[DiffLine<'_>]) {
1014 for line in lines {
1015 self.out.extend_from_slice(line.content);
1016 }
1017 }
1018
1019 fn emit_conflict(
1025 &mut self,
1026 ours: &[DiffLine<'_>],
1027 base: &[DiffLine<'_>],
1028 theirs: &[DiffLine<'_>],
1029 ) {
1030 self.conflicted = true;
1031 self.write_marker(b'<', self.options.ours_label);
1032 self.emit_section(ours);
1033 if self.options.style == ConflictStyle::Diff3 {
1034 self.ensure_newline();
1035 self.write_marker(b'|', self.options.base_label);
1036 self.emit_section(base);
1037 }
1038 self.ensure_newline();
1039 self.write_divider();
1040 self.emit_section(theirs);
1041 self.ensure_newline();
1042 self.write_marker(b'>', self.options.theirs_label);
1043 }
1044
1045 fn emit_section(&mut self, lines: &[DiffLine<'_>]) {
1047 for line in lines {
1048 self.out.extend_from_slice(line.content);
1049 }
1050 }
1051
1052 fn ensure_newline(&mut self) {
1055 if !self.out.is_empty() && self.out.last() != Some(&b'\n') {
1056 self.out.push(b'\n');
1057 }
1058 }
1059
1060 fn write_marker(&mut self, ch: u8, label: &str) {
1064 for _ in 0..7 {
1065 self.out.push(ch);
1066 }
1067 if !label.is_empty() {
1068 self.out.push(b' ');
1069 self.out.extend_from_slice(label.as_bytes());
1070 }
1071 self.out.push(b'\n');
1072 }
1073
1074 fn write_divider(&mut self) {
1076 for _ in 0..7 {
1077 self.out.push(b'=');
1078 }
1079 self.out.push(b'\n');
1080 }
1081
1082 fn finish(self) -> MergeBlobResult {
1083 MergeBlobResult {
1084 content: self.out,
1085 conflicted: self.conflicted,
1086 }
1087 }
1088}
1089
1090#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1091pub enum DiffAlgorithm {
1092 Myers,
1093 Minimal,
1094 Patience,
1095 Histogram,
1096}
1097
1098#[derive(Debug, Clone, PartialEq, Eq)]
1099pub enum FileChange {
1100 Add { path: RepoPath },
1101 Delete { path: RepoPath },
1102 Modify { path: RepoPath },
1103 Rename { old: RepoPath, new: RepoPath },
1104 Copy { source: RepoPath, dest: RepoPath },
1105}
1106
1107#[derive(Debug, Clone, PartialEq, Eq)]
1108pub struct Conflict {
1109 pub path: RepoPath,
1110 pub ours: Vec<u8>,
1111 pub theirs: Vec<u8>,
1112}
1113
1114#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1115pub enum NameStatus {
1116 Added,
1117 Deleted,
1118 Modified,
1119 Renamed(u8),
1120 Copied(u8),
1121}
1122
1123impl NameStatus {
1124 pub const fn code(self) -> char {
1125 match self {
1126 Self::Added => 'A',
1127 Self::Deleted => 'D',
1128 Self::Modified => 'M',
1129 Self::Renamed(_) => 'R',
1130 Self::Copied(_) => 'C',
1131 }
1132 }
1133
1134 pub fn label(self) -> String {
1135 match self {
1136 Self::Renamed(score) => format!("R{score:03}"),
1137 Self::Copied(score) => format!("C{score:03}"),
1138 _ => self.code().to_string(),
1139 }
1140 }
1141}
1142
1143#[derive(Debug, Clone, PartialEq, Eq)]
1144pub struct NameStatusEntry {
1145 pub status: NameStatus,
1146 pub path: BString,
1147 pub old_path: Option<BString>,
1148 pub old_mode: Option<u32>,
1149 pub new_mode: Option<u32>,
1150 pub old_oid: Option<ObjectId>,
1151 pub new_oid: Option<ObjectId>,
1152}
1153
1154impl NameStatusEntry {
1155 pub fn line(&self) -> String {
1156 if let Some(old_path) = &self.old_path {
1157 format!(
1158 "{}\t{}\t{}",
1159 self.status.label(),
1160 String::from_utf8_lossy(old_path.as_bytes()),
1161 String::from_utf8_lossy(self.path.as_bytes())
1162 )
1163 } else {
1164 format!(
1165 "{}\t{}",
1166 self.status.label(),
1167 String::from_utf8_lossy(self.path.as_bytes())
1168 )
1169 }
1170 }
1171}
1172
1173#[derive(Debug, Clone, PartialEq, Eq)]
1174pub struct IndexGitlinkEntry {
1175 pub path: BString,
1176 pub oid: ObjectId,
1177}
1178
1179#[derive(Debug, Clone, PartialEq, Eq)]
1180pub struct IndexWorktreeDiff {
1181 pub entries: Vec<NameStatusEntry>,
1182 pub staged_gitlinks: Vec<IndexGitlinkEntry>,
1183}
1184
1185#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1186pub struct DiffNameStatusOptions {
1187 pub detect_renames: bool,
1188 pub detect_copies: bool,
1189 pub find_copies_harder: bool,
1190 pub rename_empty: bool,
1191}
1192
1193impl Default for DiffNameStatusOptions {
1194 fn default() -> Self {
1195 Self {
1196 detect_renames: true,
1197 detect_copies: false,
1198 find_copies_harder: false,
1199 rename_empty: true,
1200 }
1201 }
1202}
1203
1204pub const DEFAULT_RENAME_THRESHOLD: u8 = 50;
1208
1209#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1225pub struct RenameDetectionOptions {
1226 pub base: DiffNameStatusOptions,
1229 pub detect_inexact: bool,
1232 pub rename_threshold: u8,
1236 pub copy_threshold: u8,
1240}
1241
1242impl Default for RenameDetectionOptions {
1243 fn default() -> Self {
1244 Self {
1245 base: DiffNameStatusOptions::default(),
1246 detect_inexact: false,
1247 rename_threshold: DEFAULT_RENAME_THRESHOLD,
1248 copy_threshold: DEFAULT_RENAME_THRESHOLD,
1249 }
1250 }
1251}
1252
1253impl RenameDetectionOptions {
1254 pub fn inexact(base: DiffNameStatusOptions) -> Self {
1257 Self {
1258 base,
1259 detect_inexact: true,
1260 ..Self::default()
1261 }
1262 }
1263}
1264
1265pub fn diff_name_status_head_worktree(
1266 worktree_root: impl AsRef<Path>,
1267 git_dir: impl AsRef<Path>,
1268 format: ObjectFormat,
1269) -> Result<Vec<NameStatusEntry>> {
1270 diff_name_status_head_worktree_with_options(
1271 worktree_root,
1272 git_dir,
1273 format,
1274 DiffNameStatusOptions::default(),
1275 )
1276}
1277
1278pub fn diff_name_status_head_worktree_with_options(
1279 worktree_root: impl AsRef<Path>,
1280 git_dir: impl AsRef<Path>,
1281 format: ObjectFormat,
1282 options: DiffNameStatusOptions,
1283) -> Result<Vec<NameStatusEntry>> {
1284 let worktree_root = worktree_root.as_ref();
1285 let git_dir = git_dir.as_ref();
1286 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1287 let head = head_tree_entries(git_dir, format, &db)?;
1288 let IndexSnapshot {
1289 entries: index,
1290 stat_cache,
1291 } = read_index_snapshot(git_dir, format)?;
1292 let index_gitlinks = index_gitlinks(&index);
1293 let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1294 let worktree = worktree_entries_for_path_set(
1295 worktree_root,
1296 format,
1297 &candidate_paths,
1298 &index_gitlinks,
1299 Some(&stat_cache),
1300 )?;
1301 let changes = diff_name_status_maps_for_path_set(&head, &worktree, &candidate_paths, options)?;
1302 Ok(mark_unstaged_worktree_oids_unresolved(
1303 changes, &index, &worktree,
1304 ))
1305}
1306
1307pub fn diff_name_status_head_worktree_with_rename_options(
1311 worktree_root: impl AsRef<Path>,
1312 git_dir: impl AsRef<Path>,
1313 format: ObjectFormat,
1314 options: RenameDetectionOptions,
1315) -> Result<Vec<NameStatusEntry>> {
1316 let worktree_root = worktree_root.as_ref();
1317 let git_dir = git_dir.as_ref();
1318 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1319 let head = head_tree_entries(git_dir, format, &db)?;
1320 let IndexSnapshot {
1321 entries: index,
1322 stat_cache,
1323 } = read_index_snapshot(git_dir, format)?;
1324 let index_gitlinks = index_gitlinks(&index);
1325 let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1326 let worktree = worktree_entries_for_path_set(
1327 worktree_root,
1328 format,
1329 &candidate_paths,
1330 &index_gitlinks,
1331 Some(&stat_cache),
1332 )?;
1333 let cache = worktree_blob_cache_for_path_set(
1334 worktree_root,
1335 &head,
1336 &worktree,
1337 &candidate_paths,
1338 options,
1339 )?;
1340 let changes = diff_name_status_maps_with_renames_for_path_set(
1341 &head,
1342 &worktree,
1343 &candidate_paths,
1344 options,
1345 |oid| cache_or_odb_blob(&cache, &db, oid),
1346 )?;
1347 Ok(mark_unstaged_worktree_oids_unresolved(
1348 changes, &index, &worktree,
1349 ))
1350}
1351
1352pub fn diff_name_status_head_index(
1353 git_dir: impl AsRef<Path>,
1354 format: ObjectFormat,
1355) -> Result<Vec<NameStatusEntry>> {
1356 diff_name_status_head_index_with_options(git_dir, format, DiffNameStatusOptions::default())
1357}
1358
1359pub fn diff_name_status_head_index_with_options(
1360 git_dir: impl AsRef<Path>,
1361 format: ObjectFormat,
1362 options: DiffNameStatusOptions,
1363) -> Result<Vec<NameStatusEntry>> {
1364 let git_dir = git_dir.as_ref();
1365 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1366 let head = head_tree_entries(git_dir, format, &db)?;
1367 let index = read_index_entries(git_dir, format)?;
1368 diff_name_status_maps(&head, &index, head.keys().chain(index.keys()), options)
1369}
1370
1371pub fn diff_name_status_head_index_with_rename_options(
1375 git_dir: impl AsRef<Path>,
1376 format: ObjectFormat,
1377 options: RenameDetectionOptions,
1378) -> Result<Vec<NameStatusEntry>> {
1379 let git_dir = git_dir.as_ref();
1380 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1381 let head = head_tree_entries(git_dir, format, &db)?;
1382 let index = read_index_entries(git_dir, format)?;
1383 diff_name_status_maps_with_renames(
1384 &head,
1385 &index,
1386 head.keys().chain(index.keys()),
1387 options,
1388 |oid| read_blob_bytes(&db, oid),
1389 )
1390}
1391
1392fn tree_entries(
1402 tree_oid: &ObjectId,
1403 format: ObjectFormat,
1404 db: &FileObjectDatabase,
1405) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
1406 let mut entries = BTreeMap::new();
1407 if *tree_oid == empty_tree_oid(format)? {
1408 return Ok(entries);
1409 }
1410 collect_tree_entries(db, format, tree_oid, Vec::new(), &mut entries)?;
1411 Ok(entries)
1412}
1413
1414fn empty_tree_oid(format: ObjectFormat) -> Result<ObjectId> {
1417 object_id_for_bytes(format, "tree", b"")
1418}
1419
1420pub fn diff_name_status_tree_index_with_options(
1424 git_dir: impl AsRef<Path>,
1425 format: ObjectFormat,
1426 tree_oid: &ObjectId,
1427 options: DiffNameStatusOptions,
1428) -> Result<Vec<NameStatusEntry>> {
1429 let git_dir = git_dir.as_ref();
1430 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1431 let tree = tree_entries(tree_oid, format, &db)?;
1432 let index = read_index_entries(git_dir, format)?;
1433 diff_name_status_maps(&tree, &index, tree.keys().chain(index.keys()), options)
1434}
1435
1436pub fn diff_name_status_tree_index_with_rename_options(
1441 git_dir: impl AsRef<Path>,
1442 format: ObjectFormat,
1443 tree_oid: &ObjectId,
1444 options: RenameDetectionOptions,
1445) -> Result<Vec<NameStatusEntry>> {
1446 let git_dir = git_dir.as_ref();
1447 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1448 let tree = tree_entries(tree_oid, format, &db)?;
1449 let index = read_index_entries(git_dir, format)?;
1450 diff_name_status_maps_with_renames(
1451 &tree,
1452 &index,
1453 tree.keys().chain(index.keys()),
1454 options,
1455 |oid| read_blob_bytes(&db, oid),
1456 )
1457}
1458
1459pub fn diff_name_status_tree_worktree_with_options(
1465 worktree_root: impl AsRef<Path>,
1466 git_dir: impl AsRef<Path>,
1467 format: ObjectFormat,
1468 tree_oid: &ObjectId,
1469 options: DiffNameStatusOptions,
1470) -> Result<Vec<NameStatusEntry>> {
1471 let worktree_root = worktree_root.as_ref();
1472 let git_dir = git_dir.as_ref();
1473 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1474 let tree = tree_entries(tree_oid, format, &db)?;
1475 let IndexSnapshot {
1476 entries: index,
1477 stat_cache,
1478 } = read_index_snapshot(git_dir, format)?;
1479 let index_gitlinks = index_gitlinks(&index);
1480 let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1481 let worktree = worktree_entries_for_path_set(
1482 worktree_root,
1483 format,
1484 &candidate_paths,
1485 &index_gitlinks,
1486 Some(&stat_cache),
1487 )?;
1488 let changes = diff_name_status_maps_for_path_set(&tree, &worktree, &candidate_paths, options)?;
1489 Ok(mark_unstaged_worktree_oids_unresolved(
1490 changes, &index, &worktree,
1491 ))
1492}
1493
1494pub fn diff_name_status_tree_worktree_with_rename_options(
1500 worktree_root: impl AsRef<Path>,
1501 git_dir: impl AsRef<Path>,
1502 format: ObjectFormat,
1503 tree_oid: &ObjectId,
1504 options: RenameDetectionOptions,
1505) -> Result<Vec<NameStatusEntry>> {
1506 let worktree_root = worktree_root.as_ref();
1507 let git_dir = git_dir.as_ref();
1508 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1509 let tree = tree_entries(tree_oid, format, &db)?;
1510 let IndexSnapshot {
1511 entries: index,
1512 stat_cache,
1513 } = read_index_snapshot(git_dir, format)?;
1514 let index_gitlinks = index_gitlinks(&index);
1515 let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1516 let worktree = worktree_entries_for_path_set(
1517 worktree_root,
1518 format,
1519 &candidate_paths,
1520 &index_gitlinks,
1521 Some(&stat_cache),
1522 )?;
1523 let cache = worktree_blob_cache_for_path_set(
1524 worktree_root,
1525 &tree,
1526 &worktree,
1527 &candidate_paths,
1528 options,
1529 )?;
1530 let changes = diff_name_status_maps_with_renames_for_path_set(
1531 &tree,
1532 &worktree,
1533 &candidate_paths,
1534 options,
1535 |oid| cache_or_odb_blob(&cache, &db, oid),
1536 )?;
1537 Ok(mark_unstaged_worktree_oids_unresolved(
1538 changes, &index, &worktree,
1539 ))
1540}
1541
1542pub fn diff_name_status_index_worktree(
1543 worktree_root: impl AsRef<Path>,
1544 git_dir: impl AsRef<Path>,
1545 format: ObjectFormat,
1546) -> Result<Vec<NameStatusEntry>> {
1547 diff_name_status_index_worktree_with_options(
1548 worktree_root,
1549 git_dir,
1550 format,
1551 DiffNameStatusOptions::default(),
1552 )
1553}
1554
1555pub fn diff_name_status_index_worktree_with_options(
1556 worktree_root: impl AsRef<Path>,
1557 git_dir: impl AsRef<Path>,
1558 format: ObjectFormat,
1559 options: DiffNameStatusOptions,
1560) -> Result<Vec<NameStatusEntry>> {
1561 Ok(diff_name_status_index_worktree_with_options_and_gitlinks(
1562 worktree_root,
1563 git_dir,
1564 format,
1565 options,
1566 )?
1567 .entries)
1568}
1569
1570pub fn diff_name_status_index_worktree_with_options_and_gitlinks(
1571 worktree_root: impl AsRef<Path>,
1572 git_dir: impl AsRef<Path>,
1573 format: ObjectFormat,
1574 options: DiffNameStatusOptions,
1575) -> Result<IndexWorktreeDiff> {
1576 let IndexWorktreeDiff {
1577 entries,
1578 staged_gitlinks,
1579 } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1580 let entries = apply_name_status_options_to_index_worktree_changes(entries, options)?;
1581 Ok(IndexWorktreeDiff {
1582 entries,
1583 staged_gitlinks,
1584 })
1585}
1586
1587pub fn diff_name_status_index_worktree_with_rename_options(
1591 worktree_root: impl AsRef<Path>,
1592 git_dir: impl AsRef<Path>,
1593 format: ObjectFormat,
1594 options: RenameDetectionOptions,
1595) -> Result<Vec<NameStatusEntry>> {
1596 Ok(
1597 diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1598 worktree_root,
1599 git_dir,
1600 format,
1601 options,
1602 )?
1603 .entries,
1604 )
1605}
1606
1607pub fn diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1608 worktree_root: impl AsRef<Path>,
1609 git_dir: impl AsRef<Path>,
1610 format: ObjectFormat,
1611 options: RenameDetectionOptions,
1612) -> Result<IndexWorktreeDiff> {
1613 let IndexWorktreeDiff {
1614 entries,
1615 staged_gitlinks,
1616 } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1617 let entries = apply_name_status_options_to_index_worktree_changes(entries, options.base)?;
1621 Ok(IndexWorktreeDiff {
1622 entries,
1623 staged_gitlinks,
1624 })
1625}
1626
1627fn diff_name_status_index_worktree_changes(
1628 worktree_root: &Path,
1629 git_dir: &Path,
1630 format: ObjectFormat,
1631) -> Result<IndexWorktreeDiff> {
1632 let index_path = sley_index::repository_index_path(git_dir);
1633 let index_metadata = match fs::metadata(&index_path) {
1634 Ok(metadata) => metadata,
1635 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
1636 return Ok(IndexWorktreeDiff {
1637 entries: Vec::new(),
1638 staged_gitlinks: Vec::new(),
1639 });
1640 }
1641 Err(err) => return Err(err.into()),
1642 };
1643 let index_bytes = fs::read(&index_path)?;
1644 if let Ok(index) = BorrowedIndex::parse(&index_bytes, format) {
1645 let (has_non_normal_stage, staged_gitlinks) =
1646 index_worktree_metadata_for_entries(&index.entries);
1647 if has_non_normal_stage {
1648 return diff_name_status_index_worktree_changes_from_snapshot(
1649 worktree_root,
1650 git_dir,
1651 format,
1652 );
1653 }
1654 let stat_cache =
1655 IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1656 let entries = diff_name_status_index_worktree_changes_for_borrowed_entries(
1657 worktree_root,
1658 format,
1659 &index.entries,
1660 &stat_cache,
1661 )?;
1662 return Ok(IndexWorktreeDiff {
1663 entries,
1664 staged_gitlinks,
1665 });
1666 }
1667 let index = Index::parse(&index_bytes, format)?;
1668 let (has_non_normal_stage, staged_gitlinks) =
1669 index_worktree_metadata_for_entries(&index.entries);
1670 if has_non_normal_stage {
1671 return diff_name_status_index_worktree_changes_from_snapshot(
1672 worktree_root,
1673 git_dir,
1674 format,
1675 );
1676 }
1677 let stat_cache =
1678 IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1679 let entries = diff_name_status_index_worktree_changes_for_entries(
1680 worktree_root,
1681 format,
1682 &index.entries,
1683 &stat_cache,
1684 )?;
1685 Ok(IndexWorktreeDiff {
1686 entries,
1687 staged_gitlinks,
1688 })
1689}
1690
1691fn diff_name_status_index_worktree_changes_for_borrowed_entries(
1692 worktree_root: &Path,
1693 format: ObjectFormat,
1694 entries: &[sley_index::IndexEntryRef<'_>],
1695 stat_cache: &IndexStatCache,
1696) -> Result<Vec<NameStatusEntry>> {
1697 const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1698 let workers = std::thread::available_parallelism()
1699 .map(|count| count.get())
1700 .unwrap_or(1)
1701 .min(8);
1702 if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1703 return diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1704 worktree_root,
1705 format,
1706 entries,
1707 stat_cache,
1708 );
1709 }
1710 let chunk_size = entries.len().div_ceil(workers);
1711 std::thread::scope(|scope| {
1712 let mut handles = Vec::new();
1713 for chunk in entries.chunks(chunk_size) {
1714 handles.push(scope.spawn(move || {
1715 diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1716 worktree_root,
1717 format,
1718 chunk,
1719 stat_cache,
1720 )
1721 }));
1722 }
1723 let mut changes = Vec::new();
1724 for handle in handles {
1725 let chunk_changes = handle
1726 .join()
1727 .map_err(|_| GitError::Command("diff worker panicked".into()))??;
1728 changes.extend(chunk_changes);
1729 }
1730 Ok(changes)
1731 })
1732}
1733
1734fn diff_name_status_index_worktree_changes_for_entries(
1735 worktree_root: &Path,
1736 format: ObjectFormat,
1737 entries: &[sley_index::IndexEntry],
1738 stat_cache: &IndexStatCache,
1739) -> Result<Vec<NameStatusEntry>> {
1740 const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1741 let workers = std::thread::available_parallelism()
1742 .map(|count| count.get())
1743 .unwrap_or(1)
1744 .min(8);
1745 if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1746 return diff_name_status_index_worktree_changes_for_entry_chunk(
1747 worktree_root,
1748 format,
1749 entries,
1750 stat_cache,
1751 );
1752 }
1753 let chunk_size = entries.len().div_ceil(workers);
1754 std::thread::scope(|scope| {
1755 let mut handles = Vec::new();
1756 for chunk in entries.chunks(chunk_size) {
1757 handles.push(scope.spawn(move || {
1758 diff_name_status_index_worktree_changes_for_entry_chunk(
1759 worktree_root,
1760 format,
1761 chunk,
1762 stat_cache,
1763 )
1764 }));
1765 }
1766 let mut changes = Vec::new();
1767 for handle in handles {
1768 let chunk_changes = handle
1769 .join()
1770 .map_err(|_| GitError::Command("diff worker panicked".into()))??;
1771 changes.extend(chunk_changes);
1772 }
1773 Ok(changes)
1774 })
1775}
1776
1777fn diff_name_status_index_worktree_changes_for_entry_chunk(
1778 worktree_root: &Path,
1779 format: ObjectFormat,
1780 entries: &[sley_index::IndexEntry],
1781 stat_cache: &IndexStatCache,
1782) -> Result<Vec<NameStatusEntry>> {
1783 let mut changes = Vec::new();
1784 let mut path = PathBuf::from(worktree_root);
1785 for entry in entries {
1786 worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path.as_bytes());
1787 if let Some(change) = index_worktree_change_for_entry(&path, format, entry, &stat_cache)? {
1788 changes.push(change);
1789 }
1790 }
1791 Ok(changes)
1792}
1793
1794fn diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1795 worktree_root: &Path,
1796 format: ObjectFormat,
1797 entries: &[sley_index::IndexEntryRef<'_>],
1798 stat_cache: &IndexStatCache,
1799) -> Result<Vec<NameStatusEntry>> {
1800 let mut changes = Vec::new();
1801 let mut path = PathBuf::from(worktree_root);
1802 for entry in entries {
1803 worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path);
1804 if let Some(change) = index_worktree_change_for_entry(&path, format, entry, &stat_cache)? {
1805 changes.push(change);
1806 }
1807 }
1808 Ok(changes)
1809}
1810
1811fn index_worktree_metadata_for_entries(
1812 entries: &[impl WorktreeIndexEntry],
1813) -> (bool, Vec<IndexGitlinkEntry>) {
1814 let mut has_non_normal_stage = false;
1815 let mut staged_gitlinks = Vec::new();
1816 for entry in entries {
1817 if entry.stage() != sley_index::Stage::Normal {
1818 has_non_normal_stage = true;
1819 }
1820 if entry.mode() == 0o160000 {
1821 staged_gitlinks.push(IndexGitlinkEntry {
1822 path: BString::from_bytes(entry.git_path()),
1823 oid: entry.oid(),
1824 });
1825 }
1826 }
1827 (has_non_normal_stage, staged_gitlinks)
1828}
1829
1830fn diff_name_status_index_worktree_changes_from_snapshot(
1831 worktree_root: &Path,
1832 git_dir: &Path,
1833 format: ObjectFormat,
1834) -> Result<IndexWorktreeDiff> {
1835 let IndexSnapshot {
1836 entries: index,
1837 stat_cache,
1838 } = read_index_snapshot(git_dir, format)?;
1839 let index_gitlinks = index_gitlinks(&index);
1840 let staged_gitlinks = index_gitlinks
1841 .iter()
1842 .map(|(path, oid)| IndexGitlinkEntry {
1843 path: BString::from_bytes(path),
1844 oid: *oid,
1845 })
1846 .collect();
1847 let mut changes = Vec::new();
1848 for (git_path, left) in &index {
1849 let right = worktree_entry_for_path(
1850 worktree_root,
1851 format,
1852 git_path,
1853 &index_gitlinks,
1854 Some(&stat_cache),
1855 )?;
1856 let Some(right) = right else {
1857 changes.push(NameStatusEntry {
1858 status: NameStatus::Deleted,
1859 path: git_path.clone().into(),
1860 old_path: None,
1861 old_mode: Some(left.mode),
1862 new_mode: None,
1863 old_oid: Some(left.oid),
1864 new_oid: None,
1865 });
1866 continue;
1867 };
1868 if right != *left {
1869 changes.push(NameStatusEntry {
1870 status: NameStatus::Modified,
1871 path: git_path.clone().into(),
1872 old_path: None,
1873 old_mode: Some(left.mode),
1874 new_mode: Some(right.mode),
1875 old_oid: Some(left.oid),
1876 new_oid: Some(right.oid),
1877 });
1878 }
1879 }
1880 Ok(IndexWorktreeDiff {
1881 entries: changes,
1882 staged_gitlinks,
1883 })
1884}
1885
1886fn apply_name_status_options_to_index_worktree_changes(
1887 mut changes: Vec<NameStatusEntry>,
1888 options: DiffNameStatusOptions,
1889) -> Result<Vec<NameStatusEntry>> {
1890 if options.detect_renames || options.detect_copies {
1891 changes.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
1892 }
1893 Ok(changes)
1894}
1895
1896pub fn diff_name_status_index_worktree_for_diff_files_with_options(
1920 worktree_root: impl AsRef<Path>,
1921 git_dir: impl AsRef<Path>,
1922 format: ObjectFormat,
1923 options: DiffNameStatusOptions,
1924) -> Result<Vec<NameStatusEntry>> {
1925 let worktree_root = worktree_root.as_ref();
1926 let git_dir = git_dir.as_ref();
1927 let changes =
1928 diff_name_status_index_worktree_with_options(worktree_root, git_dir, format, options)?;
1929 augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
1930}
1931
1932pub fn diff_name_status_index_worktree_for_diff_files_with_rename_options(
1936 worktree_root: impl AsRef<Path>,
1937 git_dir: impl AsRef<Path>,
1938 format: ObjectFormat,
1939 options: RenameDetectionOptions,
1940) -> Result<Vec<NameStatusEntry>> {
1941 let worktree_root = worktree_root.as_ref();
1942 let git_dir = git_dir.as_ref();
1943 let changes = diff_name_status_index_worktree_with_rename_options(
1944 worktree_root,
1945 git_dir,
1946 format,
1947 options,
1948 )?;
1949 augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
1950}
1951
1952fn augment_with_stat_dirty_entries(
1959 worktree_root: &Path,
1960 git_dir: &Path,
1961 format: ObjectFormat,
1962 mut content_changes: Vec<NameStatusEntry>,
1963) -> Result<Vec<NameStatusEntry>> {
1964 let IndexSnapshot {
1965 entries: index,
1966 stat_cache,
1967 } = read_index_snapshot(git_dir, format)?;
1968 let already_reported: BTreeSet<&[u8]> = content_changes
1971 .iter()
1972 .map(|entry| entry.path.as_bytes())
1973 .collect();
1974 let mut extras = Vec::new();
1975 for (git_path, tracked) in &index {
1976 if already_reported.contains(git_path.as_slice()) {
1977 continue;
1978 }
1979 let Some(cached) = stat_cache.entry_for_git_path(git_path) else {
1980 continue;
1981 };
1982 if tracked.mode == 0o160000 {
1985 continue;
1986 }
1987 let path = worktree_path_for_repo_path(worktree_root, git_path);
1988 let Ok(metadata) = fs::symlink_metadata(&path) else {
1989 continue;
1992 };
1993 if !(metadata.is_file() || metadata.file_type().is_symlink()) {
1994 continue;
1995 }
1996 match stat_cache.index_entry_worktree_stat_verdict(cached, &metadata) {
1997 sley_index::StatVerdict::Clean => continue,
1998 sley_index::StatVerdict::Dirty => {}
1999 sley_index::StatVerdict::RacyNeedsContentCheck => {
2004 if worktree_oid_matches_index(worktree_root, git_path, &metadata, tracked, format)? {
2005 continue;
2006 }
2007 }
2008 }
2009 extras.push(NameStatusEntry {
2010 status: NameStatus::Modified,
2011 path: git_path.clone().into(),
2012 old_path: None,
2013 old_mode: Some(tracked.mode),
2014 new_mode: Some(tracked.mode),
2015 old_oid: Some(tracked.oid),
2016 new_oid: None,
2017 });
2018 }
2019 if !extras.is_empty() {
2020 content_changes.extend(extras);
2021 content_changes
2022 .sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2023 }
2024 Ok(content_changes)
2025}
2026
2027fn worktree_oid_matches_index(
2032 worktree_root: &Path,
2033 git_path: &[u8],
2034 metadata: &fs::Metadata,
2035 index_entry: &TrackedEntry,
2036 format: ObjectFormat,
2037) -> Result<bool> {
2038 let file_type = metadata.file_type();
2039 let path = worktree_path_for_repo_path(worktree_root, git_path);
2040 let body = if file_type.is_symlink() {
2041 symlink_target_bytes(&path)?
2042 } else {
2043 fs::read(&path)?
2044 };
2045 let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
2046 let mode = if file_type.is_symlink() {
2047 0o120000
2048 } else {
2049 file_mode(metadata)
2050 };
2051 Ok(oid == index_entry.oid && mode == index_entry.mode)
2052}
2053
2054pub fn diff_name_status_trees_with_options(
2055 db: &FileObjectDatabase,
2056 format: ObjectFormat,
2057 left_tree: &ObjectId,
2058 right_tree: &ObjectId,
2059 options: DiffNameStatusOptions,
2060) -> Result<Vec<NameStatusEntry>> {
2061 let needs_full_maps = options.detect_copies && options.find_copies_harder;
2066 let (left_entries, right_entries) = if needs_full_maps {
2067 collect_full_tree_pair(db, format, left_tree, right_tree)?
2068 } else {
2069 changed_tree_entries(db, format, left_tree, right_tree)?
2070 };
2071 diff_name_status_maps(
2072 &left_entries,
2073 &right_entries,
2074 left_entries.keys().chain(right_entries.keys()),
2075 options,
2076 )
2077}
2078
2079pub fn diff_name_status_empty_tree_with_options(
2080 db: &FileObjectDatabase,
2081 format: ObjectFormat,
2082 right_tree: &ObjectId,
2083 options: DiffNameStatusOptions,
2084) -> Result<Vec<NameStatusEntry>> {
2085 let left_entries = BTreeMap::new();
2086 let mut right_entries = BTreeMap::new();
2087 collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2088 diff_name_status_maps(&left_entries, &right_entries, right_entries.keys(), options)
2089}
2090
2091pub fn diff_name_status_trees_with_rename_options(
2099 db: &FileObjectDatabase,
2100 format: ObjectFormat,
2101 left_tree: &ObjectId,
2102 right_tree: &ObjectId,
2103 options: RenameDetectionOptions,
2104) -> Result<Vec<NameStatusEntry>> {
2105 let needs_full_maps = options.base.detect_copies && options.base.find_copies_harder;
2109 let (left_entries, right_entries) = if needs_full_maps {
2110 collect_full_tree_pair(db, format, left_tree, right_tree)?
2111 } else {
2112 changed_tree_entries(db, format, left_tree, right_tree)?
2113 };
2114 diff_name_status_maps_with_renames(
2115 &left_entries,
2116 &right_entries,
2117 left_entries.keys().chain(right_entries.keys()),
2118 options,
2119 |oid| read_blob_bytes(db, oid),
2120 )
2121}
2122
2123pub fn diff_name_status_empty_tree_with_rename_options(
2130 db: &FileObjectDatabase,
2131 format: ObjectFormat,
2132 right_tree: &ObjectId,
2133 options: RenameDetectionOptions,
2134) -> Result<Vec<NameStatusEntry>> {
2135 let left_entries = BTreeMap::new();
2136 let mut right_entries = BTreeMap::new();
2137 collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2138 diff_name_status_maps_with_renames(
2139 &left_entries,
2140 &right_entries,
2141 right_entries.keys(),
2142 options,
2143 |oid| read_blob_bytes(db, oid),
2144 )
2145}
2146
2147fn read_blob_bytes(db: &FileObjectDatabase, oid: &ObjectId) -> Option<Vec<u8>> {
2152 match db.read_object(oid) {
2153 Ok(object) if object.object_type == ObjectType::Blob => Some(object.body.clone()),
2154 _ => None,
2155 }
2156}
2157
2158fn raw_name_status_changes_for_unique_paths<'a>(
2161 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2162 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2163 paths: impl Iterator<Item = &'a Vec<u8>>,
2164) -> Vec<NameStatusEntry> {
2165 let mut changes = Vec::new();
2166 for path in paths {
2167 let left = left_entries.get(path);
2168 let right = right_entries.get(path);
2169 let status = match (left, right) {
2170 (None, Some(_)) => Some(NameStatus::Added),
2171 (Some(_), None) => Some(NameStatus::Deleted),
2172 (Some(left), Some(right)) if left != right => Some(NameStatus::Modified),
2173 _ => None,
2174 };
2175 if let Some(status) = status {
2176 changes.push(NameStatusEntry {
2177 status,
2178 path: path.clone().into(),
2179 old_path: None,
2180 old_mode: left.map(|entry| entry.mode),
2181 new_mode: right.map(|entry| entry.mode),
2182 old_oid: left.map(|entry| entry.oid),
2183 new_oid: right.map(|entry| entry.oid),
2184 });
2185 }
2186 }
2187 changes
2188}
2189
2190fn diff_name_status_maps<'a>(
2191 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2192 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2193 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2194 options: DiffNameStatusOptions,
2195) -> Result<Vec<NameStatusEntry>> {
2196 let paths = candidate_path_set(candidate_paths);
2197 diff_name_status_maps_for_path_set(left_entries, right_entries, &paths, options)
2198}
2199
2200fn diff_name_status_maps_for_path_set(
2201 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2202 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2203 candidate_paths: &BTreeSet<Vec<u8>>,
2204 options: DiffNameStatusOptions,
2205) -> Result<Vec<NameStatusEntry>> {
2206 diff_name_status_maps_for_unique_paths(
2207 left_entries,
2208 right_entries,
2209 candidate_paths.iter(),
2210 options,
2211 )
2212}
2213
2214fn diff_name_status_maps_for_unique_paths<'a>(
2215 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2216 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2217 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2218 options: DiffNameStatusOptions,
2219) -> Result<Vec<NameStatusEntry>> {
2220 let mut changes =
2221 raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2222 if options.detect_renames {
2223 changes = detect_exact_renames(changes, left_entries, right_entries, options.rename_empty);
2224 }
2225 if options.detect_copies {
2226 changes = detect_exact_copies(
2227 changes,
2228 left_entries,
2229 right_entries,
2230 options.find_copies_harder,
2231 options.rename_empty,
2232 );
2233 }
2234 Ok(changes)
2235}
2236
2237fn diff_name_status_maps_with_renames<'a>(
2246 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2247 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2248 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2249 options: RenameDetectionOptions,
2250 fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2251) -> Result<Vec<NameStatusEntry>> {
2252 let paths = candidate_path_set(candidate_paths);
2253 diff_name_status_maps_with_renames_for_path_set(
2254 left_entries,
2255 right_entries,
2256 &paths,
2257 options,
2258 fetch_blob,
2259 )
2260}
2261
2262fn diff_name_status_maps_with_renames_for_path_set(
2263 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2264 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2265 candidate_paths: &BTreeSet<Vec<u8>>,
2266 options: RenameDetectionOptions,
2267 fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2268) -> Result<Vec<NameStatusEntry>> {
2269 diff_name_status_maps_with_renames_for_unique_paths(
2270 left_entries,
2271 right_entries,
2272 candidate_paths.iter(),
2273 options,
2274 fetch_blob,
2275 )
2276}
2277
2278fn diff_name_status_maps_with_renames_for_unique_paths<'a>(
2279 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2280 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2281 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2282 options: RenameDetectionOptions,
2283 fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2284) -> Result<Vec<NameStatusEntry>> {
2285 let base = options.base;
2286 let mut changes =
2287 raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2288 if base.detect_renames {
2289 changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
2290 }
2291 if base.detect_renames && options.detect_inexact {
2295 changes = detect_inexact_renames(changes, &options, &fetch_blob);
2296 }
2297 if base.detect_copies {
2298 changes = detect_exact_copies(
2299 changes,
2300 left_entries,
2301 right_entries,
2302 base.find_copies_harder,
2303 base.rename_empty,
2304 );
2305 }
2306 if base.detect_copies && options.detect_inexact {
2307 changes = detect_inexact_copies(changes, left_entries, &options, &fetch_blob);
2308 }
2309 Ok(changes)
2310}
2311
2312fn detect_exact_renames(
2313 changes: Vec<NameStatusEntry>,
2314 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2315 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2316 rename_empty: bool,
2317) -> Vec<NameStatusEntry> {
2318 let added = changes
2319 .iter()
2320 .enumerate()
2321 .filter(|(_, entry)| entry.status == NameStatus::Added)
2322 .map(|(idx, entry)| (idx, entry.path.clone()))
2323 .collect::<Vec<_>>();
2324 let deleted = changes
2325 .iter()
2326 .filter(|entry| entry.status == NameStatus::Deleted)
2327 .map(|entry| entry.path.clone())
2328 .collect::<Vec<_>>();
2329 let mut consumed = BTreeSet::new();
2330 let mut renamed_old_paths = BTreeSet::new();
2331 let mut result = Vec::new();
2332
2333 for old_path in deleted {
2334 let Some(left) = left_entries.get(old_path.as_bytes()) else {
2335 continue;
2336 };
2337 if let Some((idx, new_path)) = added.iter().find(|(idx, new_path)| {
2338 !consumed.contains(idx)
2339 && right_entries.get(new_path.as_bytes()).is_some_and(|right| {
2340 right.oid == left.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2341 })
2342 }) {
2343 consumed.insert(*idx);
2344 renamed_old_paths.insert(old_path.clone());
2345 let right = right_entries.get(new_path.as_bytes());
2346 result.push(NameStatusEntry {
2347 status: NameStatus::Renamed(100),
2348 path: new_path.clone(),
2349 old_path: Some(old_path),
2350 old_mode: Some(left.mode),
2351 new_mode: right.map(|entry| entry.mode),
2352 old_oid: Some(left.oid),
2353 new_oid: right.map(|entry| entry.oid),
2354 });
2355 }
2356 }
2357
2358 for (idx, entry) in changes.into_iter().enumerate() {
2359 if entry.status == NameStatus::Added && consumed.contains(&idx) {
2360 continue;
2361 }
2362 if entry.status == NameStatus::Deleted && renamed_old_paths.contains(&entry.path) {
2363 continue;
2364 }
2365 result.push(entry);
2366 }
2367 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2368 result
2369}
2370
2371fn detect_exact_copies(
2372 changes: Vec<NameStatusEntry>,
2373 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2374 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2375 find_copies_harder: bool,
2376 rename_empty: bool,
2377) -> Vec<NameStatusEntry> {
2378 let changed_sources = changes
2379 .iter()
2380 .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
2381 .map(|entry| entry.path.clone())
2382 .collect::<BTreeSet<_>>();
2383 let source_paths = left_entries
2384 .keys()
2385 .filter(|path| find_copies_harder || changed_sources.contains(path.as_slice()))
2386 .cloned()
2387 .collect::<Vec<_>>();
2388
2389 let mut result = Vec::new();
2390 for entry in changes {
2391 if entry.status != NameStatus::Added {
2392 result.push(entry);
2393 continue;
2394 }
2395 let Some(right) = right_entries.get(entry.path.as_bytes()) else {
2396 result.push(entry);
2397 continue;
2398 };
2399 if let Some(old_path) = source_paths.iter().find(|old_path| {
2400 old_path.as_slice() != entry.path.as_bytes()
2401 && left_entries.get(*old_path).is_some_and(|left| {
2402 left.oid == right.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2403 })
2404 }) {
2405 result.push(NameStatusEntry {
2406 status: NameStatus::Copied(100),
2407 path: entry.path,
2408 old_path: Some(old_path.clone().into()),
2409 old_mode: left_entries
2410 .get(old_path.as_slice())
2411 .map(|entry| entry.mode),
2412 new_mode: entry.new_mode,
2413 old_oid: left_entries.get(old_path.as_slice()).map(|entry| entry.oid),
2414 new_oid: entry.new_oid,
2415 });
2416 } else {
2417 result.push(entry);
2418 }
2419 }
2420 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2421 result
2422}
2423
2424#[derive(Debug, Clone)]
2427struct RenameSourceMeta {
2428 path: BString,
2429 mode: Option<u32>,
2430 oid: Option<ObjectId>,
2431}
2432
2433struct ScoredPair {
2436 src: usize,
2438 dst: usize,
2440 score: u8,
2442}
2443
2444fn detect_inexact_renames(
2455 changes: Vec<NameStatusEntry>,
2456 options: &RenameDetectionOptions,
2457 fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
2458) -> Vec<NameStatusEntry> {
2459 let threshold = options.rename_threshold;
2460 if threshold > 100 {
2462 return changes;
2463 }
2464
2465 let mut deleted: Vec<(usize, Vec<u8>)> = Vec::new();
2468 let mut added: Vec<(usize, Vec<u8>)> = Vec::new();
2469 for (idx, entry) in changes.iter().enumerate() {
2470 match entry.status {
2471 NameStatus::Deleted => {
2472 let Some(oid) = entry.old_oid.as_ref() else {
2473 continue;
2474 };
2475 if !options.base.rename_empty && is_empty_blob_oid(oid) {
2476 continue;
2477 }
2478 if let Some(bytes) = fetch_blob(oid) {
2479 deleted.push((idx, bytes));
2480 }
2481 }
2482 NameStatus::Added => {
2483 let Some(oid) = entry.new_oid.as_ref() else {
2484 continue;
2485 };
2486 if !options.base.rename_empty && is_empty_blob_oid(oid) {
2487 continue;
2488 }
2489 if let Some(bytes) = fetch_blob(oid) {
2490 added.push((idx, bytes));
2491 }
2492 }
2493 _ => {}
2494 }
2495 }
2496
2497 if deleted.is_empty() || added.is_empty() {
2498 return changes;
2499 }
2500
2501 let mut pairs: Vec<ScoredPair> = Vec::new();
2503 for (si, (_, src_bytes)) in deleted.iter().enumerate() {
2504 for (di, (_, dst_bytes)) in added.iter().enumerate() {
2505 let score = blob_similarity(src_bytes, dst_bytes);
2506 if score >= threshold {
2507 pairs.push(ScoredPair {
2508 src: si,
2509 dst: di,
2510 score,
2511 });
2512 }
2513 }
2514 }
2515 pairs.sort_by(|a, b| {
2518 b.score
2519 .cmp(&a.score)
2520 .then_with(|| a.src.cmp(&b.src))
2521 .then_with(|| a.dst.cmp(&b.dst))
2522 });
2523
2524 let mut src_used = vec![false; deleted.len()];
2526 let mut dst_used = vec![false; added.len()];
2527 let mut rename_of: BTreeMap<usize, (usize, u8)> = BTreeMap::new();
2529 for pair in pairs {
2530 if src_used[pair.src] || dst_used[pair.dst] {
2531 continue;
2532 }
2533 src_used[pair.src] = true;
2534 dst_used[pair.dst] = true;
2535 let src_change_idx = deleted[pair.src].0;
2536 let dst_change_idx = added[pair.dst].0;
2537 rename_of.insert(dst_change_idx, (src_change_idx, pair.score));
2538 }
2539
2540 if rename_of.is_empty() {
2541 return changes;
2542 }
2543
2544 let consumed_sources: BTreeSet<usize> =
2547 rename_of.values().map(|(src_idx, _)| *src_idx).collect();
2548 let source_meta: BTreeMap<usize, RenameSourceMeta> = consumed_sources
2549 .iter()
2550 .map(|&src_idx| {
2551 let src = &changes[src_idx];
2552 (
2553 src_idx,
2554 RenameSourceMeta {
2555 path: src.path.clone(),
2556 mode: src.old_mode,
2557 oid: src.old_oid,
2558 },
2559 )
2560 })
2561 .collect();
2562
2563 let mut result = Vec::with_capacity(changes.len());
2564 for (idx, entry) in changes.into_iter().enumerate() {
2565 if consumed_sources.contains(&idx) {
2566 continue;
2568 }
2569 if let Some((src_idx, score)) = rename_of.get(&idx) {
2570 let meta = source_meta
2574 .get(src_idx)
2575 .cloned()
2576 .unwrap_or(RenameSourceMeta {
2577 path: BString::default(),
2578 mode: None,
2579 oid: None,
2580 });
2581 result.push(NameStatusEntry {
2582 status: NameStatus::Renamed(*score),
2583 path: entry.path,
2584 old_path: Some(meta.path),
2585 old_mode: meta.mode,
2586 new_mode: entry.new_mode,
2587 old_oid: meta.oid,
2588 new_oid: entry.new_oid,
2589 });
2590 continue;
2591 }
2592 result.push(entry);
2593 }
2594
2595 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2596 result
2597}
2598
2599fn detect_inexact_copies(
2609 changes: Vec<NameStatusEntry>,
2610 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2611 options: &RenameDetectionOptions,
2612 fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
2613) -> Vec<NameStatusEntry> {
2614 let threshold = options.copy_threshold;
2615 if threshold > 100 {
2616 return changes;
2617 }
2618
2619 let changed_sources = changes
2620 .iter()
2621 .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
2622 .map(|entry| entry.path.clone())
2623 .collect::<BTreeSet<_>>();
2624 let mut sources: Vec<(Vec<u8>, &TrackedEntry, Vec<u8>)> = Vec::new();
2626 for (path, tracked) in left_entries {
2627 if !(options.base.find_copies_harder || changed_sources.contains(path.as_slice())) {
2628 continue;
2629 }
2630 if !options.base.rename_empty && is_empty_blob_oid(&tracked.oid) {
2631 continue;
2632 }
2633 if let Some(bytes) = fetch_blob(&tracked.oid) {
2634 sources.push((path.clone(), tracked, bytes));
2635 }
2636 }
2637 if sources.is_empty() {
2638 return changes;
2639 }
2640
2641 let mut result = Vec::with_capacity(changes.len());
2642 for entry in changes {
2643 if entry.status != NameStatus::Added {
2644 result.push(entry);
2645 continue;
2646 }
2647 let Some(new_oid) = entry.new_oid.as_ref() else {
2648 result.push(entry);
2649 continue;
2650 };
2651 let Some(dst_bytes) = fetch_blob(new_oid) else {
2652 result.push(entry);
2653 continue;
2654 };
2655
2656 let mut best: Option<(usize, u8)> = None;
2660 for (i, (src_path, _, src_bytes)) in sources.iter().enumerate() {
2661 if src_path.as_slice() == entry.path.as_bytes() {
2662 continue;
2663 }
2664 let score = blob_similarity(src_bytes, &dst_bytes);
2665 if score < threshold {
2666 continue;
2667 }
2668 match best {
2669 Some((_, best_score)) if best_score >= score => {}
2670 _ => best = Some((i, score)),
2671 }
2672 }
2673
2674 if let Some((src_idx, score)) = best {
2675 let (src_path, src_tracked, _) = &sources[src_idx];
2676 result.push(NameStatusEntry {
2677 status: NameStatus::Copied(score),
2678 path: entry.path,
2679 old_path: Some(src_path.clone().into()),
2680 old_mode: Some(src_tracked.mode),
2681 new_mode: entry.new_mode,
2682 old_oid: Some(src_tracked.oid),
2683 new_oid: entry.new_oid,
2684 });
2685 } else {
2686 result.push(entry);
2687 }
2688 }
2689 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2690 result
2691}
2692
2693fn is_empty_blob_oid(oid: &ObjectId) -> bool {
2694 object_id_for_bytes(oid.format(), "blob", b"").is_ok_and(|empty| empty == *oid)
2695}
2696
2697const MAX_SPAN_BYTES: usize = 64;
2738
2739pub fn blob_similarity(a: &[u8], b: &[u8]) -> u8 {
2751 if a == b {
2753 return 100;
2754 }
2755 let max_size = a.len().max(b.len());
2756 if max_size == 0 {
2757 return 100;
2760 }
2761
2762 let src = span_hash_counts(a);
2763 let dst = span_hash_counts(b);
2764 let common = common_span_bytes(&src, &dst);
2765
2766 const MAX_SCORE: u64 = 60000;
2774 let internal = (common as u64 * MAX_SCORE) / max_size as u64;
2775 let score = internal * 100 / MAX_SCORE;
2776 score.min(100) as u8
2777}
2778
2779fn span_hash_counts(data: &[u8]) -> BTreeMap<u64, usize> {
2786 let mut counts: BTreeMap<u64, usize> = BTreeMap::new();
2787 let mut idx = 0usize;
2788 let len = data.len();
2789 while idx < len {
2790 let mut accum1: u32 = 0;
2795 let mut accum2: u32 = 0;
2796 let mut span_len = 0usize;
2797 loop {
2798 let c = data[idx] as u32;
2799 idx += 1;
2800 span_len += 1;
2801 accum1 = (accum1 << 7) ^ (accum2 >> 25);
2802 accum2 = (accum2 << 7) ^ (accum1 >> 25);
2803 accum1 = accum1.wrapping_add(c);
2804 let newline = c == u32::from(b'\n');
2805 if span_len >= MAX_SPAN_BYTES || newline || idx >= len {
2806 break;
2807 }
2808 }
2809 let hash = ((accum1 as u64) << 32) ^ (accum2 as u64) ^ ((span_len as u64) << 1);
2813 *counts.entry(hash).or_insert(0) += span_len;
2814 }
2815 counts
2816}
2817
2818pub fn count_changes(src: &[u8], dst: &[u8]) -> (usize, usize) {
2827 let src_counts = span_hash_counts(src);
2828 let dst_counts = span_hash_counts(dst);
2829 let copied = common_span_bytes(&src_counts, &dst_counts);
2830 (copied, dst.len() - copied)
2831}
2832
2833fn common_span_bytes(src: &BTreeMap<u64, usize>, dst: &BTreeMap<u64, usize>) -> usize {
2834 let mut common = 0usize;
2835 let (small, large) = if src.len() <= dst.len() {
2837 (src, dst)
2838 } else {
2839 (dst, src)
2840 };
2841 for (hash, small_bytes) in small {
2842 if let Some(large_bytes) = large.get(hash) {
2843 common += (*small_bytes).min(*large_bytes);
2844 }
2845 }
2846 common
2847}
2848
2849fn diff_entry_sort_path(entry: &NameStatusEntry) -> &[u8] {
2850 entry.path.as_bytes()
2853}
2854
2855fn mark_unstaged_worktree_oids_unresolved(
2856 changes: Vec<NameStatusEntry>,
2857 index_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2858 worktree_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2859) -> Vec<NameStatusEntry> {
2860 changes
2861 .into_iter()
2862 .map(|mut entry| {
2863 let worktree_entry = worktree_entries.get(entry.path.as_bytes());
2864 if worktree_entry != index_entries.get(entry.path.as_bytes()) {
2865 entry.new_oid = None;
2866 }
2867 entry
2868 })
2869 .collect()
2870}
2871
2872#[derive(Debug, Clone, PartialEq, Eq)]
2873struct TrackedEntry {
2874 mode: u32,
2875 oid: ObjectId,
2876}
2877
2878type TrackedEntryMap = BTreeMap<Vec<u8>, TrackedEntry>;
2881
2882type TrackedEntryPair = (TrackedEntryMap, TrackedEntryMap);
2884
2885struct IndexSnapshot {
2886 entries: BTreeMap<Vec<u8>, TrackedEntry>,
2887 stat_cache: IndexStatCache,
2888}
2889
2890fn read_index_entries(
2891 git_dir: &Path,
2892 format: ObjectFormat,
2893) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
2894 Ok(read_index_snapshot(git_dir, format)?.entries)
2895}
2896
2897fn read_index_snapshot(git_dir: &Path, format: ObjectFormat) -> Result<IndexSnapshot> {
2898 let index_path = sley_index::repository_index_path(git_dir);
2899 let index_metadata = match fs::metadata(&index_path) {
2900 Ok(metadata) => metadata,
2901 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
2902 return Ok(IndexSnapshot {
2903 entries: BTreeMap::new(),
2904 stat_cache: IndexStatCache::default(),
2905 });
2906 }
2907 Err(err) => return Err(err.into()),
2908 };
2909 let index = Index::parse(&fs::read(&index_path)?, format)?;
2910 let stat_cache =
2911 IndexStatCache::from_index_mtime(&index, sley_index::file_mtime_parts(&index_metadata));
2912 let entries = index
2913 .entries
2914 .into_iter()
2915 .map(|entry| {
2916 (
2917 entry.path.into_bytes(),
2918 TrackedEntry {
2919 mode: entry.mode,
2920 oid: entry.oid,
2921 },
2922 )
2923 })
2924 .collect();
2925 Ok(IndexSnapshot {
2926 entries,
2927 stat_cache,
2928 })
2929}
2930
2931trait WorktreeIndexEntry {
2932 fn git_path(&self) -> &[u8];
2933 fn stage(&self) -> sley_index::Stage;
2934 fn mode(&self) -> u32;
2935 fn oid(&self) -> ObjectId;
2936 fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool;
2937}
2938
2939impl WorktreeIndexEntry for sley_index::IndexEntry {
2940 fn git_path(&self) -> &[u8] {
2941 self.path.as_bytes()
2942 }
2943
2944 fn stage(&self) -> sley_index::Stage {
2945 sley_index::IndexEntry::stage(self)
2946 }
2947
2948 fn mode(&self) -> u32 {
2949 self.mode
2950 }
2951
2952 fn oid(&self) -> ObjectId {
2953 self.oid
2954 }
2955
2956 fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
2957 stat_cache.reusable_index_entry(self, metadata).is_some()
2958 }
2959}
2960
2961impl WorktreeIndexEntry for sley_index::IndexEntryRef<'_> {
2962 fn git_path(&self) -> &[u8] {
2963 self.path
2964 }
2965
2966 fn stage(&self) -> sley_index::Stage {
2967 sley_index::IndexEntryRef::stage(self)
2968 }
2969
2970 fn mode(&self) -> u32 {
2971 self.mode
2972 }
2973
2974 fn oid(&self) -> ObjectId {
2975 self.oid
2976 }
2977
2978 fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
2979 stat_cache.reusable_index_entry_ref(self, metadata)
2980 }
2981}
2982
2983fn tracked_entry_from_index(entry: &impl WorktreeIndexEntry) -> TrackedEntry {
2984 TrackedEntry {
2985 mode: entry.mode(),
2986 oid: entry.oid(),
2987 }
2988}
2989
2990fn head_tree_entries(
2991 git_dir: &Path,
2992 format: ObjectFormat,
2993 db: &FileObjectDatabase,
2994) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
2995 let refs = FileRefStore::new(git_dir, format);
2996 let Some(head) = refs.read_ref("HEAD")? else {
2997 return Ok(BTreeMap::new());
2998 };
2999 let commit_oid = match head {
3000 RefTarget::Direct(oid) => Some(oid),
3001 RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
3002 Some(RefTarget::Direct(oid)) => Some(oid),
3003 _ => None,
3004 },
3005 };
3006 let Some(commit_oid) = commit_oid else {
3007 return Ok(BTreeMap::new());
3008 };
3009 let object = db.read_object(&commit_oid)?;
3010 if object.object_type != ObjectType::Commit {
3011 return Err(GitError::InvalidObject(format!(
3012 "HEAD {commit_oid} is not a commit"
3013 )));
3014 }
3015 let commit = Commit::parse_ref(format, &object.body)?;
3016 let mut entries = BTreeMap::new();
3017 collect_tree_entries(db, format, &commit.tree, Vec::new(), &mut entries)?;
3018 Ok(entries)
3019}
3020
3021fn collect_tree_entries(
3030 db: &FileObjectDatabase,
3031 format: ObjectFormat,
3032 tree_oid: &ObjectId,
3033 prefix: Vec<u8>,
3034 entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3035) -> Result<()> {
3036 for (rel_path, (mode, oid)) in flatten_tree(db, format, tree_oid)? {
3037 let path = join_tree_path(&prefix, &rel_path);
3038 entries.insert(path, TrackedEntry { mode, oid });
3039 }
3040 Ok(())
3041}
3042
3043const TREE_ENTRY_MODE: u32 = 0o040000;
3045
3046fn read_tree_object(
3050 db: &FileObjectDatabase,
3051 format: ObjectFormat,
3052 tree_oid: &ObjectId,
3053) -> Result<Tree> {
3054 let object = db.read_object(tree_oid)?;
3055 if object.object_type != ObjectType::Tree {
3056 return Err(GitError::InvalidObject(format!(
3057 "expected tree {tree_oid}, found {}",
3058 object.object_type.as_str()
3059 )));
3060 }
3061 Tree::parse(format, &object.body)
3062}
3063
3064fn join_tree_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
3067 let mut path = Vec::with_capacity(prefix.len() + 1 + name.len());
3068 path.extend_from_slice(prefix);
3069 if !path.is_empty() {
3070 path.push(b'/');
3071 }
3072 path.extend_from_slice(name);
3073 path
3074}
3075
3076fn collect_full_tree_pair(
3080 db: &FileObjectDatabase,
3081 format: ObjectFormat,
3082 left_tree: &ObjectId,
3083 right_tree: &ObjectId,
3084) -> Result<TrackedEntryPair> {
3085 let mut left = BTreeMap::new();
3086 collect_tree_entries(db, format, left_tree, Vec::new(), &mut left)?;
3087 let mut right = BTreeMap::new();
3088 collect_tree_entries(db, format, right_tree, Vec::new(), &mut right)?;
3089 Ok((left, right))
3090}
3091
3092fn changed_tree_entries(
3108 db: &FileObjectDatabase,
3109 format: ObjectFormat,
3110 left_tree: &ObjectId,
3111 right_tree: &ObjectId,
3112) -> Result<TrackedEntryPair> {
3113 let mut left = BTreeMap::new();
3114 let mut right = BTreeMap::new();
3115 if left_tree != right_tree {
3117 diff_tree_pair(
3118 db,
3119 format,
3120 left_tree,
3121 right_tree,
3122 &[],
3123 &mut left,
3124 &mut right,
3125 )?;
3126 }
3127 Ok((left, right))
3128}
3129
3130fn diff_tree_pair(
3134 db: &FileObjectDatabase,
3135 format: ObjectFormat,
3136 left_tree: &ObjectId,
3137 right_tree: &ObjectId,
3138 prefix: &[u8],
3139 left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3140 right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3141) -> Result<()> {
3142 let left_entries = read_tree_object(db, format, left_tree)?.entries;
3143 let right_entries = read_tree_object(db, format, right_tree)?.entries;
3144
3145 let mut right_by_name: HashMap<&[u8], &TreeEntry> = HashMap::with_capacity(right_entries.len());
3150 for entry in &right_entries {
3151 right_by_name.insert(entry.name.as_bytes(), entry);
3152 }
3153
3154 for left_entry in &left_entries {
3155 match right_by_name.remove(left_entry.name.as_bytes()) {
3156 Some(right_entry) => {
3157 merge_tree_entry(
3158 db,
3159 format,
3160 prefix,
3161 Some(left_entry),
3162 Some(right_entry),
3163 left,
3164 right,
3165 )?;
3166 }
3167 None => {
3168 merge_tree_entry(db, format, prefix, Some(left_entry), None, left, right)?;
3169 }
3170 }
3171 }
3172 for right_entry in &right_entries {
3174 if right_by_name.contains_key(right_entry.name.as_bytes()) {
3175 merge_tree_entry(db, format, prefix, None, Some(right_entry), left, right)?;
3176 }
3177 }
3178 Ok(())
3179}
3180
3181fn merge_tree_entry(
3194 db: &FileObjectDatabase,
3195 format: ObjectFormat,
3196 prefix: &[u8],
3197 left_entry: Option<&TreeEntry>,
3198 right_entry: Option<&TreeEntry>,
3199 left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3200 right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3201) -> Result<()> {
3202 let left_is_tree = left_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3203 let right_is_tree = right_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3204
3205 if let (Some(left_entry), Some(right_entry)) = (left_entry, right_entry) {
3206 if left_is_tree && right_is_tree {
3207 if left_entry.oid == right_entry.oid {
3209 return Ok(());
3210 }
3211 let path = join_tree_path(prefix, left_entry.name.as_bytes());
3212 return diff_tree_pair(
3213 db,
3214 format,
3215 &left_entry.oid,
3216 &right_entry.oid,
3217 &path,
3218 left,
3219 right,
3220 );
3221 }
3222 if !left_is_tree && !right_is_tree {
3223 if left_entry.mode == right_entry.mode && left_entry.oid == right_entry.oid {
3227 return Ok(());
3228 }
3229 let path = join_tree_path(prefix, left_entry.name.as_bytes());
3230 left.insert(
3231 path.clone(),
3232 TrackedEntry {
3233 mode: left_entry.mode,
3234 oid: left_entry.oid,
3235 },
3236 );
3237 right.insert(
3238 path,
3239 TrackedEntry {
3240 mode: right_entry.mode,
3241 oid: right_entry.oid,
3242 },
3243 );
3244 return Ok(());
3245 }
3246 }
3249
3250 if let Some(left_entry) = left_entry {
3252 let path = join_tree_path(prefix, left_entry.name.as_bytes());
3253 if left_is_tree {
3254 collect_tree_entries(db, format, &left_entry.oid, path, left)?;
3255 } else {
3256 left.insert(
3257 path,
3258 TrackedEntry {
3259 mode: left_entry.mode,
3260 oid: left_entry.oid,
3261 },
3262 );
3263 }
3264 }
3265 if let Some(right_entry) = right_entry {
3267 let path = join_tree_path(prefix, right_entry.name.as_bytes());
3268 if right_is_tree {
3269 collect_tree_entries(db, format, &right_entry.oid, path, right)?;
3270 } else {
3271 right.insert(
3272 path,
3273 TrackedEntry {
3274 mode: right_entry.mode,
3275 oid: right_entry.oid,
3276 },
3277 );
3278 }
3279 }
3280 Ok(())
3281}
3282
3283fn index_gitlinks(index: &BTreeMap<Vec<u8>, TrackedEntry>) -> BTreeMap<Vec<u8>, ObjectId> {
3284 index
3285 .iter()
3286 .filter(|(_, entry)| entry.mode == 0o160000)
3287 .map(|(path, entry)| (path.clone(), entry.oid))
3288 .collect()
3289}
3290
3291fn candidate_path_set<'a>(candidate_paths: impl Iterator<Item = &'a Vec<u8>>) -> BTreeSet<Vec<u8>> {
3292 candidate_paths.cloned().collect()
3293}
3294
3295fn worktree_entries_for_path_set(
3296 worktree_root: &Path,
3297 format: ObjectFormat,
3298 candidates: &BTreeSet<Vec<u8>>,
3299 index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3300 stat_cache: Option<&IndexStatCache>,
3301) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3302 worktree_entries_for_unique_paths(
3303 worktree_root,
3304 format,
3305 candidates.iter(),
3306 index_gitlinks,
3307 stat_cache,
3308 )
3309}
3310
3311fn worktree_entries_for_unique_paths<'a>(
3312 worktree_root: &Path,
3313 format: ObjectFormat,
3314 candidates: impl Iterator<Item = &'a Vec<u8>>,
3315 index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3316 stat_cache: Option<&IndexStatCache>,
3317) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3318 let mut entries = BTreeMap::new();
3319 for git_path in candidates {
3320 if let Some(entry) =
3321 worktree_entry_for_path(worktree_root, format, &git_path, index_gitlinks, stat_cache)?
3322 {
3323 entries.insert(git_path.clone(), entry);
3324 }
3325 }
3326 Ok(entries)
3327}
3328
3329fn worktree_entry_for_path(
3330 worktree_root: &Path,
3331 format: ObjectFormat,
3332 git_path: &[u8],
3333 index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3334 stat_cache: Option<&IndexStatCache>,
3335) -> Result<Option<TrackedEntry>> {
3336 let path = worktree_path_for_repo_path(worktree_root, git_path);
3337 let metadata = match fs::symlink_metadata(&path) {
3338 Ok(metadata) => metadata,
3339 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3340 Err(err) => return Err(GitError::Io(err.to_string())),
3341 };
3342 let file_type = metadata.file_type();
3343 if let Some(staged_oid) = index_gitlinks.get(git_path)
3344 && metadata.is_dir()
3345 {
3346 let oid = gitlink_head_oid(&path, format).unwrap_or(*staged_oid);
3347 return Ok(Some(TrackedEntry {
3348 mode: 0o160000,
3349 oid,
3350 }));
3351 }
3352 if metadata.is_dir() {
3353 if let Some(oid) = gitlink_head_oid(&path, format) {
3354 return Ok(Some(TrackedEntry {
3355 mode: 0o160000,
3356 oid,
3357 }));
3358 }
3359 return Ok(None);
3360 }
3361 if !(metadata.is_file() || file_type.is_symlink()) {
3362 return Ok(None);
3363 }
3364 if let Some(entry) = stat_cache.and_then(|cache| cache.reusable_entry(git_path, &metadata)) {
3365 return Ok(Some(tracked_entry_from_index(entry)));
3366 }
3367 let body = if file_type.is_symlink() {
3368 symlink_target_bytes(&path)?
3369 } else {
3370 fs::read(&path)?
3371 };
3372 let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3373 let mode = if file_type.is_symlink() {
3374 0o120000
3375 } else {
3376 file_mode(&metadata)
3377 };
3378 Ok(Some(TrackedEntry { mode, oid }))
3379}
3380
3381fn index_worktree_change_for_entry(
3382 path: &Path,
3383 format: ObjectFormat,
3384 index_entry: &impl WorktreeIndexEntry,
3385 stat_cache: &IndexStatCache,
3386) -> Result<Option<NameStatusEntry>> {
3387 let git_path = index_entry.git_path();
3388 let metadata = match fs::symlink_metadata(path) {
3389 Ok(metadata) => metadata,
3390 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3391 return Ok(Some(index_worktree_deleted_entry(index_entry)));
3392 }
3393 Err(err) => return Err(GitError::Io(err.to_string())),
3394 };
3395 let file_type = metadata.file_type();
3396 let right = if metadata.is_dir() {
3397 if index_entry.mode() == 0o160000 {
3398 let oid = gitlink_head_oid(path, format).unwrap_or(index_entry.oid());
3399 Some(TrackedEntry {
3400 mode: 0o160000,
3401 oid,
3402 })
3403 } else if let Some(oid) = gitlink_head_oid(path, format) {
3404 Some(TrackedEntry {
3405 mode: 0o160000,
3406 oid,
3407 })
3408 } else {
3409 None
3410 }
3411 } else if metadata.is_file() || file_type.is_symlink() {
3412 if index_entry.reusable_with(stat_cache, &metadata) {
3413 return Ok(None);
3414 }
3415 let body = if file_type.is_symlink() {
3416 symlink_target_bytes(path)?
3417 } else {
3418 fs::read(path)?
3419 };
3420 let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3421 let mode = if file_type.is_symlink() {
3422 0o120000
3423 } else {
3424 file_mode(&metadata)
3425 };
3426 Some(TrackedEntry { mode, oid })
3427 } else {
3428 None
3429 };
3430 let Some(right) = right else {
3431 return Ok(Some(index_worktree_deleted_entry(index_entry)));
3432 };
3433 let left = tracked_entry_from_index(index_entry);
3434 if right == left {
3435 return Ok(None);
3436 }
3437 Ok(Some(NameStatusEntry {
3438 status: NameStatus::Modified,
3439 path: git_path.to_vec().into(),
3440 old_path: None,
3441 old_mode: Some(left.mode),
3442 new_mode: Some(right.mode),
3443 old_oid: Some(left.oid),
3444 new_oid: Some(right.oid),
3445 }))
3446}
3447
3448fn index_worktree_deleted_entry(index_entry: &impl WorktreeIndexEntry) -> NameStatusEntry {
3449 NameStatusEntry {
3450 status: NameStatus::Deleted,
3451 path: index_entry.git_path().to_vec().into(),
3452 old_path: None,
3453 old_mode: Some(index_entry.mode()),
3454 new_mode: None,
3455 old_oid: Some(index_entry.oid()),
3456 new_oid: None,
3457 }
3458}
3459
3460fn worktree_blob_cache_for_path_set(
3461 worktree_root: &Path,
3462 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3463 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3464 candidate_paths: &BTreeSet<Vec<u8>>,
3465 options: RenameDetectionOptions,
3466) -> Result<HashMap<ObjectId, Vec<u8>>> {
3467 worktree_blob_cache_for_unique_paths(
3468 worktree_root,
3469 left_entries,
3470 right_entries,
3471 candidate_paths.iter(),
3472 options,
3473 )
3474}
3475
3476fn worktree_blob_cache_for_unique_paths<'a>(
3477 worktree_root: &Path,
3478 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3479 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3480 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
3481 options: RenameDetectionOptions,
3482) -> Result<HashMap<ObjectId, Vec<u8>>> {
3483 if !options.detect_inexact || !(options.base.detect_renames || options.base.detect_copies) {
3484 return Ok(HashMap::new());
3485 }
3486 let base = options.base;
3487 let mut changes =
3488 raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
3489 if base.detect_renames {
3490 changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
3491 }
3492 if base.detect_copies {
3493 changes = detect_exact_copies(
3494 changes,
3495 left_entries,
3496 right_entries,
3497 base.find_copies_harder,
3498 base.rename_empty,
3499 );
3500 }
3501 let has_rename_source = base.detect_renames
3502 && changes.iter().any(|entry| {
3503 entry.status == NameStatus::Deleted
3504 && entry
3505 .old_oid
3506 .as_ref()
3507 .is_some_and(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3508 });
3509 let has_copy_source = base.detect_copies
3510 && (base.find_copies_harder
3511 || changes
3512 .iter()
3513 .any(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified)));
3514 if !has_rename_source && !has_copy_source {
3515 return Ok(HashMap::new());
3516 }
3517 let candidate_oids = changes
3518 .iter()
3519 .filter(|entry| entry.status == NameStatus::Added)
3520 .filter_map(|entry| entry.new_oid)
3521 .filter(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3522 .collect::<BTreeSet<_>>();
3523 if candidate_oids.is_empty() {
3524 return Ok(HashMap::new());
3525 }
3526 let mut cache = HashMap::new();
3527 for (git_path, entry) in right_entries {
3528 if entry.mode == 0o160000 || !candidate_oids.contains(&entry.oid) {
3529 continue;
3530 }
3531 let path = worktree_path_for_repo_path(worktree_root, git_path);
3532 let body = if entry.mode == 0o120000 {
3533 symlink_target_bytes(&path)?
3534 } else {
3535 fs::read(&path)?
3536 };
3537 cache.entry(entry.oid).or_insert(body);
3538 }
3539 Ok(cache)
3540}
3541
3542fn cache_or_odb_blob(
3545 cache: &HashMap<ObjectId, Vec<u8>>,
3546 db: &FileObjectDatabase,
3547 oid: &ObjectId,
3548) -> Option<Vec<u8>> {
3549 if let Some(bytes) = cache.get(oid) {
3550 return Some(bytes.clone());
3551 }
3552 read_blob_bytes(db, oid)
3553}
3554
3555#[cfg(unix)]
3556fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
3557 use std::ffi::OsStr;
3558 use std::os::unix::ffi::OsStrExt;
3559
3560 let mut out = PathBuf::from(worktree_root);
3561 out.push(OsStr::from_bytes(path));
3562 out
3563}
3564
3565#[cfg(unix)]
3566fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
3567 use std::ffi::OsStr;
3568 use std::os::unix::ffi::OsStrExt;
3569
3570 out.clear();
3571 out.push(worktree_root);
3572 out.push(OsStr::from_bytes(path));
3573}
3574
3575#[cfg(not(unix))]
3576fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
3577 worktree_root.join(repo_path_to_path(path))
3578}
3579
3580#[cfg(not(unix))]
3581fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
3582 out.clear();
3583 out.push(worktree_root);
3584 out.push(repo_path_to_path(path));
3585}
3586
3587#[cfg(not(unix))]
3588fn repo_path_to_path(path: &[u8]) -> PathBuf {
3589 let mut out = PathBuf::new();
3590 for component in String::from_utf8_lossy(path).split('/') {
3591 if !component.is_empty() {
3592 out.push(component);
3593 }
3594 }
3595 out
3596}
3597
3598#[cfg(unix)]
3599fn file_mode(metadata: &fs::Metadata) -> u32 {
3600 use std::os::unix::fs::PermissionsExt;
3601 if metadata.permissions().mode() & 0o111 != 0 {
3602 0o100755
3603 } else {
3604 0o100644
3605 }
3606}
3607
3608#[cfg(not(unix))]
3609fn file_mode(_metadata: &fs::Metadata) -> u32 {
3610 0o100644
3611}
3612
3613#[cfg(unix)]
3614fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
3615 use std::os::unix::ffi::OsStrExt;
3616 let target = fs::read_link(path)?;
3617 Ok(target.as_os_str().as_bytes().to_vec())
3618}
3619
3620#[cfg(not(unix))]
3621fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
3622 let target = fs::read_link(path)?;
3623 Ok(target.to_string_lossy().replace('\\', "/").into_bytes())
3624}
3625
3626#[derive(Debug, Clone, PartialEq, Eq)]
3643pub enum HunkLine {
3644 Context(Vec<u8>),
3646 Insert(Vec<u8>),
3648 Delete(Vec<u8>),
3650}
3651
3652impl HunkLine {
3653 pub fn content(&self) -> &[u8] {
3655 match self {
3656 Self::Context(bytes) | Self::Insert(bytes) | Self::Delete(bytes) => bytes,
3657 }
3658 }
3659}
3660
3661#[derive(Debug, Clone, PartialEq, Eq)]
3668pub struct Hunk {
3669 pub old_start: usize,
3670 pub old_len: usize,
3671 pub new_start: usize,
3672 pub new_len: usize,
3673 pub lines: Vec<HunkLine>,
3674 pub old_no_newline: bool,
3676 pub new_no_newline: bool,
3678}
3679
3680#[derive(Debug, Clone, PartialEq, Eq)]
3682pub struct FilePatch {
3683 pub old_path: Option<Vec<u8>>,
3685 pub new_path: Option<Vec<u8>>,
3687 pub old_mode: Option<u32>,
3689 pub new_mode: Option<u32>,
3691 pub hunks: Vec<Hunk>,
3692 pub is_new: bool,
3694 pub is_delete: bool,
3696 pub is_rename: bool,
3698}
3699
3700#[derive(Debug, Clone, PartialEq, Eq)]
3702pub enum ApplyOutcome {
3703 Applied(Vec<u8>),
3705 Rejected,
3707}
3708
3709const MIN_FUZZ_CONTEXT: usize = usize::MAX;
3720
3721pub fn parse_unified_patch(input: &[u8]) -> Result<Vec<FilePatch>> {
3729 let lines = split_patch_lines(input);
3730 let mut parser = PatchParser {
3731 lines: &lines,
3732 index: 0,
3733 };
3734 parser.parse()
3735}
3736
3737pub fn apply_file_patch(base: &[u8], patch: &FilePatch) -> ApplyOutcome {
3762 if patch.is_delete && patch.hunks.is_empty() {
3764 return ApplyOutcome::Applied(Vec::new());
3765 }
3766 let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
3769
3770 let mut image = split_blob_lines(base_for_match);
3774
3775 let mut running_offset: isize = 0;
3778
3779 for hunk in &patch.hunks {
3780 match apply_one_hunk(&mut image, hunk, running_offset) {
3781 Some(drift) => running_offset += drift,
3782 None => return ApplyOutcome::Rejected,
3783 }
3784 }
3785
3786 ApplyOutcome::Applied(join_lines(&image))
3787}
3788
3789fn apply_one_hunk(image: &mut Vec<Line>, hunk: &Hunk, running_offset: isize) -> Option<isize> {
3797 let mut preimage: Vec<Line> = Vec::new();
3801 let mut postimage: Vec<Line> = Vec::new();
3802 let mut leading = 0usize; let mut trailing = 0usize; let mut seen_change = false;
3805 for hl in &hunk.lines {
3806 match hl {
3807 HunkLine::Context(bytes) => {
3808 preimage.push(Line {
3809 content: bytes.clone(),
3810 no_newline: false,
3811 });
3812 postimage.push(Line {
3813 content: bytes.clone(),
3814 no_newline: false,
3815 });
3816 if !seen_change {
3817 leading += 1;
3818 }
3819 trailing += 1;
3820 }
3821 HunkLine::Delete(bytes) => {
3822 preimage.push(Line {
3823 content: bytes.clone(),
3824 no_newline: false,
3825 });
3826 seen_change = true;
3827 trailing = 0;
3828 }
3829 HunkLine::Insert(bytes) => {
3830 postimage.push(Line {
3831 content: bytes.clone(),
3832 no_newline: false,
3833 });
3834 seen_change = true;
3835 trailing = 0;
3836 }
3837 }
3838 }
3839
3840 if hunk.old_no_newline && let Some(last) = preimage.last_mut() {
3844 last.no_newline = true;
3845 }
3846 if hunk.new_no_newline && let Some(last) = postimage.last_mut() {
3847 last.no_newline = true;
3848 }
3849
3850 let mut match_beginning = hunk.old_start <= 1;
3855 let mut match_end = trailing == 0;
3856
3857 let mut expected = expected_position(hunk, running_offset);
3862 let hunk_expected = expected;
3865
3866 loop {
3867 if let Some(pos) = find_hunk_pos(image, &preimage, expected, match_beginning, match_end) {
3868 let take = preimage.len();
3870 let replacement: Vec<Line> = postimage.clone();
3871 image.splice(pos..pos + take, replacement);
3872 return Some(pos as isize - hunk_expected);
3873 }
3874
3875 #[allow(clippy::absurd_extreme_comparisons)]
3884 if leading <= MIN_FUZZ_CONTEXT && trailing <= MIN_FUZZ_CONTEXT {
3885 return None;
3886 }
3887
3888 if match_beginning || match_end {
3891 match_beginning = false;
3892 match_end = false;
3893 continue;
3894 }
3895
3896 if leading >= trailing {
3898 preimage.remove(0);
3900 postimage.remove(0);
3901 expected -= 1;
3902 leading -= 1;
3903 }
3904 if trailing > leading {
3905 preimage.pop();
3906 postimage.pop();
3907 trailing -= 1;
3908 }
3909 }
3910}
3911
3912#[derive(Debug, Clone, PartialEq, Eq)]
3914struct Line {
3915 content: Vec<u8>,
3916 no_newline: bool,
3917}
3918
3919fn split_blob_lines(data: &[u8]) -> Vec<Line> {
3924 let mut lines = Vec::new();
3925 let mut start = 0usize;
3926 while start < data.len() {
3927 match data[start..].iter().position(|&b| b == b'\n') {
3928 Some(rel) => {
3929 let end = start + rel;
3930 lines.push(Line {
3931 content: data[start..end].to_vec(),
3932 no_newline: false,
3933 });
3934 start = end + 1;
3935 }
3936 None => {
3937 lines.push(Line {
3938 content: data[start..].to_vec(),
3939 no_newline: true,
3940 });
3941 start = data.len();
3942 }
3943 }
3944 }
3945 lines
3946}
3947
3948fn join_lines(lines: &[Line]) -> Vec<u8> {
3950 let mut out = Vec::new();
3951 for line in lines {
3952 out.extend_from_slice(&line.content);
3953 if !line.no_newline {
3954 out.push(b'\n');
3955 }
3956 }
3957 out
3958}
3959
3960fn expected_position(hunk: &Hunk, running_offset: isize) -> isize {
3963 let base = if hunk.old_start == 0 {
3965 0
3966 } else {
3967 hunk.old_start as isize - 1
3968 };
3969 base + running_offset
3970}
3971
3972fn find_hunk_pos(
3980 image: &[Line],
3981 preimage: &[Line],
3982 expected: isize,
3983 match_beginning: bool,
3984 match_end: bool,
3985) -> Option<usize> {
3986 let line_nr = image.len();
3987 let pre_nr = preimage.len();
3988
3989 let mut line: isize = if match_beginning {
3992 0
3993 } else if match_end {
3994 line_nr as isize - pre_nr as isize
3995 } else {
3996 expected
3997 };
3998 if line < 0 {
3999 line = 0;
4000 }
4001 if line as usize > line_nr {
4002 line = line_nr as isize;
4003 }
4004
4005 let start = line as usize;
4006 let mut backwards = start;
4007 let mut forwards = start;
4008 let mut current = start;
4009
4010 let mut i: u64 = 0;
4011 loop {
4012 if preimage_matches_at(image, preimage, current, match_beginning, match_end) {
4013 return Some(current);
4014 }
4015
4016 loop {
4017 if backwards == 0 && forwards == line_nr {
4019 return None;
4020 }
4021 if i & 1 == 1 {
4022 if backwards == 0 {
4024 i += 1;
4025 continue;
4026 }
4027 backwards -= 1;
4028 current = backwards;
4029 } else {
4030 if forwards == line_nr {
4032 i += 1;
4033 continue;
4034 }
4035 forwards += 1;
4036 current = forwards;
4037 }
4038 break;
4039 }
4040 i += 1;
4041 }
4042}
4043
4044fn preimage_matches_at(
4053 image: &[Line],
4054 preimage: &[Line],
4055 pos: usize,
4056 match_beginning: bool,
4057 match_end: bool,
4058) -> bool {
4059 if match_beginning && pos != 0 {
4060 return false;
4061 }
4062 if pos + preimage.len() > image.len() {
4064 return false;
4065 }
4066 if match_end && pos + preimage.len() != image.len() {
4067 return false;
4068 }
4069 for (i, pre) in preimage.iter().enumerate() {
4070 let img = &image[pos + i];
4071 if img.content != pre.content {
4072 return false;
4073 }
4074 if pre.no_newline != img.no_newline {
4080 return false;
4081 }
4082 }
4083 true
4084}
4085
4086fn split_patch_lines(input: &[u8]) -> Vec<&[u8]> {
4090 let mut lines = Vec::new();
4091 let mut start = 0usize;
4092 while start < input.len() {
4093 match input[start..].iter().position(|&b| b == b'\n') {
4094 Some(rel) => {
4095 let end = start + rel;
4096 lines.push(&input[start..end]);
4097 start = end + 1;
4098 }
4099 None => {
4100 lines.push(&input[start..]);
4101 start = input.len();
4102 }
4103 }
4104 }
4105 lines
4106}
4107
4108struct PatchParser<'a> {
4109 lines: &'a [&'a [u8]],
4110 index: usize,
4111}
4112
4113impl<'a> PatchParser<'a> {
4114 fn parse(&mut self) -> Result<Vec<FilePatch>> {
4115 let mut patches = Vec::new();
4116 while self.index < self.lines.len() {
4117 let line = self.lines[self.index];
4118 if line.starts_with(b"diff --git ") {
4119 patches.push(self.parse_file(Some(line))?);
4120 } else if line.starts_with(b"--- ") {
4121 patches.push(self.parse_file(None)?);
4123 } else if line.starts_with(b"@@ ") {
4124 return Err(GitError::InvalidFormat(
4125 "hunk header encountered before any file header".to_string(),
4126 ));
4127 } else {
4128 self.index += 1;
4130 }
4131 }
4132 Ok(patches)
4133 }
4134
4135 fn parse_file(&mut self, diff_line: Option<&[u8]>) -> Result<FilePatch> {
4139 let mut patch = FilePatch {
4140 old_path: None,
4141 new_path: None,
4142 old_mode: None,
4143 new_mode: None,
4144 hunks: Vec::new(),
4145 is_new: false,
4146 is_delete: false,
4147 is_rename: false,
4148 };
4149 if let Some(diff_line) = diff_line {
4152 if let Some((a, b)) = parse_diff_git_paths(diff_line) {
4153 patch.old_path = Some(a);
4154 patch.new_path = Some(b);
4155 }
4156 self.index += 1;
4157 }
4158
4159 while self.index < self.lines.len() {
4161 let line = self.lines[self.index];
4162 if line.starts_with(b"--- ") {
4163 self.parse_old_file_header(line, &mut patch);
4164 self.index += 1;
4165 break;
4166 } else if line.starts_with(b"@@ ") {
4167 break;
4169 } else if line.starts_with(b"diff --git ") {
4170 return Ok(patch);
4172 } else if let Some(rest) = strip_prefix(line, b"old mode ") {
4173 patch.old_mode = parse_octal(rest);
4174 } else if let Some(rest) = strip_prefix(line, b"new mode ") {
4175 patch.new_mode = parse_octal(rest);
4176 } else if let Some(rest) = strip_prefix(line, b"new file mode ") {
4177 patch.is_new = true;
4178 patch.new_mode = parse_octal(rest);
4179 } else if let Some(rest) = strip_prefix(line, b"deleted file mode ") {
4180 patch.is_delete = true;
4181 patch.old_mode = parse_octal(rest);
4182 } else if let Some(rest) = strip_prefix(line, b"rename from ") {
4183 patch.is_rename = true;
4184 patch.old_path = Some(rest.to_vec());
4185 } else if let Some(rest) = strip_prefix(line, b"rename to ") {
4186 patch.is_rename = true;
4187 patch.new_path = Some(rest.to_vec());
4188 } else {
4189 self.index += 1;
4191 continue;
4192 }
4193 self.index += 1;
4194 }
4195
4196 if self.index < self.lines.len() && self.lines[self.index].starts_with(b"+++ ") {
4198 self.parse_new_file_header(self.lines[self.index], &mut patch);
4199 self.index += 1;
4200 }
4201
4202 while self.index < self.lines.len() {
4204 let line = self.lines[self.index];
4205 if line.starts_with(b"@@ ") {
4206 let hunk = self.parse_hunk()?;
4207 patch.hunks.push(hunk);
4208 } else if line.starts_with(b"diff --git ") {
4209 break;
4210 } else if line.starts_with(b"--- ") {
4211 break;
4213 } else {
4214 self.index += 1;
4216 }
4217 }
4218
4219 Ok(patch)
4220 }
4221
4222 fn parse_old_file_header(&self, line: &[u8], patch: &mut FilePatch) {
4223 let rest = strip_prefix(line, b"--- ").unwrap_or(line);
4224 let path = strip_header_path(rest);
4225 match path {
4226 HeaderPath::DevNull => {
4227 patch.is_new = true;
4228 patch.old_path = None;
4229 }
4230 HeaderPath::Path(p) => {
4231 if patch.old_path.is_none() || !patch.is_rename {
4233 patch.old_path = Some(p);
4234 }
4235 }
4236 }
4237 }
4238
4239 fn parse_new_file_header(&self, line: &[u8], patch: &mut FilePatch) {
4240 let rest = strip_prefix(line, b"+++ ").unwrap_or(line);
4241 let path = strip_header_path(rest);
4242 match path {
4243 HeaderPath::DevNull => {
4244 patch.is_delete = true;
4245 patch.new_path = None;
4246 }
4247 HeaderPath::Path(p) => {
4248 if patch.new_path.is_none() || !patch.is_rename {
4249 patch.new_path = Some(p);
4250 }
4251 }
4252 }
4253 }
4254
4255 fn parse_hunk(&mut self) -> Result<Hunk> {
4256 let header = self.lines[self.index];
4257 let (old_start, old_len, new_start, new_len) = parse_hunk_header(header)?;
4258 self.index += 1;
4259
4260 let mut hunk = Hunk {
4261 old_start,
4262 old_len,
4263 new_start,
4264 new_len,
4265 lines: Vec::new(),
4266 old_no_newline: false,
4267 new_no_newline: false,
4268 };
4269 let mut old_seen = 0usize;
4270 let mut new_seen = 0usize;
4271
4272 while self.index < self.lines.len() {
4273 if old_seen >= old_len && new_seen >= new_len {
4275 break;
4276 }
4277 let line = self.lines[self.index];
4278 if line.is_empty() {
4279 hunk.lines.push(HunkLine::Context(Vec::new()));
4283 old_seen += 1;
4284 new_seen += 1;
4285 self.index += 1;
4286 continue;
4287 }
4288 match line[0] {
4289 b' ' => {
4290 hunk.lines.push(HunkLine::Context(line[1..].to_vec()));
4291 old_seen += 1;
4292 new_seen += 1;
4293 }
4294 b'+' => {
4295 hunk.lines.push(HunkLine::Insert(line[1..].to_vec()));
4296 new_seen += 1;
4297 }
4298 b'-' => {
4299 hunk.lines.push(HunkLine::Delete(line[1..].to_vec()));
4300 old_seen += 1;
4301 }
4302 b'\\' => {
4303 self.mark_no_newline(&mut hunk);
4306 self.index += 1;
4307 continue;
4308 }
4309 _ => {
4310 break;
4312 }
4313 }
4314 self.index += 1;
4315 }
4316
4317 if self.index < self.lines.len() && self.lines[self.index].starts_with(b"\\") {
4320 self.mark_no_newline(&mut hunk);
4321 self.index += 1;
4322 }
4323
4324 if old_seen != old_len || new_seen != new_len {
4325 return Err(GitError::InvalidFormat(format!(
4326 "hunk body line counts mismatch: header declared -{old_len},+{new_len} \
4327 but body had -{old_seen},+{new_seen}"
4328 )));
4329 }
4330
4331 Ok(hunk)
4332 }
4333
4334 fn mark_no_newline(&self, hunk: &mut Hunk) {
4337 match hunk.lines.last() {
4338 Some(HunkLine::Context(_)) => {
4339 hunk.old_no_newline = true;
4340 hunk.new_no_newline = true;
4341 }
4342 Some(HunkLine::Insert(_)) => hunk.new_no_newline = true,
4343 Some(HunkLine::Delete(_)) => hunk.old_no_newline = true,
4344 None => {}
4345 }
4346 }
4347}
4348
4349enum HeaderPath {
4350 DevNull,
4351 Path(Vec<u8>),
4352}
4353
4354fn strip_header_path(rest: &[u8]) -> HeaderPath {
4358 let path = match rest.iter().position(|&b| b == b'\t') {
4360 Some(tab) => &rest[..tab],
4361 None => rest,
4362 };
4363 let path = trim_ascii_end(path);
4364 if path == b"/dev/null" {
4365 return HeaderPath::DevNull;
4366 }
4367 let stripped = if path.starts_with(b"a/") || path.starts_with(b"b/") {
4369 &path[2..]
4370 } else {
4371 path
4372 };
4373 HeaderPath::Path(stripped.to_vec())
4374}
4375
4376fn parse_diff_git_paths(line: &[u8]) -> Option<(Vec<u8>, Vec<u8>)> {
4380 let rest = strip_prefix(line, b"diff --git ")?;
4381 if rest.first() == Some(&b'"') {
4384 return None;
4385 }
4386 if !rest.starts_with(b"a/") {
4390 return None;
4391 }
4392 let sep = find_subslice(rest, b" b/")?;
4393 let a = &rest[2..sep];
4394 let b = &rest[sep + 3..];
4395 Some((a.to_vec(), b.to_vec()))
4396}
4397
4398fn parse_hunk_header(line: &[u8]) -> Result<(usize, usize, usize, usize)> {
4401 let err = || GitError::InvalidFormat(format!("malformed hunk header: {}", lossy(line)));
4402 let rest = strip_prefix(line, b"@@ ").ok_or_else(err)?;
4403 let close = find_subslice(rest, b" @@").ok_or_else(err)?;
4405 let ranges = &rest[..close];
4406 let mut parts = ranges.split(|&b| b == b' ').filter(|p| !p.is_empty());
4407 let old = parts.next().ok_or_else(err)?;
4408 let new = parts.next().ok_or_else(err)?;
4409 let old = strip_prefix(old, b"-").ok_or_else(err)?;
4410 let new = strip_prefix(new, b"+").ok_or_else(err)?;
4411 let (old_start, old_len) = parse_range(old).ok_or_else(err)?;
4412 let (new_start, new_len) = parse_range(new).ok_or_else(err)?;
4413 Ok((old_start, old_len, new_start, new_len))
4414}
4415
4416fn parse_range(range: &[u8]) -> Option<(usize, usize)> {
4418 match range.iter().position(|&b| b == b',') {
4419 Some(comma) => {
4420 let start = parse_usize(&range[..comma])?;
4421 let len = parse_usize(&range[comma + 1..])?;
4422 Some((start, len))
4423 }
4424 None => Some((parse_usize(range)?, 1)),
4425 }
4426}
4427
4428fn parse_usize(bytes: &[u8]) -> Option<usize> {
4429 if bytes.is_empty() {
4430 return None;
4431 }
4432 let mut value: usize = 0;
4433 for &b in bytes {
4434 if !b.is_ascii_digit() {
4435 return None;
4436 }
4437 value = value.checked_mul(10)?.checked_add((b - b'0') as usize)?;
4438 }
4439 Some(value)
4440}
4441
4442fn parse_octal(bytes: &[u8]) -> Option<u32> {
4443 let trimmed = trim_ascii_end(bytes);
4444 if trimmed.is_empty() {
4445 return None;
4446 }
4447 let mut value: u32 = 0;
4448 for &b in trimmed {
4449 if !(b'0'..=b'7').contains(&b) {
4450 return None;
4451 }
4452 value = value.checked_mul(8)?.checked_add((b - b'0') as u32)?;
4453 }
4454 Some(value)
4455}
4456
4457fn strip_prefix<'b>(line: &'b [u8], prefix: &[u8]) -> Option<&'b [u8]> {
4458 if line.starts_with(prefix) {
4459 Some(&line[prefix.len()..])
4460 } else {
4461 None
4462 }
4463}
4464
4465fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
4466 if needle.is_empty() || needle.len() > haystack.len() {
4467 return None;
4468 }
4469 haystack
4470 .windows(needle.len())
4471 .position(|window| window == needle)
4472}
4473
4474fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
4475 let mut end = bytes.len();
4476 while end > 0 && (bytes[end - 1] == b' ' || bytes[end - 1] == b'\r') {
4477 end -= 1;
4478 }
4479 &bytes[..end]
4480}
4481
4482fn lossy(bytes: &[u8]) -> String {
4483 String::from_utf8_lossy(bytes).into_owned()
4484}
4485
4486pub type MergeEntryMap = BTreeMap<Vec<u8>, (u32, ObjectId)>;
4509
4510#[derive(Clone, Copy, PartialEq, Eq, Debug)]
4513pub enum MergeFavor {
4514 None,
4516 Ours,
4518 Theirs,
4520}
4521
4522pub struct MergeTreesOptions<'a> {
4524 pub ours_label: &'a str,
4526 pub theirs_label: &'a str,
4528 pub ancestor_label: &'a str,
4531 pub favor: MergeFavor,
4533 pub detect_renames: bool,
4537 pub rename_threshold: u8,
4539 pub directory_renames: DirectoryRenames,
4545 pub style: ConflictStyle,
4547}
4548
4549#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
4552pub enum DirectoryRenames {
4553 #[default]
4555 False,
4556 True,
4558 Conflict,
4561}
4562
4563impl Default for MergeTreesOptions<'_> {
4564 fn default() -> Self {
4565 Self {
4566 ours_label: "ours",
4567 theirs_label: "theirs",
4568 ancestor_label: "merged common ancestors",
4569 favor: MergeFavor::None,
4570 detect_renames: false,
4571 rename_threshold: DEFAULT_RENAME_THRESHOLD,
4572 directory_renames: DirectoryRenames::False,
4573 style: ConflictStyle::Merge,
4574 }
4575 }
4576}
4577
4578#[derive(Debug, Clone, PartialEq, Eq)]
4581pub enum MergeConflictKind {
4582 Content { add_add: bool },
4585 ModifyDelete {
4587 deleted_in: String,
4589 modified_in: String,
4591 },
4592 RenameContent {
4595 old_path: Vec<u8>,
4597 },
4598 RenameDelete {
4600 old_path: Vec<u8>,
4602 renamed_in: String,
4604 deleted_in: String,
4606 },
4607}
4608
4609#[derive(Debug, Clone)]
4611pub struct MergedPath {
4612 pub path: Vec<u8>,
4614 pub stages: MergeStages,
4617 pub result: Option<(u32, ObjectId)>,
4620 pub worktree: Option<(u32, Vec<u8>)>,
4624 pub conflict: Option<MergeConflictKind>,
4626 pub auto_merged: bool,
4631}
4632
4633impl MergedPath {
4634 pub fn is_clean(&self) -> bool {
4636 self.conflict.is_none()
4637 }
4638}
4639
4640#[derive(Debug, Clone, Default)]
4642pub struct MergeStages {
4643 pub base: Option<(u32, ObjectId)>,
4644 pub ours: Option<(u32, ObjectId)>,
4645 pub theirs: Option<(u32, ObjectId)>,
4646}
4647
4648#[derive(Debug, Clone)]
4651pub struct MergeTreesResult {
4652 pub tree: ObjectId,
4655 pub paths: Vec<MergedPath>,
4657 pub clean: bool,
4659}
4660
4661impl MergeTreesResult {
4662 pub fn conflicts(&self) -> impl Iterator<Item = &MergedPath> {
4664 self.paths.iter().filter(|entry| entry.conflict.is_some())
4665 }
4666}
4667
4668pub fn flatten_tree(
4671 reader: &impl ObjectReader,
4672 format: ObjectFormat,
4673 tree_oid: &ObjectId,
4674) -> Result<MergeEntryMap> {
4675 let mut entries = BTreeMap::new();
4676 if *tree_oid == empty_tree_oid(format)? {
4677 return Ok(entries);
4678 }
4679 collect_flat_tree(reader, format, tree_oid, Vec::new(), &mut entries)?;
4680 Ok(entries)
4681}
4682
4683fn collect_flat_tree(
4684 reader: &impl ObjectReader,
4685 format: ObjectFormat,
4686 tree_oid: &ObjectId,
4687 prefix: Vec<u8>,
4688 entries: &mut MergeEntryMap,
4689) -> Result<()> {
4690 let object = reader.read_object(tree_oid)?;
4691 if object.object_type != ObjectType::Tree {
4692 return Err(GitError::InvalidObject(format!(
4693 "expected tree {}, found {}",
4694 tree_oid,
4695 object.object_type.as_str()
4696 )));
4697 }
4698 for entry in TreeEntries::new(format, &object.body) {
4699 let entry = entry?;
4700 let mut path = prefix.clone();
4701 if !path.is_empty() {
4702 path.push(b'/');
4703 }
4704 path.extend_from_slice(entry.name);
4705 if entry.mode == 0o040000 {
4706 collect_flat_tree(reader, format, &entry.oid, path, entries)?;
4707 } else {
4708 entries.insert(path, (entry.mode, entry.oid));
4709 }
4710 }
4711 Ok(())
4712}
4713
4714pub fn is_mergeable_file_mode(mode: u32) -> bool {
4717 mode == 0o100644 || mode == 0o100755
4718}
4719
4720pub fn merge_trees(
4733 db: &FileObjectDatabase,
4734 format: ObjectFormat,
4735 base: Option<&ObjectId>,
4736 ours: &ObjectId,
4737 theirs: &ObjectId,
4738 options: &MergeTreesOptions<'_>,
4739) -> Result<MergeTreesResult> {
4740 let base_map = match base {
4741 Some(tree) => flatten_tree(db, format, tree)?,
4742 None => MergeEntryMap::new(),
4743 };
4744 let ours_map = flatten_tree(db, format, ours)?;
4745 let theirs_map = flatten_tree(db, format, theirs)?;
4746 merge_entry_maps(db, format, &base_map, &ours_map, &theirs_map, options)
4747}
4748
4749pub fn merge_entry_maps(
4753 db: &FileObjectDatabase,
4754 format: ObjectFormat,
4755 base_map: &MergeEntryMap,
4756 ours_map: &MergeEntryMap,
4757 theirs_map: &MergeEntryMap,
4758 options: &MergeTreesOptions<'_>,
4759) -> Result<MergeTreesResult> {
4760 let (renames, side_renames) = if options.detect_renames {
4767 let (renames, ours_side, theirs_side) =
4768 detect_merge_renames(db, format, base_map, ours_map, theirs_map, options)?;
4769 (renames, Some((ours_side, theirs_side)))
4770 } else {
4771 (MergeRenames::default(), None)
4772 };
4773
4774 let (eff_base, mut eff_ours, mut eff_theirs) =
4776 apply_merge_renames(base_map, ours_map, theirs_map, &renames);
4777
4778 if options.directory_renames != DirectoryRenames::False
4784 && let Some((ours_side, theirs_side)) = &side_renames
4785 {
4786 let dir_renames =
4787 compute_directory_renames(base_map, ours_map, theirs_map, ours_side, theirs_side);
4788 let (new_ours, new_theirs, _rehomed) =
4789 apply_directory_renames(&eff_base, &eff_ours, &eff_theirs, &dir_renames);
4790 eff_ours = new_ours;
4791 eff_theirs = new_theirs;
4792 }
4793
4794 let mut all_paths = BTreeSet::new();
4795 all_paths.extend(eff_base.keys().cloned());
4796 all_paths.extend(eff_ours.keys().cloned());
4797 all_paths.extend(eff_theirs.keys().cloned());
4798
4799 let mut paths: Vec<MergedPath> = Vec::new();
4800 let mut leaves: MergeEntryMap = BTreeMap::new();
4801 let mut clean = true;
4802
4803 for path in all_paths {
4804 let base = eff_base.get(&path).cloned();
4805 let ours = eff_ours.get(&path).cloned();
4806 let theirs = eff_theirs.get(&path).cloned();
4807 let rename = renames.dest_to_source.get(&path);
4808 let old_path = rename.map(|r| r.source.clone());
4809
4810 if ours == theirs {
4812 if let Some(entry) = ours {
4813 leaves.insert(path.clone(), entry);
4814 }
4815 paths.push(clean_path(path, ours));
4816 continue;
4817 }
4818 if ours == base {
4819 if let Some(entry) = &theirs {
4820 leaves.insert(path.clone(), *entry);
4821 }
4822 paths.push(clean_path(path, theirs));
4823 continue;
4824 }
4825 if theirs == base {
4826 if let Some(entry) = &ours {
4827 leaves.insert(path.clone(), *entry);
4828 }
4829 paths.push(clean_path(path, ours));
4830 continue;
4831 }
4832
4833 let content_mergeable = matches!(&ours, Some((mode, _)) if is_mergeable_file_mode(*mode))
4835 && matches!(&theirs, Some((mode, _)) if is_mergeable_file_mode(*mode))
4836 && match &base {
4837 Some((mode, _)) => is_mergeable_file_mode(*mode),
4838 None => true,
4839 };
4840
4841 if let (true, Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
4842 (content_mergeable, &ours, &theirs)
4843 {
4844 let add_add = base.is_none();
4845 let base_bytes = match &base {
4846 Some((_, oid)) => merge_blob_bytes(db, oid)?,
4847 None => Vec::new(),
4848 };
4849 let ours_bytes = merge_blob_bytes(db, ours_oid)?;
4850 let theirs_bytes = merge_blob_bytes(db, theirs_oid)?;
4851 let (ours_label, theirs_label) = match rename {
4856 Some(MergeRename { source, side }) => {
4857 let (ours_path, theirs_path) = match side {
4858 RenameSide::Theirs => (source.as_slice(), path.as_slice()),
4860 RenameSide::Ours => (path.as_slice(), source.as_slice()),
4862 };
4863 (
4864 qualify_label(options.ours_label, ours_path),
4865 qualify_label(options.theirs_label, theirs_path),
4866 )
4867 }
4868 None => (
4869 options.ours_label.to_string(),
4870 options.theirs_label.to_string(),
4871 ),
4872 };
4873 let result = merge_blobs(
4874 &base_bytes,
4875 &ours_bytes,
4876 &theirs_bytes,
4877 &MergeBlobOptions {
4878 ours_label: &ours_label,
4879 theirs_label: &theirs_label,
4880 base_label: options.ancestor_label,
4881 style: options.style,
4882 },
4883 );
4884
4885 let base_mode = base.as_ref().map(|(mode, _)| *mode);
4886 let (resolved_mode, mode_conflict) =
4887 merge_file_modes(base_mode, *ours_mode, *theirs_mode);
4888
4889 if !result.conflicted && !mode_conflict {
4890 let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content))?;
4891 leaves.insert(path.clone(), (resolved_mode, oid));
4892 paths.push(clean_path_auto(path, Some((resolved_mode, oid)), true));
4893 } else if options.favor != MergeFavor::None && !mode_conflict {
4894 let chosen = if options.favor == MergeFavor::Ours {
4895 ours
4896 } else {
4897 theirs
4898 };
4899 if let Some(entry) = chosen {
4900 leaves.insert(path.clone(), entry);
4901 }
4902 paths.push(clean_path_auto(path, chosen, true));
4903 } else {
4904 clean = false;
4905 let oid =
4906 db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
4907 leaves.insert(path.clone(), (resolved_mode, oid));
4908 let worktree_mode = if *ours_mode == *theirs_mode {
4909 *ours_mode
4910 } else {
4911 0o100644
4912 };
4913 let conflict = match &old_path {
4914 Some(old) => MergeConflictKind::RenameContent {
4915 old_path: old.clone(),
4916 },
4917 None => MergeConflictKind::Content { add_add },
4918 };
4919 paths.push(MergedPath {
4920 path: path.clone(),
4921 stages: stages_for(&base, &ours, &theirs),
4922 result: Some((resolved_mode, oid)),
4923 worktree: Some((worktree_mode, result.content)),
4924 conflict: Some(conflict),
4925 auto_merged: true,
4926 });
4927 }
4928 } else if base.is_some() && (ours.is_none() || theirs.is_none()) {
4929 clean = false;
4931 let (deleted_in, modified_in, surviving) = if ours.is_none() {
4932 (
4933 options.ours_label.to_string(),
4934 options.theirs_label.to_string(),
4935 theirs,
4936 )
4937 } else {
4938 (
4939 options.theirs_label.to_string(),
4940 options.ours_label.to_string(),
4941 ours,
4942 )
4943 };
4944 let worktree = match &surviving {
4945 Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
4946 None => None,
4947 };
4948 if let Some(entry) = surviving {
4949 leaves.insert(path.clone(), entry);
4950 }
4951 paths.push(MergedPath {
4952 path: path.clone(),
4953 stages: stages_for(&base, &ours, &theirs),
4954 result: surviving,
4955 worktree,
4956 conflict: Some(MergeConflictKind::ModifyDelete {
4957 deleted_in,
4958 modified_in,
4959 }),
4960 auto_merged: false,
4961 });
4962 } else {
4963 clean = false;
4966 let add_add = base.is_none();
4967 let surviving = ours.or(theirs);
4968 let worktree = match &surviving {
4969 Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
4970 None => None,
4971 };
4972 if let Some(entry) = surviving {
4973 leaves.insert(path.clone(), entry);
4974 }
4975 paths.push(MergedPath {
4976 path: path.clone(),
4977 stages: stages_for(&base, &ours, &theirs),
4978 result: surviving,
4979 worktree,
4980 conflict: Some(MergeConflictKind::Content { add_add }),
4981 auto_merged: false,
4982 });
4983 }
4984 }
4985
4986 if !renames.rename_deletes.is_empty() {
4991 for (dest, rd) in &renames.rename_deletes {
4992 let Some(slot) = paths.iter_mut().find(|p| &p.path == dest) else {
4994 continue;
4995 };
4996 if slot.conflict.is_some() {
4997 continue;
4998 }
4999 let base_entry = base_map.get(&rd.source).copied();
5000 let renamed_entry = slot.result;
5001 let (ours_stage, theirs_stage) = match rd.side {
5004 RenameSide::Ours => (renamed_entry, None),
5005 RenameSide::Theirs => (None, renamed_entry),
5006 };
5007 let (renamed_in, deleted_in) = match rd.side {
5008 RenameSide::Ours => (
5009 options.ours_label.to_string(),
5010 options.theirs_label.to_string(),
5011 ),
5012 RenameSide::Theirs => (
5013 options.theirs_label.to_string(),
5014 options.ours_label.to_string(),
5015 ),
5016 };
5017 let worktree = match &renamed_entry {
5018 Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
5019 None => None,
5020 };
5021 slot.stages = MergeStages {
5022 base: base_entry,
5023 ours: ours_stage,
5024 theirs: theirs_stage,
5025 };
5026 slot.worktree = worktree;
5027 slot.conflict = Some(MergeConflictKind::RenameDelete {
5028 old_path: rd.source.clone(),
5029 renamed_in,
5030 deleted_in,
5031 });
5032 clean = false;
5033 }
5034 }
5035
5036 let tree = write_merged_tree(db, &leaves)?;
5037
5038 Ok(MergeTreesResult { tree, paths, clean })
5039}
5040
5041fn clean_path(path: Vec<u8>, result: Option<(u32, ObjectId)>) -> MergedPath {
5043 clean_path_auto(path, result, false)
5044}
5045
5046fn clean_path_auto(
5049 path: Vec<u8>,
5050 result: Option<(u32, ObjectId)>,
5051 auto_merged: bool,
5052) -> MergedPath {
5053 MergedPath {
5054 path,
5055 stages: MergeStages::default(),
5056 result,
5057 worktree: None,
5058 conflict: None,
5059 auto_merged,
5060 }
5061}
5062
5063fn stages_for(
5065 base: &Option<(u32, ObjectId)>,
5066 ours: &Option<(u32, ObjectId)>,
5067 theirs: &Option<(u32, ObjectId)>,
5068) -> MergeStages {
5069 MergeStages {
5070 base: *base,
5071 ours: *ours,
5072 theirs: *theirs,
5073 }
5074}
5075
5076fn merge_blob_bytes(reader: &impl ObjectReader, oid: &ObjectId) -> Result<Vec<u8>> {
5078 let object = reader.read_object(oid)?;
5079 if object.object_type != ObjectType::Blob {
5080 return Err(GitError::InvalidObject(format!(
5081 "expected blob {}, found {}",
5082 oid,
5083 object.object_type.as_str()
5084 )));
5085 }
5086 Ok(object.body.clone())
5087}
5088
5089fn merge_file_modes(base: Option<u32>, ours: u32, theirs: u32) -> (u32, bool) {
5092 if ours == theirs {
5093 return (ours, false);
5094 }
5095 match base {
5096 Some(base) if ours == base => (theirs, false),
5097 Some(base) if theirs == base => (ours, false),
5098 _ => (ours, true),
5099 }
5100}
5101
5102fn write_merged_tree(db: &FileObjectDatabase, leaves: &MergeEntryMap) -> Result<ObjectId> {
5105 let mut root = MergeTreeNode::default();
5106 for (path, (mode, oid)) in leaves {
5107 root.insert(path, *mode, *oid);
5108 }
5109 root.write(db)
5110}
5111
5112#[derive(Default)]
5113struct MergeTreeNode {
5114 blobs: BTreeMap<Vec<u8>, (u32, ObjectId)>,
5115 subtrees: BTreeMap<Vec<u8>, MergeTreeNode>,
5116}
5117
5118impl MergeTreeNode {
5119 fn insert(&mut self, path: &[u8], mode: u32, oid: ObjectId) {
5120 match path.iter().position(|byte| *byte == b'/') {
5121 Some(slash) => {
5122 let component = path[..slash].to_vec();
5123 let rest = &path[slash + 1..];
5124 self.subtrees
5125 .entry(component)
5126 .or_default()
5127 .insert(rest, mode, oid);
5128 }
5129 None => {
5130 self.blobs.insert(path.to_vec(), (mode, oid));
5131 }
5132 }
5133 }
5134
5135 fn write(&self, db: &FileObjectDatabase) -> Result<ObjectId> {
5136 let mut entries: Vec<TreeEntry> = Vec::new();
5137 for (name, (mode, oid)) in &self.blobs {
5138 entries.push(TreeEntry {
5139 mode: *mode,
5140 name: BString::from(name.clone()),
5141 oid: *oid,
5142 });
5143 }
5144 for (name, subtree) in &self.subtrees {
5145 let oid = subtree.write(db)?;
5146 entries.push(TreeEntry {
5147 mode: 0o040000,
5148 name: BString::from(name.clone()),
5149 oid,
5150 });
5151 }
5152 entries.sort_by_key(merge_tree_sort_key);
5153 let tree = Tree { entries };
5154 db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
5155 }
5156}
5157
5158fn merge_tree_sort_key(entry: &TreeEntry) -> Vec<u8> {
5159 let mut key = entry.name.as_bytes().to_vec();
5160 if entry.mode == 0o040000 {
5161 key.push(b'/');
5162 }
5163 key
5164}
5165
5166#[derive(Clone, Copy, PartialEq, Eq)]
5170enum RenameSide {
5171 Ours,
5172 Theirs,
5173}
5174
5175#[derive(Clone)]
5177struct MergeRename {
5178 source: Vec<u8>,
5179 side: RenameSide,
5180}
5181
5182#[derive(Clone)]
5186struct RenameDelete {
5187 source: Vec<u8>,
5189 side: RenameSide,
5191}
5192
5193#[derive(Default)]
5197struct MergeRenames {
5198 dest_to_source: BTreeMap<Vec<u8>, MergeRename>,
5202 rename_deletes: BTreeMap<Vec<u8>, RenameDelete>,
5205}
5206
5207struct SideRenames {
5212 pairs: Vec<(Vec<u8>, Vec<u8>)>,
5213}
5214
5215fn detect_merge_renames(
5225 db: &FileObjectDatabase,
5226 format: ObjectFormat,
5227 base_map: &MergeEntryMap,
5228 ours_map: &MergeEntryMap,
5229 theirs_map: &MergeEntryMap,
5230 options: &MergeTreesOptions<'_>,
5231) -> Result<(MergeRenames, SideRenames, SideRenames)> {
5232 let mut renames = MergeRenames::default();
5233
5234 let ours_side = collect_side_renames(
5236 db,
5237 format,
5238 base_map,
5239 ours_map,
5240 theirs_map,
5241 RenameSide::Ours,
5242 options.rename_threshold,
5243 &mut renames,
5244 )?;
5245 let theirs_side = collect_side_renames(
5247 db,
5248 format,
5249 base_map,
5250 theirs_map,
5251 ours_map,
5252 RenameSide::Theirs,
5253 options.rename_threshold,
5254 &mut renames,
5255 )?;
5256
5257 Ok((renames, ours_side, theirs_side))
5258}
5259
5260#[allow(clippy::too_many_arguments)]
5265fn collect_side_renames(
5266 db: &FileObjectDatabase,
5267 format: ObjectFormat,
5268 base_map: &MergeEntryMap,
5269 side_map: &MergeEntryMap,
5270 other_map: &MergeEntryMap,
5271 side: RenameSide,
5272 threshold: u8,
5273 renames: &mut MergeRenames,
5274) -> Result<SideRenames> {
5275 let base_tree = entry_map_as_tracked(base_map);
5278 let side_tree = entry_map_as_tracked(side_map);
5279 let options = RenameDetectionOptions {
5280 base: DiffNameStatusOptions {
5281 detect_renames: true,
5282 detect_copies: false,
5283 find_copies_harder: false,
5284 rename_empty: false,
5285 },
5286 detect_inexact: true,
5287 rename_threshold: threshold,
5288 copy_threshold: threshold,
5289 };
5290 let changes = diff_name_status_maps_with_renames(
5291 &base_tree,
5292 &side_tree,
5293 base_tree.keys().chain(side_tree.keys()),
5294 options,
5295 |oid| merge_blob_bytes(db, oid).ok(),
5296 )?;
5297
5298 let mut pairs = Vec::new();
5299 for change in changes {
5300 let NameStatus::Renamed(_) = change.status else {
5301 continue;
5302 };
5303 let Some(old_path) = change.old_path.as_ref() else {
5304 continue;
5305 };
5306 let old = old_path.as_bytes().to_vec();
5307 let new = change.path.as_bytes().to_vec();
5308 pairs.push((old.clone(), new.clone()));
5310
5311 if !other_map.contains_key(&old) {
5316 if base_map.contains_key(&old) && !other_map.contains_key(&new) {
5321 renames
5322 .rename_deletes
5323 .entry(new.clone())
5324 .or_insert(RenameDelete {
5325 source: old.clone(),
5326 side,
5327 });
5328 }
5329 continue;
5330 }
5331 if other_map.contains_key(&new) {
5334 continue;
5335 }
5336 renames
5340 .dest_to_source
5341 .entry(new)
5342 .or_insert(MergeRename { source: old, side });
5343 }
5344
5345 let _ = format;
5346 Ok(SideRenames { pairs })
5347}
5348
5349fn apply_merge_renames(
5355 base_map: &MergeEntryMap,
5356 ours_map: &MergeEntryMap,
5357 theirs_map: &MergeEntryMap,
5358 renames: &MergeRenames,
5359) -> (MergeEntryMap, MergeEntryMap, MergeEntryMap) {
5360 if renames.dest_to_source.is_empty() {
5361 return (base_map.clone(), ours_map.clone(), theirs_map.clone());
5362 }
5363 let mut base = base_map.clone();
5364 let mut ours = ours_map.clone();
5365 let mut theirs = theirs_map.clone();
5366
5367 for (new, rename) in &renames.dest_to_source {
5368 let old = &rename.source;
5369 if let Some(entry) = base.remove(old) {
5371 base.entry(new.clone()).or_insert(entry);
5372 }
5373 for side in [&mut ours, &mut theirs] {
5375 if let Some(entry) = side.remove(old) {
5376 side.entry(new.clone()).or_insert(entry);
5377 }
5378 }
5379 }
5380 (base, ours, theirs)
5381}
5382
5383fn parent_dir(path: &[u8]) -> Option<&[u8]> {
5387 path.iter().rposition(|b| *b == b'/').map(|i| &path[..i])
5388}
5389
5390fn apply_dir_rename(old_dir: &[u8], new_dir: &[u8], path: &[u8]) -> Vec<u8> {
5394 let rest_start = if new_dir.is_empty() {
5397 old_dir.len() + 1
5398 } else {
5399 old_dir.len()
5400 };
5401 let mut out = new_dir.to_vec();
5402 out.extend_from_slice(&path[rest_start..]);
5403 out
5404}
5405
5406fn check_dir_renamed<'a>(
5409 path: &[u8],
5410 dir_renames: &'a BTreeMap<Vec<u8>, Vec<u8>>,
5411) -> Option<(&'a [u8], &'a [u8])> {
5412 let mut cur = parent_dir(path);
5413 while let Some(dir) = cur {
5414 if let Some((old_dir, new_dir)) = dir_renames.get_key_value(dir) {
5415 return Some((old_dir.as_slice(), new_dir.as_slice()));
5416 }
5417 cur = parent_dir(dir);
5418 }
5419 None
5420}
5421
5422struct DirectoryRenameMaps {
5424 ours: BTreeMap<Vec<u8>, Vec<u8>>,
5427 theirs: BTreeMap<Vec<u8>, Vec<u8>>,
5429 split_dirs: BTreeSet<Vec<u8>>,
5432}
5433
5434fn compute_directory_renames(
5443 base_map: &MergeEntryMap,
5444 ours_map: &MergeEntryMap,
5445 theirs_map: &MergeEntryMap,
5446 ours_side: &SideRenames,
5447 theirs_side: &SideRenames,
5448) -> DirectoryRenameMaps {
5449 let ours = compute_side_dir_renames(&ours_side.pairs, base_map, ours_map);
5450 let theirs = compute_side_dir_renames(&theirs_side.pairs, base_map, theirs_map);
5451
5452 let mut split_dirs = BTreeSet::new();
5454 split_dirs.extend(ours.split.iter().cloned());
5455 split_dirs.extend(theirs.split.iter().cloned());
5456
5457 let mut ours_map_out = ours.renames;
5461 let mut theirs_map_out = theirs.renames;
5462 let dup: Vec<Vec<u8>> = ours_map_out
5463 .keys()
5464 .filter(|k| theirs_map_out.contains_key(*k))
5465 .cloned()
5466 .collect();
5467 for k in dup {
5468 ours_map_out.remove(&k);
5469 theirs_map_out.remove(&k);
5470 }
5471
5472 DirectoryRenameMaps {
5473 ours: ours_map_out,
5474 theirs: theirs_map_out,
5475 split_dirs,
5476 }
5477}
5478
5479struct SideDirRenames {
5481 renames: BTreeMap<Vec<u8>, Vec<u8>>,
5482 split: BTreeSet<Vec<u8>>,
5483}
5484
5485fn compute_side_dir_renames(
5488 pairs: &[(Vec<u8>, Vec<u8>)],
5489 base_map: &MergeEntryMap,
5490 side_map: &MergeEntryMap,
5491) -> SideDirRenames {
5492 let mut counts: BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>> = BTreeMap::new();
5494 for (old, new) in pairs {
5495 let old_dir = parent_dir(old);
5500 let new_dir = parent_dir(new);
5501 if old_dir == new_dir {
5502 continue;
5503 }
5504 let od = old_dir.map(<[u8]>::to_vec).unwrap_or_default();
5505 let nd = new_dir.map(<[u8]>::to_vec).unwrap_or_default();
5506 *counts.entry(od).or_default().entry(nd).or_default() += 1;
5507 }
5508
5509 let mut renames = BTreeMap::new();
5510 let mut split = BTreeSet::new();
5511 for (old_dir, targets) in counts {
5512 let mut max = 0usize;
5513 let mut bad_max = 0usize;
5514 let mut best: Option<Vec<u8>> = None;
5515 for (target, count) in &targets {
5516 if *count == max {
5517 bad_max = max;
5518 } else if *count > max {
5519 max = *count;
5520 best = Some(target.clone());
5521 }
5522 }
5523 if max == 0 {
5524 continue;
5525 }
5526 if bad_max == max {
5527 split.insert(old_dir);
5528 continue;
5529 }
5530 if let Some(best) = best
5534 && directory_fully_removed(&old_dir, base_map, side_map)
5535 {
5536 renames.insert(old_dir, best);
5537 }
5538 }
5539
5540 SideDirRenames { renames, split }
5541}
5542
5543fn directory_fully_removed(dir: &[u8], base_map: &MergeEntryMap, side_map: &MergeEntryMap) -> bool {
5546 let mut prefix = dir.to_vec();
5547 prefix.push(b'/');
5548 for path in base_map.keys() {
5549 if path.starts_with(&prefix) && side_map.contains_key(path) {
5550 return false;
5551 }
5552 }
5553 true
5554}
5555
5556fn apply_directory_renames(
5564 base_map: &MergeEntryMap,
5565 ours_map: &MergeEntryMap,
5566 theirs_map: &MergeEntryMap,
5567 dir_renames: &DirectoryRenameMaps,
5568) -> (MergeEntryMap, MergeEntryMap, BTreeSet<Vec<u8>>) {
5569 let mut ours = ours_map.clone();
5570 let mut theirs = theirs_map.clone();
5571 let mut rehomed = BTreeSet::new();
5572
5573 rehome_side(
5577 base_map,
5578 &mut theirs,
5579 &dir_renames.ours,
5580 &dir_renames.split_dirs,
5581 &mut rehomed,
5582 );
5583 rehome_side(
5584 base_map,
5585 &mut ours,
5586 &dir_renames.theirs,
5587 &dir_renames.split_dirs,
5588 &mut rehomed,
5589 );
5590
5591 (ours, theirs, rehomed)
5592}
5593
5594fn rehome_side(
5600 base_map: &MergeEntryMap,
5601 target: &mut MergeEntryMap,
5602 renamer_dirs: &BTreeMap<Vec<u8>, Vec<u8>>,
5603 split_dirs: &BTreeSet<Vec<u8>>,
5604 rehomed: &mut BTreeSet<Vec<u8>>,
5605) {
5606 if renamer_dirs.is_empty() {
5607 return;
5608 }
5609 let candidates: Vec<Vec<u8>> = target
5611 .keys()
5612 .filter(|p| !base_map.contains_key(*p))
5613 .cloned()
5614 .collect();
5615 for path in candidates {
5616 if let Some(dir) = parent_dir(&path)
5618 && split_dirs.contains(dir)
5619 {
5620 continue;
5621 }
5622 let Some((old_dir, new_dir)) = check_dir_renamed(&path, renamer_dirs) else {
5623 continue;
5624 };
5625 let dest = apply_dir_rename(old_dir, new_dir, &path);
5626 if dest == path {
5627 continue;
5628 }
5629 if let Some(entry) = target.remove(&path) {
5630 target.entry(dest.clone()).or_insert(entry);
5631 rehomed.insert(dest);
5632 }
5633 }
5634}
5635
5636fn qualify_label(label: &str, path: &[u8]) -> String {
5639 format!("{label}:{}", String::from_utf8_lossy(path))
5640}
5641
5642fn entry_map_as_tracked(map: &MergeEntryMap) -> BTreeMap<Vec<u8>, TrackedEntry> {
5645 map.iter()
5646 .map(|(path, (mode, oid))| {
5647 (
5648 path.clone(),
5649 TrackedEntry {
5650 mode: *mode,
5651 oid: *oid,
5652 },
5653 )
5654 })
5655 .collect()
5656}
5657
5658#[cfg(test)]
5659mod tests {
5660 use super::*;
5661 use sley_formats::RepositoryLayout;
5662 use sley_object::TreeEntry;
5663 use sley_odb::ObjectWriter;
5664 use std::path::PathBuf;
5665 use std::sync::atomic::{AtomicU64, Ordering};
5666
5667 static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
5668
5669 #[test]
5670 fn name_status_reports_added_from_index() {
5671 let root = temp_root();
5672 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5673 .expect("test operation should succeed");
5674 let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
5675 let oid = db
5676 .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
5677 .expect("test operation should succeed");
5678 let index = Index {
5679 version: 2,
5680 entries: vec![sley_index::IndexEntry {
5681 ctime_seconds: 0,
5682 ctime_nanoseconds: 0,
5683 mtime_seconds: 0,
5684 mtime_nanoseconds: 0,
5685 dev: 0,
5686 ino: 0,
5687 mode: 0o100644,
5688 uid: 0,
5689 gid: 0,
5690 size: 6,
5691 oid,
5692 flags: "hello.txt".len() as u16,
5693 flags_extended: 0,
5694 path: BString::from(b"hello.txt"),
5695 }],
5696 extensions: Vec::new(),
5697 checksum: None,
5698 };
5699 fs::write(
5700 layout.git_dir.join("index"),
5701 index
5702 .write_v2_sha1()
5703 .expect("test operation should succeed"),
5704 )
5705 .expect("test operation should succeed");
5706 fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
5707 let changes = diff_name_status_head_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
5708 .expect("test operation should succeed");
5709 assert_eq!(changes[0].line(), "A\thello.txt");
5710 fs::remove_dir_all(root).expect("test operation should succeed");
5711 }
5712
5713 #[test]
5714 fn index_worktree_diff_returns_staged_gitlinks() {
5715 let root = temp_root();
5716 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5717 .expect("test operation should succeed");
5718 let oid = ObjectId::from_hex(
5719 ObjectFormat::Sha1,
5720 "1111111111111111111111111111111111111111",
5721 )
5722 .expect("test operation should succeed");
5723 let index = Index {
5724 version: 2,
5725 entries: vec![sley_index::IndexEntry {
5726 ctime_seconds: 0,
5727 ctime_nanoseconds: 0,
5728 mtime_seconds: 0,
5729 mtime_nanoseconds: 0,
5730 dev: 0,
5731 ino: 0,
5732 mode: 0o160000,
5733 uid: 0,
5734 gid: 0,
5735 size: 0,
5736 oid,
5737 flags: "deps/sub".len() as u16,
5738 flags_extended: 0,
5739 path: BString::from(b"deps/sub"),
5740 }],
5741 extensions: Vec::new(),
5742 checksum: None,
5743 };
5744 fs::write(
5745 layout.git_dir.join("index"),
5746 index
5747 .write_v2_sha1()
5748 .expect("test operation should succeed"),
5749 )
5750 .expect("test operation should succeed");
5751
5752 let diff = diff_name_status_index_worktree_with_options_and_gitlinks(
5753 &root,
5754 &layout.git_dir,
5755 ObjectFormat::Sha1,
5756 DiffNameStatusOptions::default(),
5757 )
5758 .expect("test operation should succeed");
5759
5760 assert_eq!(diff.entries.len(), 1);
5761 let gitlinks = diff.staged_gitlinks;
5762 assert_eq!(gitlinks.len(), 1);
5763 assert_eq!(gitlinks[0].path.as_bytes(), b"deps/sub");
5764 assert_eq!(gitlinks[0].oid, oid);
5765 fs::remove_dir_all(root).expect("test operation should succeed");
5766 }
5767
5768 #[cfg(unix)]
5769 #[test]
5770 fn index_worktree_diff_ignores_untracked_dangling_symlink() {
5771 use std::os::unix::fs::symlink;
5772
5773 let root = temp_root();
5774 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5775 .expect("test operation should succeed");
5776 let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
5777 let oid = db
5778 .write_object(EncodedObject::new(ObjectType::Blob, b"clean\n".to_vec()))
5779 .expect("test operation should succeed");
5780 let index = Index {
5781 version: 2,
5782 entries: vec![sley_index::IndexEntry {
5783 ctime_seconds: 0,
5784 ctime_nanoseconds: 0,
5785 mtime_seconds: 0,
5786 mtime_nanoseconds: 0,
5787 dev: 0,
5788 ino: 0,
5789 mode: 0o100644,
5790 uid: 0,
5791 gid: 0,
5792 size: 6,
5793 oid,
5794 flags: "tracked.txt".len() as u16,
5795 flags_extended: 0,
5796 path: BString::from(b"tracked.txt"),
5797 }],
5798 extensions: Vec::new(),
5799 checksum: None,
5800 };
5801 fs::write(
5802 layout.git_dir.join("index"),
5803 index
5804 .write_v2_sha1()
5805 .expect("test operation should succeed"),
5806 )
5807 .expect("test operation should succeed");
5808 fs::write(root.join("tracked.txt"), b"clean\n").expect("test operation should succeed");
5809 symlink("missing-target", root.join("untracked-link"))
5810 .expect("test operation should succeed");
5811
5812 let changes = diff_name_status_index_worktree_with_options(
5813 &root,
5814 &layout.git_dir,
5815 ObjectFormat::Sha1,
5816 DiffNameStatusOptions {
5817 detect_renames: false,
5818 detect_copies: false,
5819 find_copies_harder: false,
5820 rename_empty: true,
5821 },
5822 )
5823 .expect("untracked dangling symlink should be ignored");
5824 assert!(changes.is_empty());
5825 fs::remove_dir_all(root).expect("test operation should succeed");
5826 }
5827
5828 #[test]
5829 fn index_worktree_diff_trusts_non_racy_stat_cache() {
5830 let root = temp_root();
5831 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5832 .expect("test operation should succeed");
5833 let worktree_path = root.join("tracked.txt");
5834 fs::write(&worktree_path, b"clean\n").expect("test operation should succeed");
5835 let metadata = fs::symlink_metadata(&worktree_path).expect("test operation should succeed");
5836 let (mtime_seconds, mtime_nanoseconds) =
5837 sley_index::file_mtime_parts(&metadata).expect("test operation should succeed");
5838 let bogus_oid = ObjectId::from_hex(
5839 ObjectFormat::Sha1,
5840 "1111111111111111111111111111111111111111",
5841 )
5842 .expect("test operation should succeed");
5843 let index = Index {
5844 version: 2,
5845 entries: vec![sley_index::IndexEntry {
5846 ctime_seconds: 0,
5847 ctime_nanoseconds: 0,
5848 mtime_seconds: mtime_seconds as u32,
5849 mtime_nanoseconds: mtime_nanoseconds as u32,
5850 dev: 0,
5851 ino: 0,
5852 mode: sley_index::worktree_metadata_mode(&metadata),
5853 uid: 0,
5854 gid: 0,
5855 size: metadata.len() as u32,
5856 oid: bogus_oid,
5857 flags: "tracked.txt".len() as u16,
5858 flags_extended: 0,
5859 path: BString::from(b"tracked.txt"),
5860 }],
5861 extensions: Vec::new(),
5862 checksum: None,
5863 };
5864 std::thread::sleep(std::time::Duration::from_millis(1100));
5865 fs::write(
5866 layout.git_dir.join("index"),
5867 index
5868 .write_v2_sha1()
5869 .expect("test operation should succeed"),
5870 )
5871 .expect("test operation should succeed");
5872
5873 let changes = diff_name_status_index_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
5874 .expect("test operation should succeed");
5875 assert!(
5876 changes.is_empty(),
5877 "a clean non-racy stat match must reuse the cached index oid"
5878 );
5879 fs::remove_dir_all(root).expect("test operation should succeed");
5880 }
5881
5882 fn temp_root() -> PathBuf {
5883 let path = std::env::temp_dir().join(format!(
5884 "sley-diff-{}-{}",
5885 std::process::id(),
5886 TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
5887 ));
5888 fs::create_dir_all(&path).expect("test operation should succeed");
5889 path
5890 }
5891
5892 fn merge_opts() -> MergeBlobOptions<'static> {
5895 MergeBlobOptions {
5896 ours_label: "ours",
5897 theirs_label: "theirs",
5898 base_label: "base",
5899 style: ConflictStyle::Merge,
5900 }
5901 }
5902
5903 #[test]
5904 fn split_lines_preserves_content_and_newlines() {
5905 let lines = split_lines(b"a\nb\nc\n");
5906 assert_eq!(lines.len(), 3);
5907 assert_eq!(lines[0].content, b"a\n");
5908 assert!(lines[0].has_newline);
5909 assert_eq!(lines[2].content, b"c\n");
5910 assert!(lines[2].has_newline);
5911 assert!(split_lines(b"").is_empty());
5912 }
5913
5914 #[test]
5915 fn split_lines_tracks_missing_final_newline() {
5916 let lines = split_lines(b"a\nb");
5917 assert_eq!(lines.len(), 2);
5918 assert!(lines[0].has_newline);
5919 assert!(!lines[1].has_newline);
5920 assert_eq!(lines[1].content, b"b");
5921 assert_eq!(lines[1].bytes_without_newline(), b"b");
5922 let with_nl = split_lines(b"b\n");
5924 assert_ne!(lines[1], with_nl[0]);
5925 }
5926
5927 #[test]
5928 fn myers_replace_single_line() {
5929 let old = split_lines(b"a\nb\nc\n");
5930 let new = split_lines(b"a\nx\nc\n");
5931 assert_eq!(
5932 myers_diff_lines(&old, &new),
5933 vec![
5934 DiffOp::Equal(1),
5935 DiffOp::Delete(1),
5936 DiffOp::Insert(1),
5937 DiffOp::Equal(1),
5938 ]
5939 );
5940 }
5941
5942 #[test]
5943 fn myers_identical_is_single_equal() {
5944 let old = split_lines(b"a\nb\nc\n");
5945 let new = split_lines(b"a\nb\nc\n");
5946 assert_eq!(myers_diff_lines(&old, &new), vec![DiffOp::Equal(3)]);
5947 }
5948
5949 #[test]
5950 fn myers_pure_insert_and_delete() {
5951 let empty = split_lines(b"");
5952 let two = split_lines(b"a\nb\n");
5953 assert_eq!(myers_diff_lines(&empty, &two), vec![DiffOp::Insert(2)]);
5954 assert_eq!(myers_diff_lines(&two, &empty), vec![DiffOp::Delete(2)]);
5955
5956 let old = split_lines(b"a\nb\nc\nd\n");
5957 let new = split_lines(b"a\nc\nd\n");
5958 assert_eq!(
5959 myers_diff_lines(&old, &new),
5960 vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)]
5961 );
5962 }
5963
5964 #[test]
5965 fn myers_reconstructs_new_and_is_minimal() {
5966 let old = split_lines(b"the\nquick\nbrown\nfox\n");
5968 let new = split_lines(b"the\nlazy\nbrown\ncat\n");
5969 let ops = myers_diff_lines(&old, &new);
5970 let mut oi = 0usize;
5971 let mut ni = 0usize;
5972 let mut edits = 0usize;
5973 let mut rebuilt: Vec<u8> = Vec::new();
5974 for op in &ops {
5975 match *op {
5976 DiffOp::Equal(n) => {
5977 for _ in 0..n {
5978 assert_eq!(old[oi], new[ni]);
5979 rebuilt.extend_from_slice(old[oi].content);
5980 oi += 1;
5981 ni += 1;
5982 }
5983 }
5984 DiffOp::Delete(n) => {
5985 oi += n;
5986 edits += n;
5987 }
5988 DiffOp::Insert(n) => {
5989 for _ in 0..n {
5990 rebuilt.extend_from_slice(new[ni].content);
5991 ni += 1;
5992 }
5993 edits += n;
5994 }
5995 }
5996 }
5997 assert_eq!(rebuilt, b"the\nlazy\nbrown\ncat\n");
5998 assert_eq!(edits, 4);
6000 }
6001
6002 #[test]
6003 fn merge_non_overlapping_changes_is_clean() {
6004 let base = b"a\nb\nc\nd\ne\n";
6005 let ours = b"A\nb\nc\nd\ne\n";
6006 let theirs = b"a\nb\nc\nd\nE\n";
6007 let result = merge_blobs(base, ours, theirs, &merge_opts());
6008 assert!(!result.conflicted);
6009 assert_eq!(result.content, b"A\nb\nc\nd\nE\n");
6010 }
6011
6012 #[test]
6013 fn merge_identical_changes_no_conflict() {
6014 let base = b"a\nb\nc\n";
6015 let ours = b"a\nX\nc\n";
6016 let theirs = b"a\nX\nc\n";
6017 let result = merge_blobs(base, ours, theirs, &merge_opts());
6018 assert!(!result.conflicted);
6019 assert_eq!(result.content, b"a\nX\nc\n");
6020 }
6021
6022 #[test]
6023 fn merge_overlapping_change_emits_exact_markers() {
6024 let base = b"a\nb\nc\n";
6025 let ours = b"a\nOURS\nc\n";
6026 let theirs = b"a\nTHEIRS\nc\n";
6027 let result = merge_blobs(base, ours, theirs, &merge_opts());
6028 assert!(result.conflicted);
6029 assert_eq!(
6030 result.content,
6031 b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\nc\n".to_vec(),
6032 );
6033 }
6034
6035 #[test]
6036 fn merge_diff3_style_includes_base_section() {
6037 let base = b"a\nb\nc\n";
6038 let ours = b"a\nOURS\nc\n";
6039 let theirs = b"a\nTHEIRS\nc\n";
6040 let options = MergeBlobOptions {
6041 style: ConflictStyle::Diff3,
6042 ..merge_opts()
6043 };
6044 let result = merge_blobs(base, ours, theirs, &options);
6045 assert!(result.conflicted);
6046 assert_eq!(
6047 result.content,
6048 b"a\n<<<<<<< ours\nOURS\n||||||| base\nb\n=======\nTHEIRS\n>>>>>>> theirs\nc\n"
6049 .to_vec(),
6050 );
6051 }
6052
6053 #[test]
6054 fn merge_empty_label_omits_trailing_space() {
6055 let base = b"a\nb\nc\n";
6056 let ours = b"a\nOURS\nc\n";
6057 let theirs = b"a\nTHEIRS\nc\n";
6058 let options = MergeBlobOptions {
6059 ours_label: "",
6060 theirs_label: "",
6061 base_label: "",
6062 style: ConflictStyle::Merge,
6063 };
6064 let result = merge_blobs(base, ours, theirs, &options);
6065 assert!(result.conflicted);
6066 assert_eq!(
6068 result.content,
6069 b"a\n<<<<<<<\nOURS\n=======\nTHEIRS\n>>>>>>>\nc\n".to_vec(),
6070 );
6071 }
6072
6073 #[test]
6074 fn merge_add_add_empty_base_conflicts() {
6075 let result = merge_blobs(b"", b"x\ny\n", b"p\nq\n", &merge_opts());
6076 assert!(result.conflicted);
6077 assert_eq!(
6078 result.content,
6079 b"<<<<<<< ours\nx\ny\n=======\np\nq\n>>>>>>> theirs\n".to_vec(),
6080 );
6081 }
6082
6083 #[test]
6084 fn merge_add_add_empty_base_identical_is_clean() {
6085 let result = merge_blobs(b"", b"x\ny\n", b"x\ny\n", &merge_opts());
6086 assert!(!result.conflicted);
6087 assert_eq!(result.content, b"x\ny\n");
6088 }
6089
6090 #[test]
6091 fn merge_deletion_one_side_takes_deletion() {
6092 let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nb\nc\n", &merge_opts());
6094 assert!(!result.conflicted);
6095 assert_eq!(result.content, b"a\nc\n");
6096 }
6097
6098 #[test]
6099 fn merge_deletion_vs_modification_conflicts() {
6100 let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nB!\nc\n", &merge_opts());
6102 assert!(result.conflicted);
6103 assert_eq!(
6105 result.content,
6106 b"a\n<<<<<<< ours\n=======\nB!\n>>>>>>> theirs\nc\n".to_vec(),
6107 );
6108 }
6109
6110 #[test]
6111 fn merge_missing_final_newline_marker_starts_on_own_line() {
6112 let base = b"a\nb";
6115 let ours = b"a\nOURS";
6116 let theirs = b"a\nTHEIRS";
6117 let result = merge_blobs(base, ours, theirs, &merge_opts());
6118 assert!(result.conflicted);
6119 assert_eq!(
6120 result.content,
6121 b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\n".to_vec(),
6122 );
6123 }
6124
6125 #[test]
6126 fn merge_clean_preserves_missing_final_newline() {
6127 let result = merge_blobs(b"a\nb\n", b"a\nb", b"a\nb\n", &merge_opts());
6130 assert!(!result.conflicted);
6131 assert_eq!(result.content, b"a\nb");
6132 }
6133
6134 #[test]
6135 fn merge_both_append_identical_tail_is_clean() {
6136 let result = merge_blobs(b"a\n", b"a\nz\n", b"a\nz\n", &merge_opts());
6137 assert!(!result.conflicted);
6138 assert_eq!(result.content, b"a\nz\n");
6139 }
6140
6141 #[test]
6142 fn merge_when_ours_equals_base_yields_theirs() {
6143 let base = b"b\na\n";
6146 let theirs = b"b\nb\nc\na\nc\n";
6147 let result = merge_blobs(base, base, theirs, &merge_opts());
6148 assert!(!result.conflicted);
6149 assert_eq!(result.content, theirs.to_vec());
6150 }
6151 fn applied(outcome: ApplyOutcome) -> Vec<u8> {
6152 match outcome {
6153 ApplyOutcome::Applied(bytes) => bytes,
6154 ApplyOutcome::Rejected => panic!("expected Applied, got Rejected"),
6155 }
6156 }
6157
6158 #[test]
6159 fn parse_multi_file_patch() {
6160 let patch = b"\
6161diff --git a/one.txt b/one.txt
6162index aaaaaaa..bbbbbbb 100644
6163--- a/one.txt
6164+++ b/one.txt
6165@@ -1,3 +1,3 @@
6166 alpha
6167-beta
6168+BETA
6169 gamma
6170diff --git a/two.txt b/two.txt
6171index ccccccc..ddddddd 100644
6172--- a/two.txt
6173+++ b/two.txt
6174@@ -1,2 +1,3 @@
6175 first
6176+inserted
6177 second
6178";
6179 let patches = parse_unified_patch(patch).expect("test operation should succeed");
6180 assert_eq!(patches.len(), 2);
6181
6182 assert_eq!(patches[0].old_path.as_deref(), Some(b"one.txt".as_slice()));
6183 assert_eq!(patches[0].new_path.as_deref(), Some(b"one.txt".as_slice()));
6184 assert_eq!(patches[0].old_mode, None);
6185 assert_eq!(patches[0].hunks.len(), 1);
6186 let h = &patches[0].hunks[0];
6187 assert_eq!(
6188 (h.old_start, h.old_len, h.new_start, h.new_len),
6189 (1, 3, 1, 3)
6190 );
6191 assert_eq!(
6192 h.lines,
6193 vec![
6194 HunkLine::Context(b"alpha".to_vec()),
6195 HunkLine::Delete(b"beta".to_vec()),
6196 HunkLine::Insert(b"BETA".to_vec()),
6197 HunkLine::Context(b"gamma".to_vec()),
6198 ]
6199 );
6200
6201 assert_eq!(patches[1].new_path.as_deref(), Some(b"two.txt".as_slice()));
6202 assert_eq!(patches[1].hunks[0].new_len, 3);
6203 }
6204
6205 #[test]
6206 fn parse_default_hunk_range_length() {
6207 let patch = b"\
6209--- a/x
6210+++ b/x
6211@@ -1 +1,2 @@
6212 line
6213+added
6214";
6215 let patches = parse_unified_patch(patch).expect("test operation should succeed");
6216 let h = &patches[0].hunks[0];
6217 assert_eq!(
6218 (h.old_start, h.old_len, h.new_start, h.new_len),
6219 (1, 1, 1, 2)
6220 );
6221 }
6222
6223 #[test]
6224 fn parse_hunk_header_before_file_errors() {
6225 let patch = b"@@ -1,1 +1,1 @@\n context\n";
6226 assert!(parse_unified_patch(patch).is_err());
6227 }
6228
6229 #[test]
6230 fn parse_mismatched_counts_errors() {
6231 let patch = b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n only\n+new\n";
6233 assert!(parse_unified_patch(patch).is_err());
6234 }
6235
6236 #[test]
6237 fn apply_clean_hunk() {
6238 let base = b"alpha\nbeta\ngamma\n";
6239 let patch = parse_unified_patch(
6240 b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6241 )
6242 .expect("test operation should succeed");
6243 let out = applied(apply_file_patch(base, &patch[0]));
6244 assert_eq!(out, b"alpha\nBETA\ngamma\n");
6245 }
6246
6247 #[test]
6248 fn apply_with_line_offset() {
6249 let base = b"pre1\npre2\npre3\nalpha\nbeta\ngamma\ntail\n";
6256 let patch = parse_unified_patch(
6257 b"--- a/x\n+++ b/x\n@@ -2,4 +2,4 @@\n alpha\n-beta\n+BETA\n gamma\n tail\n",
6258 )
6259 .expect("test operation should succeed");
6260 let out = applied(apply_file_patch(base, &patch[0]));
6261 assert_eq!(out, b"pre1\npre2\npre3\nalpha\nBETA\ngamma\ntail\n");
6262 }
6263
6264 #[test]
6265 fn apply_with_negative_line_offset() {
6266 let base = b"alpha\nbeta\ngamma\n";
6268 let patch = parse_unified_patch(
6269 b"--- a/x\n+++ b/x\n@@ -50,3 +50,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6270 )
6271 .expect("test operation should succeed");
6272 let out = applied(apply_file_patch(base, &patch[0]));
6273 assert_eq!(out, b"alpha\nBETA\ngamma\n");
6274 }
6275
6276 #[test]
6277 fn apply_multiple_hunks() {
6278 let base = b"a\nb\nc\nd\ne\nf\ng\nh\n";
6279 let patch = parse_unified_patch(
6280 b"--- a/x\n+++ b/x\n\
6281@@ -1,3 +1,3 @@\n a\n-b\n+B\n c\n\
6282@@ -6,3 +6,3 @@\n f\n-g\n+G\n h\n",
6283 )
6284 .expect("test operation should succeed");
6285 let out = applied(apply_file_patch(base, &patch[0]));
6286 assert_eq!(out, b"a\nB\nc\nd\ne\nf\nG\nh\n");
6287 }
6288
6289 #[test]
6290 fn reject_on_context_mismatch() {
6291 let base = b"alpha\nDIFFERENT\ngamma\n";
6292 let patch = parse_unified_patch(
6293 b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6294 )
6295 .expect("test operation should succeed");
6296 assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6297 }
6298
6299 #[test]
6300 fn reject_when_match_end_required_but_not_at_eof() {
6301 let base = b"one\ntwo\nanchor\nalready\nappended\n";
6308 let patch = parse_unified_patch(
6312 b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n",
6313 )
6314 .expect("test operation should succeed");
6315 assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6316 }
6317
6318 #[test]
6319 fn append_at_eof_matches_when_context_reaches_end() {
6320 let base = b"one\ntwo\nanchor\n";
6324 let patch = parse_unified_patch(
6325 b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n",
6326 )
6327 .expect("test operation should succeed");
6328 let out = applied(apply_file_patch(base, &patch[0]));
6329 assert_eq!(out, b"one\ntwo\nanchor\nadded1\nadded2\n");
6330 }
6331
6332 #[test]
6333 fn reject_when_match_beginning_required_but_not_at_start() {
6334 let base = b"junk\nalpha\nbeta\ngamma\n";
6338 let patch = parse_unified_patch(
6339 b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n alpha\n+INSERT\n beta\n",
6340 )
6341 .expect("test operation should succeed");
6342 assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6343 }
6344
6345 #[test]
6346 fn no_default_fuzz_rejects_on_trailing_context_mismatch() {
6347 let base = b"alpha\nbeta\nDIVERGED\n";
6353 let patch = parse_unified_patch(
6354 b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6355 )
6356 .expect("test operation should succeed");
6357 assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6358 }
6359
6360 #[test]
6361 fn parse_and_apply_new_file() {
6362 let patch = parse_unified_patch(
6363 b"\
6364diff --git a/new.txt b/new.txt
6365new file mode 100644
6366index 0000000..1111111
6367--- /dev/null
6368+++ b/new.txt
6369@@ -0,0 +1,2 @@
6370+hello
6371+world
6372",
6373 )
6374 .expect("test operation should succeed");
6375 assert!(patches_first_is_new(&patch));
6376 assert_eq!(patch[0].old_path, None);
6377 assert_eq!(patch[0].new_path.as_deref(), Some(b"new.txt".as_slice()));
6378 assert_eq!(patch[0].new_mode, Some(0o100644));
6379 let out = applied(apply_file_patch(b"garbage that is ignored", &patch[0]));
6381 assert_eq!(out, b"hello\nworld\n");
6382 }
6383
6384 fn patches_first_is_new(patches: &[FilePatch]) -> bool {
6385 patches.first().map(|p| p.is_new).unwrap_or(false)
6386 }
6387
6388 #[test]
6389 fn parse_and_apply_delete_file() {
6390 let patch = parse_unified_patch(
6391 b"\
6392diff --git a/gone.txt b/gone.txt
6393deleted file mode 100644
6394index 1111111..0000000
6395--- a/gone.txt
6396+++ /dev/null
6397@@ -1,2 +0,0 @@
6398-hello
6399-world
6400",
6401 )
6402 .expect("test operation should succeed");
6403 assert!(patch[0].is_delete);
6404 assert_eq!(patch[0].old_path.as_deref(), Some(b"gone.txt".as_slice()));
6405 assert_eq!(patch[0].new_path, None);
6406 assert_eq!(patch[0].old_mode, Some(0o100644));
6407 let out = applied(apply_file_patch(b"hello\nworld\n", &patch[0]));
6408 assert_eq!(out, b"");
6409 }
6410
6411 #[test]
6412 fn parse_rename_headers() {
6413 let patch = parse_unified_patch(
6414 b"\
6415diff --git a/old/name.txt b/new/name.txt
6416similarity index 100%
6417rename from old/name.txt
6418rename to new/name.txt
6419",
6420 )
6421 .expect("test operation should succeed");
6422 assert!(patch[0].is_rename);
6423 assert_eq!(
6424 patch[0].old_path.as_deref(),
6425 Some(b"old/name.txt".as_slice())
6426 );
6427 assert_eq!(
6428 patch[0].new_path.as_deref(),
6429 Some(b"new/name.txt".as_slice())
6430 );
6431 assert!(patch[0].hunks.is_empty());
6432 }
6433
6434 #[test]
6435 fn parse_mode_change_headers() {
6436 let patch = parse_unified_patch(
6437 b"\
6438diff --git a/script.sh b/script.sh
6439old mode 100644
6440new mode 100755
6441",
6442 )
6443 .expect("test operation should succeed");
6444 assert_eq!(patch[0].old_mode, Some(0o100644));
6445 assert_eq!(patch[0].new_mode, Some(0o100755));
6446 assert!(!patch[0].is_new);
6447 assert!(!patch[0].is_delete);
6448 }
6449
6450 #[test]
6451 fn no_final_newline_base_preserved_when_untouched() {
6452 let base = b"alpha\nbeta\nnotail"; let patch = parse_unified_patch(
6462 b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n-alpha\n+ALPHA\n beta\n notail\n\\ No newline at end of file\n",
6463 )
6464 .expect("test operation should succeed");
6465 let out = applied(apply_file_patch(base, &patch[0]));
6466 assert_eq!(out, b"ALPHA\nbeta\nnotail");
6467 }
6468
6469 #[test]
6470 fn no_final_newline_added_by_patch() {
6471 let base = b"alpha\nbeta\n";
6474 let patch = parse_unified_patch(
6475 b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n",
6476 )
6477 .expect("test operation should succeed");
6478 assert!(patch[0].hunks[0].new_no_newline);
6479 assert!(!patch[0].hunks[0].old_no_newline);
6480 let out = applied(apply_file_patch(base, &patch[0]));
6481 assert_eq!(out, b"alpha\nbeta-notail");
6482 }
6483
6484 #[test]
6485 fn no_final_newline_in_base_matched_and_kept() {
6486 let base = b"alpha\nbeta"; let patch = parse_unified_patch(
6490 b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n-alpha\n+ALPHA\n beta\n\\ No newline at end of file\n",
6491 )
6492 .expect("test operation should succeed");
6493 assert!(patch[0].hunks[0].old_no_newline);
6494 assert!(patch[0].hunks[0].new_no_newline);
6495 let out = applied(apply_file_patch(base, &patch[0]));
6496 assert_eq!(out, b"ALPHA\nbeta");
6497 }
6498
6499 #[test]
6500 fn no_final_newline_mismatch_rejected() {
6501 let base = b"alpha\nbeta\n"; let patch = parse_unified_patch(
6505 b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n\\ No newline at end of file\n+beta2\n",
6506 )
6507 .expect("test operation should succeed");
6508 assert!(patch[0].hunks[0].old_no_newline);
6509 assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6510 }
6511
6512 #[test]
6513 fn delete_with_no_final_newline() {
6514 let base = b"only line no newline";
6516 let patch = parse_unified_patch(
6517 b"--- a/x\n+++ /dev/null\n@@ -1,1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n",
6518 )
6519 .expect("test operation should succeed");
6520 assert!(patch[0].is_delete);
6521 let out = applied(apply_file_patch(base, &patch[0]));
6522 assert_eq!(out, b"");
6523 }
6524
6525 #[test]
6526 fn apply_pure_insertion_hunk() {
6527 let base = b"first\nsecond\n";
6528 let patch =
6529 parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n first\n+middle\n second\n")
6530 .expect("test operation should succeed");
6531 let out = applied(apply_file_patch(base, &patch[0]));
6532 assert_eq!(out, b"first\nmiddle\nsecond\n");
6533 }
6534
6535 #[test]
6536 fn apply_pure_deletion_hunk() {
6537 let base = b"first\nmiddle\nsecond\n";
6538 let patch =
6539 parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,2 @@\n first\n-middle\n second\n")
6540 .expect("test operation should succeed");
6541 let out = applied(apply_file_patch(base, &patch[0]));
6542 assert_eq!(out, b"first\nsecond\n");
6543 }
6544
6545 #[test]
6546 fn apply_then_reparse_round_trip() {
6547 let base = b"l1\nl2\nl3\nl4\nl5\n";
6551 let text = b"--- a/f\n+++ b/f\n@@ -2,3 +2,4 @@\n l2\n-l3\n+L3\n+L3b\n l4\n";
6552 let p1 = parse_unified_patch(text).expect("test operation should succeed");
6553 let p2 = parse_unified_patch(text).expect("test operation should succeed");
6554 assert_eq!(p1, p2);
6555 let out = applied(apply_file_patch(base, &p1[0]));
6556 assert_eq!(out, b"l1\nl2\nL3\nL3b\nl4\nl5\n");
6557 }
6558
6559 #[test]
6560 fn empty_context_line_without_trailing_space() {
6561 let base = b"a\n\nb\n";
6564 let patch = parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n a\n\n-b\n+B\n")
6565 .expect("test operation should succeed");
6566 assert_eq!(patch[0].hunks[0].lines[1], HunkLine::Context(Vec::new()));
6567 let out = applied(apply_file_patch(base, &patch[0]));
6568 assert_eq!(out, b"a\n\nB\n");
6569 }
6570
6571 #[test]
6572 fn split_blob_lines_handles_edge_cases() {
6573 assert!(split_blob_lines(b"").is_empty());
6574 let single = split_blob_lines(b"abc");
6575 assert_eq!(single.len(), 1);
6576 assert!(single[0].no_newline);
6577 let terminated = split_blob_lines(b"abc\n");
6578 assert_eq!(terminated.len(), 1);
6579 assert!(!terminated[0].no_newline);
6580 let blank_then_eof = split_blob_lines(b"x\n");
6581 assert_eq!(blank_then_eof.len(), 1);
6582 }
6583
6584 #[test]
6587 fn similarity_identical_and_empty_conventions() {
6588 assert_eq!(blob_similarity(b"hello\nworld\n", b"hello\nworld\n"), 100);
6590 assert_eq!(blob_similarity(b"", b""), 100);
6592 assert_eq!(blob_similarity(b"", b"hello\n"), 0);
6594 assert_eq!(blob_similarity(b"hello\n", b""), 0);
6595 }
6596
6597 #[test]
6598 fn similarity_one_changed_line_is_75_and_symmetric() {
6599 let a = b"one\ntwo\nthree\nfour\nfive\n";
6605 let b = b"one\ntwo\nTHREE\nfour\nfive\n";
6606 assert_eq!(blob_similarity(a, b), 75);
6607 assert_eq!(blob_similarity(b, a), 75);
6609 }
6610
6611 #[test]
6612 fn similarity_one_edited_line_of_three_is_66_not_67() {
6613 assert_eq!(blob_similarity(b"a\nb\nc\n", b"a\nB\nc\n"), 66);
6619 assert_eq!(blob_similarity(b"a\nB\nc\n", b"a\nb\nc\n"), 66);
6620 }
6621
6622 #[test]
6623 fn similarity_small_append_is_88() {
6624 let a = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n";
6628 let b = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n";
6629 assert_eq!(blob_similarity(a, b), 88);
6630 }
6631
6632 #[test]
6633 fn similarity_half_rewrite_is_50() {
6634 let a = b"l1\nl2\nl3\nl4\nl5\nl6\n";
6637 let b = b"l1\nl2\nl3\nX4\nX5\nX6\n";
6638 assert_eq!(blob_similarity(a, b), 50);
6639 }
6640
6641 fn write_blob(db: &mut FileObjectDatabase, bytes: &[u8]) -> ObjectId {
6645 db.write_object(EncodedObject::new(ObjectType::Blob, bytes.to_vec()))
6646 .expect("test operation should succeed")
6647 }
6648
6649 fn write_tree(db: &mut FileObjectDatabase, entries: &[(&[u8], u32, ObjectId)]) -> ObjectId {
6652 let mut tree_entries: Vec<TreeEntry> = entries
6653 .iter()
6654 .map(|(name, mode, oid)| TreeEntry {
6655 mode: *mode,
6656 name: BString::from(*name),
6657 oid: *oid,
6658 })
6659 .collect();
6660 tree_entries.sort_by(|a, b| a.name.cmp(&b.name));
6661 let tree = Tree {
6662 entries: tree_entries,
6663 };
6664 db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
6665 .expect("test operation should succeed")
6666 }
6667
6668 #[test]
6669 fn inexact_rename_detected_with_plausible_score() {
6670 let root = temp_root();
6673 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6674 .expect("test operation should succeed");
6675 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6676
6677 let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
6678 let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
6679 let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
6680 let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
6681
6682 let opts = RenameDetectionOptions {
6683 base: DiffNameStatusOptions {
6684 detect_renames: true,
6685 detect_copies: false,
6686 find_copies_harder: false,
6687 rename_empty: true,
6688 },
6689 detect_inexact: true,
6690 rename_threshold: DEFAULT_RENAME_THRESHOLD,
6691 copy_threshold: DEFAULT_RENAME_THRESHOLD,
6692 };
6693 let entries = diff_name_status_trees_with_rename_options(
6694 &db,
6695 ObjectFormat::Sha1,
6696 &left,
6697 &right,
6698 opts,
6699 )
6700 .expect("test operation should succeed");
6701
6702 assert_eq!(
6703 entries.len(),
6704 1,
6705 "expected a single rename entry: {entries:?}"
6706 );
6707 assert_eq!(entries[0].status, NameStatus::Renamed(75));
6708 assert_eq!(
6709 entries[0].old_path.as_ref().map(|p| p.as_bytes()),
6710 Some(b"a.txt".as_slice())
6711 );
6712 assert_eq!(entries[0].path, b"b.txt");
6713 assert_eq!(entries[0].line(), "R075\ta.txt\tb.txt");
6714 fs::remove_dir_all(root).expect("test operation should succeed");
6715 }
6716
6717 #[test]
6718 fn inexact_rename_below_threshold_not_detected() {
6719 let root = temp_root();
6722 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6723 .expect("test operation should succeed");
6724 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6725
6726 let old = write_blob(&mut db, b"l1\nl2\nl3\nl4\nl5\nl6\n");
6727 let new = write_blob(&mut db, b"l1\nl2\nl3\nX4\nX5\nX6\n");
6728 let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
6729 let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
6730
6731 let opts = RenameDetectionOptions {
6732 base: DiffNameStatusOptions {
6733 detect_renames: true,
6734 detect_copies: false,
6735 find_copies_harder: false,
6736 rename_empty: true,
6737 },
6738 detect_inexact: true,
6739 rename_threshold: 60,
6740 copy_threshold: 60,
6741 };
6742 let entries = diff_name_status_trees_with_rename_options(
6743 &db,
6744 ObjectFormat::Sha1,
6745 &left,
6746 &right,
6747 opts,
6748 )
6749 .expect("test operation should succeed");
6750
6751 let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
6752 assert!(
6753 statuses.contains(&NameStatus::Added) && statuses.contains(&NameStatus::Deleted),
6754 "expected separate add/delete below threshold, got {entries:?}"
6755 );
6756 assert!(
6757 !statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))),
6758 "no rename should be reported below threshold: {entries:?}"
6759 );
6760
6761 let opts_low = RenameDetectionOptions {
6764 rename_threshold: 50,
6765 ..opts
6766 };
6767 let entries_low = diff_name_status_trees_with_rename_options(
6768 &db,
6769 ObjectFormat::Sha1,
6770 &left,
6771 &right,
6772 opts_low,
6773 )
6774 .expect("test operation should succeed");
6775 assert_eq!(entries_low.len(), 1);
6776 assert_eq!(entries_low[0].status, NameStatus::Renamed(50));
6777 fs::remove_dir_all(root).expect("test operation should succeed");
6778 }
6779
6780 #[test]
6781 fn exact_rename_scores_100_and_takes_priority() {
6782 let root = temp_root();
6785 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6786 .expect("test operation should succeed");
6787 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6788
6789 let oid = write_blob(&mut db, b"identical\ncontent\nhere\n");
6790 let left = write_tree(&mut db, &[(b"old.txt", 0o100644, oid)]);
6791 let right = write_tree(&mut db, &[(b"new.txt", 0o100644, oid)]);
6792
6793 for inexact in [false, true] {
6794 let opts = RenameDetectionOptions {
6795 base: DiffNameStatusOptions {
6796 detect_renames: true,
6797 detect_copies: false,
6798 find_copies_harder: false,
6799 rename_empty: true,
6800 },
6801 detect_inexact: inexact,
6802 rename_threshold: DEFAULT_RENAME_THRESHOLD,
6803 copy_threshold: DEFAULT_RENAME_THRESHOLD,
6804 };
6805 let entries = diff_name_status_trees_with_rename_options(
6806 &db,
6807 ObjectFormat::Sha1,
6808 &left,
6809 &right,
6810 opts,
6811 )
6812 .expect("test operation should succeed");
6813 assert_eq!(entries.len(), 1, "inexact={inexact}: {entries:?}");
6814 assert_eq!(entries[0].status, NameStatus::Renamed(100));
6815 assert_eq!(
6816 entries[0].old_path.as_ref().map(|p| p.as_bytes()),
6817 Some(b"old.txt".as_slice())
6818 );
6819 assert_eq!(entries[0].path, b"new.txt");
6820 }
6821 fs::remove_dir_all(root).expect("test operation should succeed");
6822 }
6823
6824 #[test]
6825 fn inexact_copy_detected_with_score() {
6826 let root = temp_root();
6831 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6832 .expect("test operation should succeed");
6833 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6834
6835 let orig = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\neee\n");
6836 let copy = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\nEEE\n");
6837 let left = write_tree(&mut db, &[(b"orig.txt", 0o100644, orig.clone())]);
6838 let right = write_tree(
6839 &mut db,
6840 &[(b"orig.txt", 0o100644, orig), (b"copy.txt", 0o100644, copy)],
6841 );
6842
6843 let opts = RenameDetectionOptions {
6844 base: DiffNameStatusOptions {
6845 detect_renames: true,
6846 detect_copies: true,
6847 find_copies_harder: true,
6848 rename_empty: true,
6849 },
6850 detect_inexact: true,
6851 rename_threshold: DEFAULT_RENAME_THRESHOLD,
6852 copy_threshold: DEFAULT_RENAME_THRESHOLD,
6853 };
6854 let entries = diff_name_status_trees_with_rename_options(
6855 &db,
6856 ObjectFormat::Sha1,
6857 &left,
6858 &right,
6859 opts,
6860 )
6861 .expect("test operation should succeed");
6862
6863 let copy_entry = entries
6864 .iter()
6865 .find(|e| e.path == b"copy.txt")
6866 .unwrap_or_else(|| panic!("no copy.txt entry: {entries:?}"));
6867 assert_eq!(copy_entry.status, NameStatus::Copied(80));
6868 assert_eq!(
6869 copy_entry.old_path.as_ref().map(|p| p.as_bytes()),
6870 Some(b"orig.txt".as_slice())
6871 );
6872 assert!(
6874 entries.iter().all(|e| e.status != NameStatus::Deleted),
6875 "copy must not delete the source: {entries:?}"
6876 );
6877 fs::remove_dir_all(root).expect("test operation should succeed");
6878 }
6879
6880 #[test]
6881 fn inexact_rename_with_small_edit_scores_88() {
6882 let root = temp_root();
6885 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6886 .expect("test operation should succeed");
6887 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6888
6889 let old = write_blob(
6890 &mut db,
6891 b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n",
6892 );
6893 let new = write_blob(
6894 &mut db,
6895 b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n",
6896 );
6897 let left = write_tree(&mut db, &[(b"src.txt", 0o100644, old)]);
6898 let right = write_tree(&mut db, &[(b"dst.txt", 0o100644, new)]);
6899
6900 let opts = RenameDetectionOptions::inexact(DiffNameStatusOptions {
6901 detect_renames: true,
6902 detect_copies: false,
6903 find_copies_harder: false,
6904 rename_empty: true,
6905 });
6906 let entries = diff_name_status_trees_with_rename_options(
6907 &db,
6908 ObjectFormat::Sha1,
6909 &left,
6910 &right,
6911 opts,
6912 )
6913 .expect("test operation should succeed");
6914
6915 assert_eq!(entries.len(), 1, "{entries:?}");
6916 assert_eq!(entries[0].status, NameStatus::Renamed(88));
6917 assert_eq!(
6918 entries[0].old_path.as_ref().map(|p| p.as_bytes()),
6919 Some(b"src.txt".as_slice())
6920 );
6921 assert_eq!(entries[0].path, b"dst.txt");
6922 fs::remove_dir_all(root).expect("test operation should succeed");
6923 }
6924
6925 #[test]
6926 fn inexact_disabled_default_preserves_exact_only_behavior() {
6927 assert!(!RenameDetectionOptions::default().detect_inexact);
6931 assert_eq!(
6932 RenameDetectionOptions::default().rename_threshold,
6933 DEFAULT_RENAME_THRESHOLD
6934 );
6935
6936 let root = temp_root();
6937 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6938 .expect("test operation should succeed");
6939 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6940
6941 let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
6942 let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
6943 let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
6944 let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
6945
6946 let entries = diff_name_status_trees_with_rename_options(
6947 &db,
6948 ObjectFormat::Sha1,
6949 &left,
6950 &right,
6951 RenameDetectionOptions::default(),
6952 )
6953 .expect("test operation should succeed");
6954 let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
6955 assert!(statuses.contains(&NameStatus::Added));
6956 assert!(statuses.contains(&NameStatus::Deleted));
6957 assert!(!statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))));
6958 fs::remove_dir_all(root).expect("test operation should succeed");
6959 }
6960
6961 fn apply_ops(old: &[DiffLine<'_>], new: &[DiffLine<'_>], ops: &[DiffOp]) -> Vec<u8> {
6969 let mut oi = 0usize;
6970 let mut ni = 0usize;
6971 let mut rebuilt: Vec<u8> = Vec::new();
6972 for op in ops {
6973 match *op {
6974 DiffOp::Equal(n) => {
6975 for _ in 0..n {
6976 assert_eq!(old[oi], new[ni], "Equal op covered unequal lines");
6978 rebuilt.extend_from_slice(old[oi].content);
6979 oi += 1;
6980 ni += 1;
6981 }
6982 }
6983 DiffOp::Delete(n) => oi += n,
6984 DiffOp::Insert(n) => {
6985 for _ in 0..n {
6986 rebuilt.extend_from_slice(new[ni].content);
6987 ni += 1;
6988 }
6989 }
6990 }
6991 }
6992 assert_eq!(oi, old.len(), "script did not consume all of old");
6994 assert_eq!(ni, new.len(), "script did not consume all of new");
6995 rebuilt
6996 }
6997
6998 fn assert_valid_script(old_bytes: &[u8], new_bytes: &[u8], ops: &[DiffOp]) {
7001 let old = split_lines(old_bytes);
7002 let new = split_lines(new_bytes);
7003 let rebuilt = apply_ops(&old, &new, ops);
7004 assert_eq!(rebuilt, new_bytes, "script did not rebuild new");
7005 for pair in ops.windows(2) {
7006 let same_kind = matches!(
7007 (pair[0], pair[1]),
7008 (DiffOp::Equal(_), DiffOp::Equal(_))
7009 | (DiffOp::Delete(_), DiffOp::Delete(_))
7010 | (DiffOp::Insert(_), DiffOp::Insert(_))
7011 );
7012 assert!(!same_kind, "ops not coalesced: {:?}", ops);
7013 }
7014 }
7015
7016 fn check_all_algorithms(old_bytes: &[u8], new_bytes: &[u8]) {
7019 let old = split_lines(old_bytes);
7020 let new = split_lines(new_bytes);
7021 for algo in [
7022 DiffAlgorithm::Myers,
7023 DiffAlgorithm::Minimal,
7024 DiffAlgorithm::Patience,
7025 DiffAlgorithm::Histogram,
7026 ] {
7027 let ops = diff_lines_with_algorithm(&old, &new, algo);
7028 assert_valid_script(old_bytes, new_bytes, &ops);
7029 }
7030 }
7031
7032 #[test]
7033 fn patience_and_histogram_match_myers_on_simple_cases() {
7034 let cases: &[(&[u8], &[u8], Vec<DiffOp>)] = &[
7037 (
7038 b"a\nb\nc\n",
7039 b"a\nx\nc\n",
7040 vec![
7041 DiffOp::Equal(1),
7042 DiffOp::Delete(1),
7043 DiffOp::Insert(1),
7044 DiffOp::Equal(1),
7045 ],
7046 ),
7047 (b"a\nb\nc\n", b"a\nb\nc\n", vec![DiffOp::Equal(3)]),
7048 (b"", b"a\nb\n", vec![DiffOp::Insert(2)]),
7049 (b"a\nb\n", b"", vec![DiffOp::Delete(2)]),
7050 (
7051 b"a\nb\nc\nd\n",
7052 b"a\nc\nd\n",
7053 vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)],
7054 ),
7055 ];
7056 for (old_bytes, new_bytes, expected) in cases {
7057 let old = split_lines(old_bytes);
7058 let new = split_lines(new_bytes);
7059 assert_eq!(&patience_diff_lines(&old, &new), expected);
7060 assert_eq!(&histogram_diff_lines(&old, &new), expected);
7061 assert_eq!(&myers_diff_lines(&old, &new), expected);
7062 }
7063 }
7064
7065 #[test]
7066 fn patience_handles_both_empty() {
7067 let empty = split_lines(b"");
7068 assert!(patience_diff_lines(&empty, &empty).is_empty());
7069 assert!(histogram_diff_lines(&empty, &empty).is_empty());
7070 }
7071
7072 #[test]
7073 fn patience_aligns_unique_anchors_across_moved_block() {
7074 check_all_algorithms(
7079 b"alpha\nbeta\ngamma\ndelta\n",
7080 b"gamma\ndelta\nalpha\nbeta\n",
7081 );
7082 }
7083
7084 #[test]
7085 fn histogram_differs_from_myers_keeping_block_contiguous() {
7086 let old = b"b\na\n";
7092 let new = b"a\nb\nb\na\nb\n";
7093 let old_l = split_lines(old);
7094 let new_l = split_lines(new);
7095
7096 let myers = myers_diff_lines(&old_l, &new_l);
7097 let histogram = histogram_diff_lines(&old_l, &new_l);
7098
7099 assert_valid_script(old, new, &myers);
7101 assert_valid_script(old, new, &histogram);
7102
7103 assert_eq!(
7106 myers,
7107 vec![
7108 DiffOp::Insert(1),
7109 DiffOp::Equal(1),
7110 DiffOp::Insert(1),
7111 DiffOp::Equal(1),
7112 DiffOp::Insert(1),
7113 ]
7114 );
7115 assert_eq!(
7116 histogram,
7117 vec![DiffOp::Insert(2), DiffOp::Equal(2), DiffOp::Insert(1)]
7118 );
7119 assert_ne!(myers, histogram);
7121 }
7122
7123 #[test]
7124 fn patience_differs_from_myers_on_repeated_lines() {
7125 let old = b"b\na\n";
7131 let new = b"a\na\nb\n";
7132 let old_l = split_lines(old);
7133 let new_l = split_lines(new);
7134
7135 let myers = myers_diff_lines(&old_l, &new_l);
7136 let patience = patience_diff_lines(&old_l, &new_l);
7137
7138 assert_valid_script(old, new, &myers);
7139 assert_valid_script(old, new, &patience);
7140
7141 assert_eq!(
7142 myers,
7143 vec![DiffOp::Delete(1), DiffOp::Equal(1), DiffOp::Insert(2)]
7144 );
7145 assert_eq!(
7146 patience,
7147 vec![DiffOp::Insert(2), DiffOp::Equal(1), DiffOp::Delete(1)]
7148 );
7149 assert_ne!(myers, patience);
7150 }
7151
7152 #[test]
7153 fn realistic_function_insertion_all_valid() {
7154 let old = b"int f() {\n return 1;\n}\n";
7159 let new = b"int g() {\n return 2;\n}\n\nint f() {\n return 1;\n}\n";
7160 check_all_algorithms(old, new);
7161 }
7162
7163 #[test]
7164 fn histogram_anchors_on_rare_line_when_no_unique_line_exists() {
7165 check_all_algorithms(b"x\nx\nmid\nx\nx\n", b"x\nmid\nx\nx\nx\n");
7170 check_all_algorithms(
7171 b"dup\ndup\nrare\ndup\ndup\n",
7172 b"dup\nrare\ndup\ndup\ndup\ndup\n",
7173 );
7174 }
7175
7176 #[test]
7177 fn all_algorithms_treat_missing_final_newline_as_change() {
7178 let old = split_lines(b"a\nb");
7180 let new = split_lines(b"a\nb\n");
7181 for algo in [
7182 DiffAlgorithm::Myers,
7183 DiffAlgorithm::Minimal,
7184 DiffAlgorithm::Patience,
7185 DiffAlgorithm::Histogram,
7186 ] {
7187 let ops = diff_lines_with_algorithm(&old, &new, algo);
7188 assert_eq!(
7189 ops,
7190 vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Insert(1)],
7191 "algorithm {:?} mishandled missing final newline",
7192 algo
7193 );
7194 }
7195 }
7196
7197 #[test]
7198 fn dispatcher_routes_each_variant() {
7199 let old = split_lines(b"a\nb\nc\n");
7200 let new = split_lines(b"a\nx\nc\n");
7201 assert_eq!(
7202 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Myers),
7203 myers_diff_lines(&old, &new)
7204 );
7205 assert_eq!(
7207 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Minimal),
7208 myers_diff_lines(&old, &new)
7209 );
7210 assert_eq!(
7211 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Patience),
7212 patience_diff_lines(&old, &new)
7213 );
7214 assert_eq!(
7215 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Histogram),
7216 histogram_diff_lines(&old, &new)
7217 );
7218 }
7219
7220 #[test]
7221 fn patience_recurses_into_gaps_between_anchors() {
7222 let old = b"head\nmid1\nmid2\ntail\n";
7225 let new = b"head\nMID\nmid2\ntail\n";
7226 let old_l = split_lines(old);
7227 let new_l = split_lines(new);
7228 let ops = patience_diff_lines(&old_l, &new_l);
7229 assert_eq!(
7230 ops,
7231 vec![
7232 DiffOp::Equal(1),
7233 DiffOp::Delete(1),
7234 DiffOp::Insert(1),
7235 DiffOp::Equal(2),
7236 ]
7237 );
7238 assert_valid_script(old, new, &ops);
7239 }
7240
7241 #[test]
7242 fn patience_falls_back_to_myers_with_no_unique_lines() {
7243 let old = b"a\na\nb\nb\n";
7247 let new = b"a\na\na\nb\n";
7248 let old_l = split_lines(old);
7249 let new_l = split_lines(new);
7250 let ops = patience_diff_lines(&old_l, &new_l);
7251 assert_valid_script(old, new, &ops);
7256 }
7257
7258 #[test]
7259 fn algorithms_agree_with_myers_when_all_lines_distinct() {
7260 let cases: &[(&[u8], &[u8])] = &[
7263 (b"a\nb\nc\nd\ne\n", b"a\nc\nd\nf\ne\n"),
7264 (b"1\n2\n3\n4\n5\n6\n", b"1\n3\n2\n4\n6\n5\n"),
7265 (b"q\nw\ne\nr\nt\ny\n", b"q\nw\nx\nr\nt\nz\n"),
7266 ];
7267 for (old_bytes, new_bytes) in cases {
7268 let old = split_lines(old_bytes);
7269 let new = split_lines(new_bytes);
7270 let myers = myers_diff_lines(&old, &new);
7271 assert_eq!(
7272 patience_diff_lines(&old, &new),
7273 myers,
7274 "patience must equal Myers when all lines are distinct: {:?}",
7275 old_bytes
7276 );
7277 assert_eq!(
7278 histogram_diff_lines(&old, &new),
7279 myers,
7280 "histogram must equal Myers when all lines are distinct: {:?}",
7281 old_bytes
7282 );
7283 }
7284 }
7285
7286 #[test]
7287 fn fuzz_all_algorithms_reconstruct_new() {
7288 let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
7292 let mut next = || {
7293 state = state
7294 .wrapping_mul(6364136223846793005)
7295 .wrapping_add(1442695040888963407);
7296 (state >> 33) as u32
7297 };
7298 let alphabet = [b"a\n", b"b\n", b"c\n", b"d\n"];
7299 let build = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
7300 let len = (rng() % 9) as usize; let mut buf = Vec::new();
7302 for _ in 0..len {
7303 let pick = (rng() % alphabet.len() as u32) as usize;
7304 buf.extend_from_slice(alphabet[pick]);
7305 }
7306 if !buf.is_empty() && rng().is_multiple_of(4) {
7308 buf.pop();
7309 }
7310 buf
7311 };
7312 for _ in 0..400 {
7313 let old_bytes = build(&mut next);
7314 let new_bytes = build(&mut next);
7315 check_all_algorithms(&old_bytes, &new_bytes);
7316 }
7317 }
7318
7319 #[test]
7320 fn exhaustive_small_inputs_all_algorithms_reconstruct() {
7321 let syms = [b"a\n".to_vec(), b"b\n".to_vec(), b"c\n".to_vec()];
7326 let make = |n: usize, mut code: usize| -> Vec<u8> {
7327 let mut v = Vec::new();
7328 for _ in 0..n {
7329 v.extend_from_slice(&syms[code % 3]);
7330 code /= 3;
7331 }
7332 v
7333 };
7334 for la in 0..=5usize {
7335 for lb in 0..=5usize {
7336 for ca in 0..3usize.pow(la as u32) {
7337 for cb in 0..3usize.pow(lb as u32) {
7338 let ob = make(la, ca);
7339 let nb = make(lb, cb);
7340 let ol = split_lines(&ob);
7341 let nl = split_lines(&nb);
7342 assert_eq!(apply_ops(&ol, &nl, &myers_diff_lines(&ol, &nl)), nb);
7343 assert_eq!(apply_ops(&ol, &nl, &patience_diff_lines(&ol, &nl)), nb);
7344 assert_eq!(apply_ops(&ol, &nl, &histogram_diff_lines(&ol, &nl)), nb);
7345 }
7346 }
7347 }
7348 }
7349 }
7350
7351 #[test]
7352 fn fuzz_distinct_lines_patience_histogram_equal_myers() {
7353 let mut state: u64 = 0x1234_5678_9ABC_DEF0;
7357 let mut next = || {
7358 state = state
7359 .wrapping_mul(6364136223846793005)
7360 .wrapping_add(1442695040888963407);
7361 (state >> 33) as u32
7362 };
7363 for _ in 0..200 {
7364 let pick_subseq = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
7367 let mut buf = Vec::new();
7368 for t in 0..10u32 {
7369 if rng().is_multiple_of(2) {
7370 buf.extend_from_slice(format!("{t}\n").as_bytes());
7371 }
7372 }
7373 buf
7374 };
7375 let old_bytes = pick_subseq(&mut next);
7376 let new_bytes = pick_subseq(&mut next);
7377 let old = split_lines(&old_bytes);
7378 let new = split_lines(&new_bytes);
7379 let myers = myers_diff_lines(&old, &new);
7380 assert_eq!(patience_diff_lines(&old, &new), myers);
7381 assert_eq!(histogram_diff_lines(&old, &new), myers);
7382 }
7383 }
7384
7385 fn status_lines(entries: &[NameStatusEntry]) -> Vec<String> {
7394 entries.iter().map(|entry| entry.line()).collect()
7395 }
7396
7397 fn assert_tree_diff_matches_full(
7400 db: &FileObjectDatabase,
7401 left: &ObjectId,
7402 right: &ObjectId,
7403 options: DiffNameStatusOptions,
7404 ) {
7405 let (full_left, full_right) = collect_full_tree_pair(db, ObjectFormat::Sha1, left, right)
7407 .expect("test operation should succeed");
7408 let reference = diff_name_status_maps(
7409 &full_left,
7410 &full_right,
7411 full_left.keys().chain(full_right.keys()),
7412 options,
7413 )
7414 .expect("test operation should succeed");
7415
7416 let (pruned_left, pruned_right) = changed_tree_entries(db, ObjectFormat::Sha1, left, right)
7418 .expect("test operation should succeed");
7419 let pruned = diff_name_status_maps(
7420 &pruned_left,
7421 &pruned_right,
7422 pruned_left.keys().chain(pruned_right.keys()),
7423 options,
7424 )
7425 .expect("test operation should succeed");
7426
7427 assert_eq!(
7428 status_lines(&reference),
7429 status_lines(&pruned),
7430 "pruned map diff diverged from full map diff for {options:?}"
7431 );
7432
7433 let public =
7436 diff_name_status_trees_with_options(db, ObjectFormat::Sha1, left, right, options)
7437 .expect("test operation should succeed");
7438 assert_eq!(
7439 status_lines(&reference),
7440 status_lines(&public),
7441 "public tree diff diverged from full map diff for {options:?}"
7442 );
7443
7444 for (path, tracked) in &pruned_left {
7448 assert_eq!(
7449 full_left.get(path),
7450 Some(tracked),
7451 "pruned left entry not present (or differs) in full left map: {:?}",
7452 String::from_utf8_lossy(path)
7453 );
7454 }
7455 for (path, tracked) in &pruned_right {
7456 assert_eq!(
7457 full_right.get(path),
7458 Some(tracked),
7459 "pruned right entry not present (or differs) in full right map: {:?}",
7460 String::from_utf8_lossy(path)
7461 );
7462 }
7463 for entry in &reference {
7466 let path = entry.path.as_bytes();
7467 match entry.status {
7468 NameStatus::Added => assert!(
7469 pruned_right.contains_key(path),
7470 "added path dropped by pruning: {:?}",
7471 String::from_utf8_lossy(path)
7472 ),
7473 NameStatus::Deleted => assert!(
7474 pruned_left.contains_key(path),
7475 "deleted path dropped by pruning: {:?}",
7476 String::from_utf8_lossy(path)
7477 ),
7478 NameStatus::Modified => {
7479 assert!(
7480 pruned_left.contains_key(path) && pruned_right.contains_key(path),
7481 "modified path dropped by pruning: {:?}",
7482 String::from_utf8_lossy(path)
7483 );
7484 }
7485 _ => {}
7486 }
7487 }
7488 }
7489
7490 fn assert_tree_diff_matches_full_all_modes(
7494 db: &FileObjectDatabase,
7495 left: &ObjectId,
7496 right: &ObjectId,
7497 ) {
7498 for detect_renames in [false, true] {
7499 for detect_copies in [false, true] {
7500 let options = DiffNameStatusOptions {
7501 detect_renames,
7502 detect_copies,
7503 find_copies_harder: false,
7504 rename_empty: true,
7505 };
7506 assert_tree_diff_matches_full(db, left, right, options);
7507 }
7508 }
7509 }
7510
7511 fn structural_db() -> (PathBuf, FileObjectDatabase) {
7513 let root = temp_root();
7514 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
7515 .expect("test operation should succeed");
7516 let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
7517 (root, db)
7518 }
7519
7520 #[test]
7521 fn pruned_walk_skips_identical_subtree_and_matches_full() {
7522 let (root, mut db) = structural_db();
7526
7527 let s1 = write_blob(&mut db, b"shared one\n");
7529 let s2 = write_blob(&mut db, b"shared two\n");
7530 let s3 = write_blob(&mut db, b"deep nested\n");
7531 let shared_inner = write_tree(&mut db, &[(b"c.txt", 0o100644, s3.clone())]);
7532 let shared = write_tree(
7533 &mut db,
7534 &[
7535 (b"a.txt", 0o100644, s1.clone()),
7536 (b"b.txt", 0o100644, s2.clone()),
7537 (b"inner", 0o040000, shared_inner.clone()),
7538 ],
7539 );
7540
7541 let app_old = write_blob(&mut db, b"version 1\n");
7543 let app_new = write_blob(&mut db, b"version 2\n");
7544 let app_left = write_tree(&mut db, &[(b"main.rs", 0o100644, app_old)]);
7545 let app_right = write_tree(&mut db, &[(b"main.rs", 0o100644, app_new)]);
7546
7547 let left = write_tree(
7548 &mut db,
7549 &[
7550 (b"app", 0o040000, app_left),
7551 (b"shared", 0o040000, shared.clone()),
7552 ],
7553 );
7554 let right = write_tree(
7555 &mut db,
7556 &[(b"app", 0o040000, app_right), (b"shared", 0o040000, shared)],
7557 );
7558
7559 let (pruned_left, pruned_right) =
7561 changed_tree_entries(&db, ObjectFormat::Sha1, &left, &right)
7562 .expect("test operation should succeed");
7563 assert_eq!(
7564 pruned_left.keys().collect::<Vec<_>>(),
7565 vec![&b"app/main.rs".to_vec()],
7566 "pruning should leave only the changed path on the left"
7567 );
7568 assert_eq!(
7569 pruned_right.keys().collect::<Vec<_>>(),
7570 vec![&b"app/main.rs".to_vec()],
7571 "pruning should leave only the changed path on the right"
7572 );
7573 assert!(
7574 !pruned_left.contains_key(b"shared/a.txt".as_slice()),
7575 "identical shared subtree must not appear in pruned maps"
7576 );
7577
7578 assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7579 fs::remove_dir_all(root).expect("test operation should succeed");
7580 }
7581
7582 #[test]
7583 fn pruned_walk_matches_full_for_add_delete_modify_nested() {
7584 let (root, mut db) = structural_db();
7587
7588 let keep = write_blob(&mut db, b"unchanged\n");
7589 let untouched_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, keep.clone())]);
7590
7591 let nested_old = write_blob(&mut db, b"nested old\n");
7592 let nested_new = write_blob(&mut db, b"nested new\n");
7593 let dir_left = write_tree(
7594 &mut db,
7595 &[
7596 (b"changed.txt", 0o100644, nested_old),
7597 (b"stable.txt", 0o100644, keep.clone()),
7598 ],
7599 );
7600 let dir_right = write_tree(
7601 &mut db,
7602 &[
7603 (b"changed.txt", 0o100644, nested_new),
7604 (b"stable.txt", 0o100644, keep.clone()),
7605 ],
7606 );
7607
7608 let only_left = write_blob(&mut db, b"will be deleted\n");
7609 let only_right = write_blob(&mut db, b"freshly added\n");
7610
7611 let left = write_tree(
7612 &mut db,
7613 &[
7614 (b"dir", 0o040000, dir_left),
7615 (b"gone.txt", 0o100644, only_left),
7616 (b"untouched", 0o040000, untouched_dir.clone()),
7617 ],
7618 );
7619 let right = write_tree(
7620 &mut db,
7621 &[
7622 (b"dir", 0o040000, dir_right),
7623 (b"new.txt", 0o100644, only_right),
7624 (b"untouched", 0o040000, untouched_dir),
7625 ],
7626 );
7627
7628 let entries = diff_name_status_trees_with_options(
7629 &db,
7630 ObjectFormat::Sha1,
7631 &left,
7632 &right,
7633 DiffNameStatusOptions {
7634 detect_renames: false,
7635 detect_copies: false,
7636 find_copies_harder: false,
7637 rename_empty: true,
7638 },
7639 )
7640 .expect("test operation should succeed");
7641 assert_eq!(
7642 status_lines(&entries),
7643 vec![
7644 "M\tdir/changed.txt".to_string(),
7645 "D\tgone.txt".to_string(),
7646 "A\tnew.txt".to_string(),
7647 ],
7648 "unexpected raw status for mixed nested diff"
7649 );
7650
7651 assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7652 fs::remove_dir_all(root).expect("test operation should succeed");
7653 }
7654
7655 #[test]
7656 fn pruned_walk_matches_full_for_rename_across_dirs() {
7657 let (root, mut db) = structural_db();
7661
7662 let moved = write_blob(&mut db, b"i get moved across directories\n");
7663 let companion = write_blob(&mut db, b"i stay put\n");
7664 let stable_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, companion.clone())]);
7665
7666 let src_dir = write_tree(&mut db, &[(b"file.txt", 0o100644, moved.clone())]);
7667 let dst_dir = write_tree(&mut db, &[(b"renamed.txt", 0o100644, moved.clone())]);
7668
7669 let left = write_tree(
7670 &mut db,
7671 &[
7672 (b"src", 0o040000, src_dir),
7673 (b"stable", 0o040000, stable_dir.clone()),
7674 ],
7675 );
7676 let right = write_tree(
7677 &mut db,
7678 &[
7679 (b"dst", 0o040000, dst_dir),
7680 (b"stable", 0o040000, stable_dir),
7681 ],
7682 );
7683
7684 let entries = diff_name_status_trees_with_options(
7685 &db,
7686 ObjectFormat::Sha1,
7687 &left,
7688 &right,
7689 DiffNameStatusOptions {
7690 detect_renames: true,
7691 detect_copies: false,
7692 find_copies_harder: false,
7693 rename_empty: true,
7694 },
7695 )
7696 .expect("test operation should succeed");
7697 assert_eq!(
7698 status_lines(&entries),
7699 vec!["R100\tsrc/file.txt\tdst/renamed.txt".to_string()],
7700 "rename across dirs should be detected on pruned set"
7701 );
7702
7703 assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7704 fs::remove_dir_all(root).expect("test operation should succeed");
7705 }
7706
7707 #[test]
7708 fn pruned_walk_matches_full_for_binary_and_mode_change() {
7709 let (root, mut db) = structural_db();
7713
7714 let bin_old = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 255, 1, 2, 3]);
7715 let bin_new = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 254, 9, 8, 7]);
7716 let script = write_blob(&mut db, b"#!/bin/sh\necho hi\n");
7717
7718 let left = write_tree(
7719 &mut db,
7720 &[
7721 (b"image.bin", 0o100644, bin_old),
7722 (b"run.sh", 0o100644, script.clone()),
7723 ],
7724 );
7725 let right = write_tree(
7726 &mut db,
7727 &[
7728 (b"image.bin", 0o100644, bin_new),
7729 (b"run.sh", 0o100755, script),
7731 ],
7732 );
7733
7734 let entries = diff_name_status_trees_with_options(
7735 &db,
7736 ObjectFormat::Sha1,
7737 &left,
7738 &right,
7739 DiffNameStatusOptions {
7740 detect_renames: false,
7741 detect_copies: false,
7742 find_copies_harder: false,
7743 rename_empty: true,
7744 },
7745 )
7746 .expect("test operation should succeed");
7747 assert_eq!(
7748 status_lines(&entries),
7749 vec!["M\timage.bin".to_string(), "M\trun.sh".to_string()],
7750 "binary edit and mode-only change should both be Modify"
7751 );
7752
7753 assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7754 fs::remove_dir_all(root).expect("test operation should succeed");
7755 }
7756
7757 #[test]
7758 fn pruned_walk_matches_full_for_dir_replaced_by_file() {
7759 let (root, mut db) = structural_db();
7764
7765 let inner_a = write_blob(&mut db, b"inner a\n");
7766 let inner_b = write_blob(&mut db, b"inner b\n");
7767 let thing_dir = write_tree(
7768 &mut db,
7769 &[(b"a.txt", 0o100644, inner_a), (b"b.txt", 0o100644, inner_b)],
7770 );
7771 let thing_file = write_blob(&mut db, b"now i am a file\n");
7772
7773 let other_file = write_blob(&mut db, b"i was a file\n");
7775 let other_inner = write_blob(&mut db, b"now nested\n");
7776 let other_dir = write_tree(&mut db, &[(b"x.txt", 0o100644, other_inner)]);
7777
7778 let left = write_tree(
7779 &mut db,
7780 &[
7781 (b"other", 0o100644, other_file),
7782 (b"thing", 0o040000, thing_dir),
7783 ],
7784 );
7785 let right = write_tree(
7786 &mut db,
7787 &[
7788 (b"other", 0o040000, other_dir),
7789 (b"thing", 0o100644, thing_file),
7790 ],
7791 );
7792
7793 let entries = diff_name_status_trees_with_options(
7794 &db,
7795 ObjectFormat::Sha1,
7796 &left,
7797 &right,
7798 DiffNameStatusOptions {
7799 detect_renames: false,
7800 detect_copies: false,
7801 find_copies_harder: false,
7802 rename_empty: true,
7803 },
7804 )
7805 .expect("test operation should succeed");
7806 assert_eq!(
7807 status_lines(&entries),
7808 vec![
7809 "D\tother".to_string(),
7810 "A\tother/x.txt".to_string(),
7811 "A\tthing".to_string(),
7812 "D\tthing/a.txt".to_string(),
7813 "D\tthing/b.txt".to_string(),
7814 ],
7815 "dir<->file swap should flatten to independent adds/deletes"
7816 );
7817
7818 assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7819 fs::remove_dir_all(root).expect("test operation should succeed");
7820 }
7821
7822 #[test]
7823 fn pruned_walk_matches_full_for_identical_trees() {
7824 let (root, mut db) = structural_db();
7827
7828 let blob = write_blob(&mut db, b"same\n");
7829 let sub = write_tree(&mut db, &[(b"f.txt", 0o100644, blob.clone())]);
7830 let tree = write_tree(
7831 &mut db,
7832 &[(b"sub", 0o040000, sub), (b"top.txt", 0o100644, blob)],
7833 );
7834
7835 let (pruned_left, pruned_right) =
7836 changed_tree_entries(&db, ObjectFormat::Sha1, &tree, &tree)
7837 .expect("test operation should succeed");
7838 assert!(
7839 pruned_left.is_empty() && pruned_right.is_empty(),
7840 "identical trees must produce no changed entries"
7841 );
7842
7843 let entries = diff_name_status_trees_with_options(
7844 &db,
7845 ObjectFormat::Sha1,
7846 &tree,
7847 &tree,
7848 DiffNameStatusOptions::default(),
7849 )
7850 .expect("test operation should succeed");
7851 assert!(entries.is_empty(), "identical trees must produce no diff");
7852
7853 assert_tree_diff_matches_full_all_modes(&db, &tree, &tree);
7854 fs::remove_dir_all(root).expect("test operation should succeed");
7855 }
7856
7857 #[test]
7858 fn find_copies_harder_uses_full_left_map_and_finds_unchanged_source() {
7859 let (root, mut db) = structural_db();
7864
7865 let template = write_blob(&mut db, b"reusable boilerplate content\n");
7868 let lib_dir = write_tree(&mut db, &[(b"template.txt", 0o100644, template.clone())]);
7869
7870 let trigger_old = write_blob(&mut db, b"trigger old\n");
7871 let trigger_new = write_blob(&mut db, b"trigger new\n");
7872
7873 let left = write_tree(
7874 &mut db,
7875 &[
7876 (b"lib", 0o040000, lib_dir.clone()),
7877 (b"trigger.txt", 0o100644, trigger_old),
7878 ],
7879 );
7880 let right = write_tree(
7881 &mut db,
7882 &[
7883 (b"copy.txt", 0o100644, template.clone()),
7884 (b"lib", 0o040000, lib_dir),
7885 (b"trigger.txt", 0o100644, trigger_new),
7886 ],
7887 );
7888
7889 let options = DiffNameStatusOptions {
7890 detect_renames: true,
7891 detect_copies: true,
7892 find_copies_harder: true,
7893 rename_empty: true,
7894 };
7895
7896 let (full_left, full_right) =
7898 collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
7899 .expect("test operation should succeed");
7900 let reference = diff_name_status_maps(
7901 &full_left,
7902 &full_right,
7903 full_left.keys().chain(full_right.keys()),
7904 options,
7905 )
7906 .expect("test operation should succeed");
7907
7908 let public =
7909 diff_name_status_trees_with_options(&db, ObjectFormat::Sha1, &left, &right, options)
7910 .expect("test operation should succeed");
7911 assert_eq!(
7912 status_lines(&reference),
7913 status_lines(&public),
7914 "find-copies-harder public diff must match full-map reference"
7915 );
7916 assert!(
7918 public
7919 .iter()
7920 .any(|entry| matches!(entry.status, NameStatus::Copied(_))
7921 && entry.old_path.as_ref().map(|p| p.as_bytes())
7922 == Some(b"lib/template.txt".as_slice())
7923 && entry.path == b"copy.txt"),
7924 "copy from unchanged source must be found with find_copies_harder: {public:?}"
7925 );
7926 fs::remove_dir_all(root).expect("test operation should succeed");
7927 }
7928
7929 #[test]
7930 fn pruned_walk_matches_full_with_inexact_rename_options() {
7931 let (root, mut db) = structural_db();
7934
7935 let untouched = write_blob(&mut db, b"untouched file\n");
7936 let untouched_dir = write_tree(&mut db, &[(b"u.txt", 0o100644, untouched.clone())]);
7937
7938 let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
7940 let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
7941
7942 let left = write_tree(
7943 &mut db,
7944 &[
7945 (b"a.txt", 0o100644, old),
7946 (b"keep", 0o040000, untouched_dir.clone()),
7947 ],
7948 );
7949 let right = write_tree(
7950 &mut db,
7951 &[
7952 (b"b.txt", 0o100644, new),
7953 (b"keep", 0o040000, untouched_dir),
7954 ],
7955 );
7956
7957 let options = RenameDetectionOptions {
7958 base: DiffNameStatusOptions {
7959 detect_renames: true,
7960 detect_copies: false,
7961 find_copies_harder: false,
7962 rename_empty: true,
7963 },
7964 detect_inexact: true,
7965 rename_threshold: DEFAULT_RENAME_THRESHOLD,
7966 copy_threshold: DEFAULT_RENAME_THRESHOLD,
7967 };
7968
7969 let (full_left, full_right) =
7971 collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
7972 .expect("test operation should succeed");
7973 let reference = diff_name_status_maps_with_renames(
7974 &full_left,
7975 &full_right,
7976 full_left.keys().chain(full_right.keys()),
7977 options,
7978 |oid| read_blob_bytes(&db, oid),
7979 )
7980 .expect("test operation should succeed");
7981
7982 let public = diff_name_status_trees_with_rename_options(
7983 &db,
7984 ObjectFormat::Sha1,
7985 &left,
7986 &right,
7987 options,
7988 )
7989 .expect("test operation should succeed");
7990
7991 assert_eq!(
7992 status_lines(&reference),
7993 status_lines(&public),
7994 "inexact rename via pruned walk must match full-map reference"
7995 );
7996 assert_eq!(
7997 status_lines(&public),
7998 vec!["R075\ta.txt\tb.txt".to_string()],
7999 "expected a 75% inexact rename"
8000 );
8001 fs::remove_dir_all(root).expect("test operation should succeed");
8002 }
8003}