1use sley_core::{GitError, ObjectFormat, ObjectId, RepoPath, Result, object_id_for_bytes};
2
3pub mod range;
4pub mod render;
5pub mod ws;
6
7pub use sley_core::BString;
8use sley_index::{BorrowedIndex, Index, IndexStatCache};
9use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntries, TreeEntry};
10use sley_odb::{FileObjectDatabase, ObjectReader, ObjectWriter};
11use sley_refs::{FileRefStore, RefTarget};
12use std::collections::{BTreeMap, BTreeSet, HashMap};
13use std::fs;
14use std::path::{Path, PathBuf};
15
/// Finds the git directory that backs the checkout rooted at `sub_root`.
///
/// `sub_root/.git` is examined without following a symlink at that path:
///
/// * if it is a directory, that directory is returned;
/// * if it is a regular file, its text must start with `gitdir:`; the rest
///   (whitespace-trimmed) names the git directory, resolved against
///   `sub_root` when it is relative. It is returned only when it exists as a
///   directory.
///
/// Every other situation (missing entry, unreadable or non-UTF-8 file, wrong
/// prefix, empty target, dangling target) yields `None`.
pub fn gitlink_git_dir(sub_root: &Path) -> Option<PathBuf> {
    let dot_git = sub_root.join(".git");
    let meta = fs::symlink_metadata(&dot_git).ok()?;
    if meta.is_dir() {
        return Some(dot_git);
    }
    if !meta.is_file() {
        return None;
    }

    let text = fs::read_to_string(&dot_git).ok()?;
    let pointer = text
        .strip_prefix("gitdir:")
        .map(str::trim)
        .filter(|target| !target.is_empty())?;

    // `Path::join` discards the base when the argument is absolute, so one
    // call covers both the absolute and the relative form of the pointer.
    let resolved = sub_root.join(pointer);
    resolved.is_dir().then_some(resolved)
}
59
60pub fn gitlink_head_oid(sub_root: &Path, format: ObjectFormat) -> Option<ObjectId> {
66 let git_dir = gitlink_git_dir(sub_root)?;
67 let store = FileRefStore::new(&git_dir, format);
68 let mut target = store.read_ref("HEAD").ok()??;
69 for _ in 0..10 {
71 match target {
72 RefTarget::Direct(oid) => return Some(oid),
73 RefTarget::Symbolic(name) => target = store.read_ref(&name).ok()??,
74 }
75 }
76 None
77}
78
/// One line of a blob, borrowed from the blob's bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffLine<'a> {
    /// The bytes of the line.
    pub content: &'a [u8],
    /// Whether the line is considered newline-terminated.
    pub has_newline: bool,
}

impl<'a> DiffLine<'a> {
    /// Returns the line's bytes minus one trailing `\n`.
    ///
    /// The newline is only removed when `has_newline` is set and `content`
    /// really ends with `\n`; otherwise `content` is returned unchanged.
    pub fn bytes_without_newline(&self) -> &'a [u8] {
        match self.content.split_last() {
            Some((&b'\n', body)) if self.has_newline => body,
            _ => self.content,
        }
    }
}
113
114pub fn split_lines(blob: &[u8]) -> Vec<DiffLine<'_>> {
121 let mut lines = Vec::new();
122 let mut start = 0usize;
123 let len = blob.len();
124 let mut idx = 0usize;
125 while idx < len {
126 if blob[idx] == b'\n' {
127 lines.push(DiffLine {
128 content: &blob[start..=idx],
129 has_newline: true,
130 });
131 idx += 1;
132 start = idx;
133 } else {
134 idx += 1;
135 }
136 }
137 if start < len {
138 lines.push(DiffLine {
139 content: &blob[start..len],
140 has_newline: false,
141 });
142 }
143 lines
144}
145
/// One run in a line-level edit script from an "old" to a "new" sequence.
///
/// The payload is the number of consecutive lines the run covers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffOp {
    /// Lines present in both sequences; advances both sides.
    Equal(usize),
    /// Lines present only in the old sequence; advances the old side.
    Delete(usize),
    /// Lines present only in the new sequence; advances the new side.
    Insert(usize),
}
164
165pub fn myers_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
174 let n_total = old.len();
177 let m_total = new.len();
178 let mut prefix = 0usize;
179 while prefix < n_total && prefix < m_total && old[prefix] == new[prefix] {
180 prefix += 1;
181 }
182 let mut suffix = 0usize;
183 while suffix < n_total - prefix
184 && suffix < m_total - prefix
185 && old[n_total - 1 - suffix] == new[m_total - 1 - suffix]
186 {
187 suffix += 1;
188 }
189
190 let old_mid = &old[prefix..n_total - suffix];
191 let new_mid = &new[prefix..m_total - suffix];
192
193 let mut ops: Vec<DiffOp> = Vec::new();
194 if prefix > 0 {
195 ops.push(DiffOp::Equal(prefix));
196 }
197 myers_core(old_mid, new_mid, &mut ops);
198 if suffix > 0 {
199 ops.push(DiffOp::Equal(suffix));
200 }
201 coalesce_ops(ops)
202}
203
204fn myers_core(old: &[DiffLine<'_>], new: &[DiffLine<'_>], out: &mut Vec<DiffOp>) {
212 let n = old.len() as isize;
213 let m = new.len() as isize;
214 if n == 0 {
215 if m > 0 {
216 out.push(DiffOp::Insert(m as usize));
217 }
218 return;
219 }
220 if m == 0 {
221 out.push(DiffOp::Delete(n as usize));
222 return;
223 }
224
225 let max = (n + m) as usize;
226 let offset = max as isize; let width = 2 * max + 1;
228 let mut v = vec![0isize; width];
230 let mut trace: Vec<Vec<isize>> = Vec::new();
232
233 let mut found_d: Option<usize> = None;
234 'search: for d in 0..=(max as isize) {
235 trace.push(v.clone());
236 let mut k = -d;
237 while k <= d {
238 let kidx = (k + offset) as usize;
239 let mut x = if k == -d
242 || (k != d && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
243 {
244 v[(k + 1 + offset) as usize]
246 } else {
247 v[(k - 1 + offset) as usize] + 1
249 };
250 let mut y = x - k;
251 while x < n && y < m && old[x as usize] == new[y as usize] {
253 x += 1;
254 y += 1;
255 }
256 v[kidx] = x;
257 if x >= n && y >= m {
258 found_d = Some(d as usize);
259 break 'search;
260 }
261 k += 2;
262 }
263 }
264
265 let Some(d_end) = found_d else {
268 out.push(DiffOp::Delete(n as usize));
269 out.push(DiffOp::Insert(m as usize));
270 return;
271 };
272
273 backtrack(n, m, &trace, d_end, offset, out);
274}
275
276fn backtrack(
282 n: isize,
283 m: isize,
284 trace: &[Vec<isize>],
285 d_end: usize,
286 offset: isize,
287 out: &mut Vec<DiffOp>,
288) {
289 let mut x = n;
290 let mut y = m;
291 let mut rev: Vec<DiffOp> = Vec::new();
292
293 for d in (0..=d_end).rev() {
294 let v = &trace[d];
295 let k = x - y;
296 let prev_k = if k == -(d as isize)
298 || (k != d as isize && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
299 {
300 k + 1 } else {
302 k - 1 };
304 let prev_x = v[(prev_k + offset) as usize];
305 let prev_y = prev_x - prev_k;
306
307 while x > prev_x && y > prev_y {
309 rev.push(DiffOp::Equal(1));
310 x -= 1;
311 y -= 1;
312 }
313 if d > 0 {
314 if x == prev_x {
315 rev.push(DiffOp::Insert(1));
317 } else {
318 rev.push(DiffOp::Delete(1));
320 }
321 x = prev_x;
322 y = prev_y;
323 }
324 }
325
326 rev.reverse();
327 out.extend(rev);
328}
329
330fn coalesce_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
332 let mut out: Vec<DiffOp> = Vec::with_capacity(ops.len());
333 for op in ops {
334 match (out.last_mut(), op) {
335 (Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
336 (Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
337 (Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
338 _ => out.push(op),
339 }
340 }
341 out
342}
343
/// Selects which whitespace differences are disregarded when comparing lines.
///
/// All flags default to `false`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct WsIgnore {
    /// Disregard every whitespace byte.
    pub all_space: bool,
    /// Treat any run of whitespace as one space and disregard trailing
    /// whitespace.
    pub space_change: bool,
    /// Disregard whitespace at the end of the line.
    pub space_at_eol: bool,
    /// Disregard a carriage return at the end of the line.
    pub cr_at_eol: bool,
}

impl WsIgnore {
    /// Returns `true` when no flag is set.
    pub fn is_empty(&self) -> bool {
        *self == Self::default()
    }
}
380
/// Returns `true` for space and for the bytes `\t`, `\n`, vertical tab
/// (0x0b), form feed (0x0c) and `\r`.
#[inline]
fn xdl_isspace(c: u8) -> bool {
    // `\t` (0x09) through `\r` (0x0d) are contiguous.
    c == b' ' || (b'\t'..=b'\r').contains(&c)
}
387
388pub(crate) fn canonicalize_line_for_match(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
405 canonicalize_line(line, ignore)
406}
407
408fn canonicalize_line(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
409 if ignore.all_space {
410 return line.iter().copied().filter(|&c| !xdl_isspace(c)).collect();
411 }
412 if ignore.space_change {
413 let mut out = Vec::with_capacity(line.len());
414 let mut i = 0usize;
415 while i < line.len() {
416 if xdl_isspace(line[i]) {
417 while i < line.len() && xdl_isspace(line[i]) {
419 i += 1;
420 }
421 out.push(b' ');
422 } else {
423 out.push(line[i]);
424 i += 1;
425 }
426 }
427 if out.last() == Some(&b' ') {
429 out.pop();
430 }
431 return out;
432 }
433 if ignore.space_at_eol {
434 let mut end = line.len();
435 while end > 0 && xdl_isspace(line[end - 1]) {
436 end -= 1;
437 }
438 return line[..end].to_vec();
439 }
440 if ignore.cr_at_eol {
441 if let Some(stripped) = line.strip_suffix(b"\n") {
443 if let Some(without_cr) = stripped.strip_suffix(b"\r") {
444 let mut out = without_cr.to_vec();
445 out.push(b'\n');
446 return out;
447 }
448 } else if let Some(without_cr) = line.strip_suffix(b"\r") {
449 return without_cr.to_vec();
451 }
452 return line.to_vec();
453 }
454 line.to_vec()
455}
456
457fn line_is_blank(line: &[u8], ignore: WsIgnore) -> bool {
462 if ignore.is_empty() {
463 line.len() <= 1
464 } else {
465 line.iter().all(|&c| xdl_isspace(c))
466 }
467}
468
469pub fn myers_diff_lines_ws(
478 old: &[DiffLine<'_>],
479 new: &[DiffLine<'_>],
480 ignore: WsIgnore,
481 algorithm: DiffAlgorithm,
482) -> Vec<DiffOp> {
483 if ignore.is_empty() {
484 return diff_lines_with_algorithm(old, new, algorithm);
485 }
486 let old_canon: Vec<Vec<u8>> = old
487 .iter()
488 .map(|l| canonicalize_line(l.content, ignore))
489 .collect();
490 let new_canon: Vec<Vec<u8>> = new
491 .iter()
492 .map(|l| canonicalize_line(l.content, ignore))
493 .collect();
494 let old_lines: Vec<DiffLine<'_>> = old_canon
495 .iter()
496 .map(|c| DiffLine {
497 content: c.as_slice(),
498 has_newline: true,
499 })
500 .collect();
501 let new_lines: Vec<DiffLine<'_>> = new_canon
502 .iter()
503 .map(|c| DiffLine {
504 content: c.as_slice(),
505 has_newline: true,
506 })
507 .collect();
508 diff_lines_with_algorithm(&old_lines, &new_lines, algorithm)
509}
510
511type LineKey<'a> = (&'a [u8], bool);
537
538#[inline]
539fn line_key<'a>(line: &DiffLine<'a>) -> LineKey<'a> {
540 (line.content, line.has_newline)
541}
542
543pub fn patience_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
557 let mut ops: Vec<DiffOp> = Vec::new();
558 patience_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
559 coalesce_ops(ops)
560}
561
562pub fn histogram_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
574 let mut ops: Vec<DiffOp> = Vec::new();
575 histogram_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
576 coalesce_ops(ops)
577}
578
579pub fn diff_lines_with_algorithm(
592 old: &[DiffLine<'_>],
593 new: &[DiffLine<'_>],
594 algorithm: DiffAlgorithm,
595) -> Vec<DiffOp> {
596 match algorithm {
597 DiffAlgorithm::Myers | DiffAlgorithm::Minimal => myers_diff_lines(old, new),
598 DiffAlgorithm::Patience => patience_diff_lines(old, new),
599 DiffAlgorithm::Histogram => histogram_diff_lines(old, new),
600 }
601}
602
603fn emit_trivial_range(a0: usize, a1: usize, b0: usize, b1: usize, out: &mut Vec<DiffOp>) -> bool {
610 let old_len = a1 - a0;
611 let new_len = b1 - b0;
612 if old_len == 0 && new_len == 0 {
613 return true;
614 }
615 if old_len == 0 {
616 out.push(DiffOp::Insert(new_len));
617 return true;
618 }
619 if new_len == 0 {
620 out.push(DiffOp::Delete(old_len));
621 return true;
622 }
623 false
624}
625
626fn trim_common(
634 old: &[DiffLine<'_>],
635 new: &[DiffLine<'_>],
636 mut a0: usize,
637 mut a1: usize,
638 mut b0: usize,
639 mut b1: usize,
640 out: &mut Vec<DiffOp>,
641) -> (usize, usize, usize, usize, usize) {
642 let mut prefix = 0usize;
643 while a0 < a1 && b0 < b1 && old[a0] == new[b0] {
644 a0 += 1;
645 b0 += 1;
646 prefix += 1;
647 }
648 if prefix > 0 {
649 out.push(DiffOp::Equal(prefix));
650 }
651 let mut suffix = 0usize;
652 while a1 > a0 && b1 > b0 && old[a1 - 1] == new[b1 - 1] {
653 a1 -= 1;
654 b1 -= 1;
655 suffix += 1;
656 }
657 (a0, a1, b0, b1, suffix)
658}
659
660fn patience_recurse(
662 old: &[DiffLine<'_>],
663 new: &[DiffLine<'_>],
664 a0: usize,
665 a1: usize,
666 b0: usize,
667 b1: usize,
668 out: &mut Vec<DiffOp>,
669) {
670 if emit_trivial_range(a0, a1, b0, b1, out) {
671 return;
672 }
673 let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
674 if !emit_trivial_range(a0, a1, b0, b1, out) {
675 match patience_anchors(old, new, a0, a1, b0, b1) {
676 Some(anchors) => {
677 let mut cur_a = a0;
680 let mut cur_b = b0;
681 for (ai, bi) in anchors {
682 patience_recurse(old, new, cur_a, ai, cur_b, bi, out);
683 out.push(DiffOp::Equal(1));
684 cur_a = ai + 1;
685 cur_b = bi + 1;
686 }
687 patience_recurse(old, new, cur_a, a1, cur_b, b1, out);
689 }
690 None => myers_core(&old[a0..a1], &new[b0..b1], out),
693 }
694 }
695 if suffix > 0 {
696 out.push(DiffOp::Equal(suffix));
697 }
698}
699
700fn patience_anchors(
709 old: &[DiffLine<'_>],
710 new: &[DiffLine<'_>],
711 a0: usize,
712 a1: usize,
713 b0: usize,
714 b1: usize,
715) -> Option<Vec<(usize, usize)>> {
716 struct Occ {
719 count: usize,
720 pos: usize,
721 }
722 let mut in_old: HashMap<LineKey<'_>, Occ> = HashMap::new();
723 for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
724 in_old
725 .entry(line_key(line))
726 .and_modify(|o| o.count += 1)
727 .or_insert(Occ { count: 1, pos: i });
728 }
729 let mut in_new: HashMap<LineKey<'_>, Occ> = HashMap::new();
730 for (j, line) in new.iter().enumerate().take(b1).skip(b0) {
731 in_new
732 .entry(line_key(line))
733 .and_modify(|o| o.count += 1)
734 .or_insert(Occ { count: 1, pos: j });
735 }
736
737 let mut pairs: Vec<(usize, usize)> = Vec::new();
739 for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
740 let key = line_key(line);
741 let Some(o) = in_old.get(&key) else { continue };
742 if o.count != 1 || o.pos != i {
743 continue;
744 }
745 if let Some(n) = in_new.get(&key)
747 && n.count == 1
748 {
749 pairs.push((i, n.pos));
750 }
751 }
752 if pairs.is_empty() {
753 return None;
754 }
755
756 let lis = longest_increasing_by_new(&pairs);
760 if lis.is_empty() { None } else { Some(lis) }
761}
762
/// Returns a longest subsequence of `pairs` whose second components are
/// strictly increasing, preserving input order.
///
/// `tails[len]` holds the index of the pair that currently ends the best
/// chain of length `len + 1`; `prev` links each pair to its predecessor so
/// the chain can be read back from its last element.
fn longest_increasing_by_new(pairs: &[(usize, usize)]) -> Vec<(usize, usize)> {
    let mut tails: Vec<usize> = Vec::new();
    let mut prev: Vec<Option<usize>> = vec![None; pairs.len()];

    for (idx, &(_, new_pos)) in pairs.iter().enumerate() {
        // First chain length whose tail is not smaller than this value.
        let slot = tails.partition_point(|&tail| pairs[tail].1 < new_pos);
        prev[idx] = slot.checked_sub(1).map(|shorter| tails[shorter]);
        match tails.get_mut(slot) {
            Some(tail) => *tail = idx,
            None => tails.push(idx),
        }
    }

    let mut chain: Vec<(usize, usize)> = Vec::with_capacity(tails.len());
    let mut cursor = tails.last().copied();
    while let Some(idx) = cursor {
        chain.push(pairs[idx]);
        cursor = prev[idx];
    }
    chain.reverse();
    chain
}
813
814fn histogram_recurse(
816 old: &[DiffLine<'_>],
817 new: &[DiffLine<'_>],
818 a0: usize,
819 a1: usize,
820 b0: usize,
821 b1: usize,
822 out: &mut Vec<DiffOp>,
823) {
824 if emit_trivial_range(a0, a1, b0, b1, out) {
825 return;
826 }
827 let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
828 if !emit_trivial_range(a0, a1, b0, b1, out) {
829 match histogram_region(old, new, a0, a1, b0, b1) {
830 Some(region) => {
831 histogram_recurse(old, new, a0, region.old_start, b0, region.new_start, out);
834 out.push(DiffOp::Equal(region.len));
835 histogram_recurse(
836 old,
837 new,
838 region.old_start + region.len,
839 a1,
840 region.new_start + region.len,
841 b1,
842 out,
843 );
844 }
845 None => myers_core(&old[a0..a1], &new[b0..b1], out),
847 }
848 }
849 if suffix > 0 {
850 out.push(DiffOp::Equal(suffix));
851 }
852}
853
/// A block of `len` consecutive lines that is identical on both sides.
struct HistogramRegion {
    /// Index of the block's first line in the old sequence.
    old_start: usize,
    /// Index of the block's first line in the new sequence.
    new_start: usize,
    /// Number of lines in the block.
    len: usize,
}
860
861fn histogram_region(
871 old: &[DiffLine<'_>],
872 new: &[DiffLine<'_>],
873 a0: usize,
874 a1: usize,
875 b0: usize,
876 b1: usize,
877) -> Option<HistogramRegion> {
878 let mut buckets: HashMap<LineKey<'_>, Vec<usize>> = HashMap::new();
880 for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
881 buckets.entry(line_key(line)).or_default().push(i);
882 }
883
884 let mut best: Option<HistogramRegion> = None;
885 let mut best_count = usize::MAX;
887 let mut best_len = 0usize;
888
889 let mut bj = b0;
890 while bj < b1 {
891 let key = line_key(&new[bj]);
892 let Some(positions) = buckets.get(&key) else {
893 bj += 1;
894 continue;
895 };
896 let occ = positions.len();
897 let mut next_bj = bj + 1;
900 for &ai in positions {
901 let mut start_a = ai;
903 let mut start_b = bj;
904 while start_a > a0 && start_b > b0 && old[start_a - 1] == new[start_b - 1] {
905 start_a -= 1;
906 start_b -= 1;
907 }
908 let mut len = 0usize;
910 while start_a + len < a1
911 && start_b + len < b1
912 && old[start_a + len] == new[start_b + len]
913 {
914 len += 1;
915 }
916 let run_count = occ;
919 let better = run_count < best_count || (run_count == best_count && len > best_len);
920 if better && len > 0 {
921 best_count = run_count;
922 best_len = len;
923 best = Some(HistogramRegion {
924 old_start: start_a,
925 new_start: start_b,
926 len,
927 });
928 if start_b + len > next_bj {
931 next_bj = start_b + len;
932 }
933 }
934 }
935 bj = next_bj.max(bj + 1);
936 }
937
938 best
939}
940
/// Layout of a conflict hunk written by the blob merger.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ConflictStyle {
    /// Two sections: ours and theirs. This is the default.
    #[default]
    Merge,
    /// Three sections: ours, the base version (introduced by a `|||||||`
    /// marker), and theirs.
    Diff3,
}
951
952#[derive(Debug, Clone, Copy)]
954pub struct MergeBlobOptions<'a> {
955 pub ours_label: &'a str,
957 pub theirs_label: &'a str,
959 pub base_label: &'a str,
961 pub style: ConflictStyle,
963}
964
965impl Default for MergeBlobOptions<'_> {
966 fn default() -> Self {
967 Self {
968 ours_label: "ours",
969 theirs_label: "theirs",
970 base_label: "base",
971 style: ConflictStyle::Merge,
972 }
973 }
974}
975
/// Outcome of a three-way blob merge.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MergeBlobResult {
    /// The merged bytes, including conflict markers where hunks conflicted.
    pub content: Vec<u8>,
    /// `true` when at least one conflict hunk was written.
    pub conflicted: bool,
}
984
985pub fn merge_blobs(
1000 base: &[u8],
1001 ours: &[u8],
1002 theirs: &[u8],
1003 options: &MergeBlobOptions<'_>,
1004) -> MergeBlobResult {
1005 let base_lines = split_lines(base);
1006 let ours_lines = split_lines(ours);
1007 let theirs_lines = split_lines(theirs);
1008
1009 let ours_matches = matching_regions(&base_lines, &ours_lines);
1012 let theirs_matches = matching_regions(&base_lines, &theirs_lines);
1013
1014 let stable = common_stable_segments(&ours_matches, &theirs_matches);
1018
1019 let mut writer = MergeWriter::new(options);
1020 let mut base_idx = 0usize;
1022 let mut our_idx = 0usize;
1023 let mut their_idx = 0usize;
1024
1025 for seg in &stable {
1026 let base_region = &base_lines[base_idx..seg.base_start];
1028 let our_region = &ours_lines[our_idx..seg.ours_start];
1029 let their_region = &theirs_lines[their_idx..seg.theirs_start];
1030 emit_region(&mut writer, base_region, our_region, their_region);
1031
1032 writer.emit_lines(&base_lines[seg.base_start..seg.base_start + seg.len]);
1034
1035 base_idx = seg.base_start + seg.len;
1036 our_idx = seg.ours_start + seg.len;
1037 their_idx = seg.theirs_start + seg.len;
1038 }
1039
1040 emit_region(
1043 &mut writer,
1044 &base_lines[base_idx..],
1045 &ours_lines[our_idx..],
1046 &theirs_lines[their_idx..],
1047 );
1048
1049 writer.finish()
1050}
1051
1052fn emit_region(
1055 writer: &mut MergeWriter<'_>,
1056 base_region: &[DiffLine<'_>],
1057 our_region: &[DiffLine<'_>],
1058 their_region: &[DiffLine<'_>],
1059) {
1060 if our_region.is_empty() && their_region.is_empty() {
1061 return;
1062 }
1063 let our_changed = our_region != base_region;
1064 let their_changed = their_region != base_region;
1065 match (our_changed, their_changed) {
1066 (false, false) => writer.emit_lines(base_region),
1067 (true, false) => writer.emit_lines(our_region),
1068 (false, true) => writer.emit_lines(their_region),
1069 (true, true) => {
1070 if our_region == their_region {
1071 writer.emit_lines(our_region);
1073 } else {
1074 writer.emit_conflict(our_region, base_region, their_region);
1075 }
1076 }
1077 }
1078}
1079
/// A run of `len` lines that a side shares with the base.
#[derive(Debug, Clone, Copy)]
struct MatchRegion {
    /// Index of the run's first line in the base.
    base_start: usize,
    /// Index of the run's first line in the side.
    side_start: usize,
    /// Number of lines in the run.
    len: usize,
}
1088
/// A run of `len` base lines that both sides kept unchanged.
#[derive(Debug, Clone, Copy)]
struct StableSegment {
    /// Index of the run's first line in the base.
    base_start: usize,
    /// Index of the run's first line in "ours".
    ours_start: usize,
    /// Index of the run's first line in "theirs".
    theirs_start: usize,
    /// Number of lines in the run.
    len: usize,
}
1097
1098fn matching_regions(base: &[DiffLine<'_>], side: &[DiffLine<'_>]) -> Vec<MatchRegion> {
1104 let ops = myers_diff_lines(base, side);
1105 let mut regions = Vec::new();
1106 let mut base_idx = 0usize;
1107 let mut side_idx = 0usize;
1108 for op in ops {
1109 match op {
1110 DiffOp::Equal(n) => {
1111 regions.push(MatchRegion {
1112 base_start: base_idx,
1113 side_start: side_idx,
1114 len: n,
1115 });
1116 base_idx += n;
1117 side_idx += n;
1118 }
1119 DiffOp::Delete(n) => base_idx += n,
1120 DiffOp::Insert(n) => side_idx += n,
1121 }
1122 }
1123 regions
1124}
1125
1126fn common_stable_segments(ours: &[MatchRegion], theirs: &[MatchRegion]) -> Vec<StableSegment> {
1134 let mut segments = Vec::new();
1135 let mut oi = 0usize;
1136 let mut ti = 0usize;
1137 while oi < ours.len() && ti < theirs.len() {
1138 let o = ours[oi];
1139 let t = theirs[ti];
1140 let o_end = o.base_start + o.len;
1141 let t_end = t.base_start + t.len;
1142 let lo = o.base_start.max(t.base_start);
1143 let hi = o_end.min(t_end);
1144 if lo < hi {
1145 segments.push(StableSegment {
1146 base_start: lo,
1147 ours_start: o.side_start + (lo - o.base_start),
1148 theirs_start: t.side_start + (lo - t.base_start),
1149 len: hi - lo,
1150 });
1151 }
1152 if o_end <= t_end {
1154 oi += 1;
1155 } else {
1156 ti += 1;
1157 }
1158 }
1159 segments
1160}
1161
1162struct MergeWriter<'a> {
1165 out: Vec<u8>,
1166 conflicted: bool,
1167 options: &'a MergeBlobOptions<'a>,
1168}
1169
1170impl<'a> MergeWriter<'a> {
1171 fn new(options: &'a MergeBlobOptions<'a>) -> Self {
1172 Self {
1173 out: Vec::new(),
1174 conflicted: false,
1175 options,
1176 }
1177 }
1178
1179 fn emit_lines(&mut self, lines: &[DiffLine<'_>]) {
1182 for line in lines {
1183 self.out.extend_from_slice(line.content);
1184 }
1185 }
1186
1187 fn emit_conflict(
1193 &mut self,
1194 ours: &[DiffLine<'_>],
1195 base: &[DiffLine<'_>],
1196 theirs: &[DiffLine<'_>],
1197 ) {
1198 self.conflicted = true;
1199 self.write_marker(b'<', self.options.ours_label);
1200 self.emit_section(ours);
1201 if self.options.style == ConflictStyle::Diff3 {
1202 self.ensure_newline();
1203 self.write_marker(b'|', self.options.base_label);
1204 self.emit_section(base);
1205 }
1206 self.ensure_newline();
1207 self.write_divider();
1208 self.emit_section(theirs);
1209 self.ensure_newline();
1210 self.write_marker(b'>', self.options.theirs_label);
1211 }
1212
1213 fn emit_section(&mut self, lines: &[DiffLine<'_>]) {
1215 for line in lines {
1216 self.out.extend_from_slice(line.content);
1217 }
1218 }
1219
1220 fn ensure_newline(&mut self) {
1223 if !self.out.is_empty() && self.out.last() != Some(&b'\n') {
1224 self.out.push(b'\n');
1225 }
1226 }
1227
1228 fn write_marker(&mut self, ch: u8, label: &str) {
1232 for _ in 0..7 {
1233 self.out.push(ch);
1234 }
1235 if !label.is_empty() {
1236 self.out.push(b' ');
1237 self.out.extend_from_slice(label.as_bytes());
1238 }
1239 self.out.push(b'\n');
1240 }
1241
1242 fn write_divider(&mut self) {
1244 for _ in 0..7 {
1245 self.out.push(b'=');
1246 }
1247 self.out.push(b'\n');
1248 }
1249
1250 fn finish(self) -> MergeBlobResult {
1251 MergeBlobResult {
1252 content: self.out,
1253 conflicted: self.conflicted,
1254 }
1255 }
1256}
1257
/// Line-diff strategy selector.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffAlgorithm {
    /// The Myers implementation.
    Myers,
    /// Currently dispatched to the same implementation as `Myers`.
    Minimal,
    /// Patience diff, anchored on lines unique to both sides.
    Patience,
    /// Histogram diff, anchored on blocks seeded by low-occurrence lines.
    Histogram,
}
1265
1266#[derive(Debug, Clone, PartialEq, Eq)]
1267pub enum FileChange {
1268 Add { path: RepoPath },
1269 Delete { path: RepoPath },
1270 Modify { path: RepoPath },
1271 Rename { old: RepoPath, new: RepoPath },
1272 Copy { source: RepoPath, dest: RepoPath },
1273}
1274
1275#[derive(Debug, Clone, PartialEq, Eq)]
1276pub struct Conflict {
1277 pub path: RepoPath,
1278 pub ours: Vec<u8>,
1279 pub theirs: Vec<u8>,
1280}
1281
/// Status of a path in a name-status listing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameStatus {
    /// Code `A`.
    Added,
    /// Code `D`.
    Deleted,
    /// Code `M`.
    Modified,
    /// Code `R`; the payload is the score printed after the code.
    Renamed(u8),
    /// Code `C`; the payload is the score printed after the code.
    Copied(u8),
    /// Code `U`.
    Unmerged,
}

impl NameStatus {
    /// Returns the single-letter status code.
    pub const fn code(self) -> char {
        match self {
            Self::Unmerged => 'U',
            Self::Copied(_) => 'C',
            Self::Renamed(_) => 'R',
            Self::Modified => 'M',
            Self::Deleted => 'D',
            Self::Added => 'A',
        }
    }

    /// Returns the status column text: the code, followed for renames and
    /// copies by the score zero-padded to three digits (e.g. `R075`).
    pub fn label(self) -> String {
        match self {
            Self::Renamed(score) | Self::Copied(score) => {
                format!("{}{score:03}", self.code())
            }
            Self::Added | Self::Deleted | Self::Modified | Self::Unmerged => {
                self.code().to_string()
            }
        }
    }
}
1315
1316#[derive(Debug, Clone, PartialEq, Eq)]
1317pub struct NameStatusEntry {
1318 pub status: NameStatus,
1319 pub path: BString,
1320 pub old_path: Option<BString>,
1321 pub old_mode: Option<u32>,
1322 pub new_mode: Option<u32>,
1323 pub old_oid: Option<ObjectId>,
1324 pub new_oid: Option<ObjectId>,
1325}
1326
1327impl NameStatusEntry {
1328 pub fn line(&self) -> String {
1329 if let Some(old_path) = &self.old_path {
1330 format!(
1331 "{}\t{}\t{}",
1332 self.status.label(),
1333 String::from_utf8_lossy(old_path.as_bytes()),
1334 String::from_utf8_lossy(self.path.as_bytes())
1335 )
1336 } else {
1337 format!(
1338 "{}\t{}",
1339 self.status.label(),
1340 String::from_utf8_lossy(self.path.as_bytes())
1341 )
1342 }
1343 }
1344}
1345
1346#[derive(Debug, Clone, PartialEq, Eq)]
1347pub struct IndexGitlinkEntry {
1348 pub path: BString,
1349 pub oid: ObjectId,
1350}
1351
1352#[derive(Debug, Clone, PartialEq, Eq)]
1353pub struct IndexWorktreeDiff {
1354 pub entries: Vec<NameStatusEntry>,
1355 pub staged_gitlinks: Vec<IndexGitlinkEntry>,
1356}
1357
/// Switches controlling rename/copy handling in name-status diffs.
///
/// NOTE(review): the code that interprets these flags is outside the visible
/// part of this file; the notes below restate the field names only.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffNameStatusOptions {
    /// Enable rename detection. Defaults to `true`.
    pub detect_renames: bool,
    /// Enable copy detection. Defaults to `false`.
    pub detect_copies: bool,
    /// Enable the wider copy search. Defaults to `false`.
    pub find_copies_harder: bool,
    /// Let empty content take part in rename detection. Defaults to `true`.
    pub rename_empty: bool,
}

impl Default for DiffNameStatusOptions {
    /// Renames on (including empty content), copies off.
    fn default() -> Self {
        DiffNameStatusOptions {
            rename_empty: true,
            find_copies_harder: false,
            detect_copies: false,
            detect_renames: true,
        }
    }
}
1376
/// Default value for both the rename and the copy threshold of
/// [`RenameDetectionOptions`].
pub const DEFAULT_RENAME_THRESHOLD: u8 = 50;
1381
1382#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1398pub struct RenameDetectionOptions {
1399 pub base: DiffNameStatusOptions,
1402 pub detect_inexact: bool,
1405 pub rename_threshold: u8,
1409 pub copy_threshold: u8,
1413}
1414
1415impl Default for RenameDetectionOptions {
1416 fn default() -> Self {
1417 Self {
1418 base: DiffNameStatusOptions::default(),
1419 detect_inexact: false,
1420 rename_threshold: DEFAULT_RENAME_THRESHOLD,
1421 copy_threshold: DEFAULT_RENAME_THRESHOLD,
1422 }
1423 }
1424}
1425
1426impl RenameDetectionOptions {
1427 pub fn inexact(base: DiffNameStatusOptions) -> Self {
1430 Self {
1431 base,
1432 detect_inexact: true,
1433 ..Self::default()
1434 }
1435 }
1436}
1437
1438pub fn diff_name_status_head_worktree(
1439 worktree_root: impl AsRef<Path>,
1440 git_dir: impl AsRef<Path>,
1441 format: ObjectFormat,
1442) -> Result<Vec<NameStatusEntry>> {
1443 diff_name_status_head_worktree_with_options(
1444 worktree_root,
1445 git_dir,
1446 format,
1447 DiffNameStatusOptions::default(),
1448 )
1449}
1450
1451pub fn diff_name_status_head_worktree_with_options(
1452 worktree_root: impl AsRef<Path>,
1453 git_dir: impl AsRef<Path>,
1454 format: ObjectFormat,
1455 options: DiffNameStatusOptions,
1456) -> Result<Vec<NameStatusEntry>> {
1457 let worktree_root = worktree_root.as_ref();
1458 let git_dir = git_dir.as_ref();
1459 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1460 let head = head_tree_entries(git_dir, format, &db)?;
1461 let IndexSnapshot {
1462 entries: index,
1463 stat_cache,
1464 } = read_index_snapshot(git_dir, format)?;
1465 let index_gitlinks = index_gitlinks(&index);
1466 let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1467 let worktree = worktree_entries_for_path_set(
1468 worktree_root,
1469 format,
1470 &candidate_paths,
1471 &index_gitlinks,
1472 Some(&stat_cache),
1473 )?;
1474 let changes = diff_name_status_maps_for_path_set(&head, &worktree, &candidate_paths, options)?;
1475 Ok(mark_unstaged_worktree_oids_unresolved(
1476 changes, &index, &worktree,
1477 ))
1478}
1479
1480pub fn diff_name_status_head_worktree_with_rename_options(
1484 worktree_root: impl AsRef<Path>,
1485 git_dir: impl AsRef<Path>,
1486 format: ObjectFormat,
1487 options: RenameDetectionOptions,
1488) -> Result<Vec<NameStatusEntry>> {
1489 let worktree_root = worktree_root.as_ref();
1490 let git_dir = git_dir.as_ref();
1491 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1492 let head = head_tree_entries(git_dir, format, &db)?;
1493 let IndexSnapshot {
1494 entries: index,
1495 stat_cache,
1496 } = read_index_snapshot(git_dir, format)?;
1497 let index_gitlinks = index_gitlinks(&index);
1498 let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1499 let worktree = worktree_entries_for_path_set(
1500 worktree_root,
1501 format,
1502 &candidate_paths,
1503 &index_gitlinks,
1504 Some(&stat_cache),
1505 )?;
1506 let cache = worktree_blob_cache_for_path_set(
1507 worktree_root,
1508 &head,
1509 &worktree,
1510 &candidate_paths,
1511 options,
1512 )?;
1513 let changes = diff_name_status_maps_with_renames_for_path_set(
1514 &head,
1515 &worktree,
1516 &candidate_paths,
1517 options,
1518 |oid| cache_or_odb_blob(&cache, &db, oid),
1519 )?;
1520 Ok(mark_unstaged_worktree_oids_unresolved(
1521 changes, &index, &worktree,
1522 ))
1523}
1524
1525pub fn diff_name_status_head_index(
1526 git_dir: impl AsRef<Path>,
1527 format: ObjectFormat,
1528) -> Result<Vec<NameStatusEntry>> {
1529 diff_name_status_head_index_with_options(git_dir, format, DiffNameStatusOptions::default())
1530}
1531
1532pub fn diff_name_status_head_index_with_options(
1533 git_dir: impl AsRef<Path>,
1534 format: ObjectFormat,
1535 options: DiffNameStatusOptions,
1536) -> Result<Vec<NameStatusEntry>> {
1537 let git_dir = git_dir.as_ref();
1538 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1539 let head = head_tree_entries(git_dir, format, &db)?;
1540 let index = read_index_entries(git_dir, format)?;
1541 diff_name_status_maps(&head, &index, head.keys().chain(index.keys()), options)
1542}
1543
1544pub fn diff_name_status_head_index_with_rename_options(
1548 git_dir: impl AsRef<Path>,
1549 format: ObjectFormat,
1550 options: RenameDetectionOptions,
1551) -> Result<Vec<NameStatusEntry>> {
1552 let git_dir = git_dir.as_ref();
1553 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1554 let head = head_tree_entries(git_dir, format, &db)?;
1555 let index = read_index_entries(git_dir, format)?;
1556 diff_name_status_maps_with_renames(
1557 &head,
1558 &index,
1559 head.keys().chain(index.keys()),
1560 options,
1561 |oid| read_blob_bytes(&db, oid),
1562 )
1563}
1564
1565fn tree_entries(
1575 tree_oid: &ObjectId,
1576 format: ObjectFormat,
1577 db: &FileObjectDatabase,
1578) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
1579 let mut entries = BTreeMap::new();
1580 if *tree_oid == empty_tree_oid(format)? {
1581 return Ok(entries);
1582 }
1583 collect_tree_entries(db, format, tree_oid, Vec::new(), &mut entries)?;
1584 Ok(entries)
1585}
1586
1587fn empty_tree_oid(format: ObjectFormat) -> Result<ObjectId> {
1590 object_id_for_bytes(format, "tree", b"")
1591}
1592
1593pub fn diff_name_status_tree_index_with_options(
1597 git_dir: impl AsRef<Path>,
1598 format: ObjectFormat,
1599 tree_oid: &ObjectId,
1600 options: DiffNameStatusOptions,
1601) -> Result<Vec<NameStatusEntry>> {
1602 let git_dir = git_dir.as_ref();
1603 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1604 let tree = tree_entries(tree_oid, format, &db)?;
1605 let index = read_index_entries(git_dir, format)?;
1606 diff_name_status_maps(&tree, &index, tree.keys().chain(index.keys()), options)
1607}
1608
1609pub fn diff_name_status_tree_index_with_rename_options(
1614 git_dir: impl AsRef<Path>,
1615 format: ObjectFormat,
1616 tree_oid: &ObjectId,
1617 options: RenameDetectionOptions,
1618) -> Result<Vec<NameStatusEntry>> {
1619 let git_dir = git_dir.as_ref();
1620 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1621 let tree = tree_entries(tree_oid, format, &db)?;
1622 let index = read_index_entries(git_dir, format)?;
1623 diff_name_status_maps_with_renames(
1624 &tree,
1625 &index,
1626 tree.keys().chain(index.keys()),
1627 options,
1628 |oid| read_blob_bytes(&db, oid),
1629 )
1630}
1631
1632pub fn diff_name_status_tree_worktree_with_options(
1638 worktree_root: impl AsRef<Path>,
1639 git_dir: impl AsRef<Path>,
1640 format: ObjectFormat,
1641 tree_oid: &ObjectId,
1642 options: DiffNameStatusOptions,
1643) -> Result<Vec<NameStatusEntry>> {
1644 let worktree_root = worktree_root.as_ref();
1645 let git_dir = git_dir.as_ref();
1646 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1647 let tree = tree_entries(tree_oid, format, &db)?;
1648 let IndexSnapshot {
1649 entries: index,
1650 stat_cache,
1651 } = read_index_snapshot(git_dir, format)?;
1652 let index_gitlinks = index_gitlinks(&index);
1653 let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1654 let worktree = worktree_entries_for_path_set(
1655 worktree_root,
1656 format,
1657 &candidate_paths,
1658 &index_gitlinks,
1659 Some(&stat_cache),
1660 )?;
1661 let changes = diff_name_status_maps_for_path_set(&tree, &worktree, &candidate_paths, options)?;
1662 Ok(mark_unstaged_worktree_oids_unresolved(
1663 changes, &index, &worktree,
1664 ))
1665}
1666
1667pub fn diff_name_status_tree_worktree_with_rename_options(
1673 worktree_root: impl AsRef<Path>,
1674 git_dir: impl AsRef<Path>,
1675 format: ObjectFormat,
1676 tree_oid: &ObjectId,
1677 options: RenameDetectionOptions,
1678) -> Result<Vec<NameStatusEntry>> {
1679 let worktree_root = worktree_root.as_ref();
1680 let git_dir = git_dir.as_ref();
1681 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1682 let tree = tree_entries(tree_oid, format, &db)?;
1683 let IndexSnapshot {
1684 entries: index,
1685 stat_cache,
1686 } = read_index_snapshot(git_dir, format)?;
1687 let index_gitlinks = index_gitlinks(&index);
1688 let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1689 let worktree = worktree_entries_for_path_set(
1690 worktree_root,
1691 format,
1692 &candidate_paths,
1693 &index_gitlinks,
1694 Some(&stat_cache),
1695 )?;
1696 let cache = worktree_blob_cache_for_path_set(
1697 worktree_root,
1698 &tree,
1699 &worktree,
1700 &candidate_paths,
1701 options,
1702 )?;
1703 let changes = diff_name_status_maps_with_renames_for_path_set(
1704 &tree,
1705 &worktree,
1706 &candidate_paths,
1707 options,
1708 |oid| cache_or_odb_blob(&cache, &db, oid),
1709 )?;
1710 Ok(mark_unstaged_worktree_oids_unresolved(
1711 changes, &index, &worktree,
1712 ))
1713}
1714
1715pub fn diff_name_status_index_worktree(
1716 worktree_root: impl AsRef<Path>,
1717 git_dir: impl AsRef<Path>,
1718 format: ObjectFormat,
1719) -> Result<Vec<NameStatusEntry>> {
1720 diff_name_status_index_worktree_with_options(
1721 worktree_root,
1722 git_dir,
1723 format,
1724 DiffNameStatusOptions::default(),
1725 )
1726}
1727
1728pub fn diff_name_status_index_worktree_with_options(
1729 worktree_root: impl AsRef<Path>,
1730 git_dir: impl AsRef<Path>,
1731 format: ObjectFormat,
1732 options: DiffNameStatusOptions,
1733) -> Result<Vec<NameStatusEntry>> {
1734 Ok(diff_name_status_index_worktree_with_options_and_gitlinks(
1735 worktree_root,
1736 git_dir,
1737 format,
1738 options,
1739 )?
1740 .entries)
1741}
1742
1743pub fn diff_name_status_index_worktree_with_options_and_gitlinks(
1744 worktree_root: impl AsRef<Path>,
1745 git_dir: impl AsRef<Path>,
1746 format: ObjectFormat,
1747 options: DiffNameStatusOptions,
1748) -> Result<IndexWorktreeDiff> {
1749 let IndexWorktreeDiff {
1750 entries,
1751 staged_gitlinks,
1752 } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1753 let entries = apply_name_status_options_to_index_worktree_changes(entries, options)?;
1754 Ok(IndexWorktreeDiff {
1755 entries,
1756 staged_gitlinks,
1757 })
1758}
1759
1760pub fn diff_name_status_index_worktree_with_rename_options(
1764 worktree_root: impl AsRef<Path>,
1765 git_dir: impl AsRef<Path>,
1766 format: ObjectFormat,
1767 options: RenameDetectionOptions,
1768) -> Result<Vec<NameStatusEntry>> {
1769 Ok(
1770 diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1771 worktree_root,
1772 git_dir,
1773 format,
1774 options,
1775 )?
1776 .entries,
1777 )
1778}
1779
1780pub fn diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1781 worktree_root: impl AsRef<Path>,
1782 git_dir: impl AsRef<Path>,
1783 format: ObjectFormat,
1784 options: RenameDetectionOptions,
1785) -> Result<IndexWorktreeDiff> {
1786 let IndexWorktreeDiff {
1787 entries,
1788 staged_gitlinks,
1789 } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1790 let entries = apply_name_status_options_to_index_worktree_changes(entries, options.base)?;
1794 Ok(IndexWorktreeDiff {
1795 entries,
1796 staged_gitlinks,
1797 })
1798}
1799
1800fn diff_name_status_index_worktree_changes(
1801 worktree_root: &Path,
1802 git_dir: &Path,
1803 format: ObjectFormat,
1804) -> Result<IndexWorktreeDiff> {
1805 let index_path = sley_index::repository_index_path(git_dir);
1806 let index_metadata = match fs::metadata(&index_path) {
1807 Ok(metadata) => metadata,
1808 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
1809 return Ok(IndexWorktreeDiff {
1810 entries: Vec::new(),
1811 staged_gitlinks: Vec::new(),
1812 });
1813 }
1814 Err(err) => return Err(err.into()),
1815 };
1816 let index_bytes = fs::read(&index_path)?;
1817 if let Ok(index) = BorrowedIndex::parse(&index_bytes, format)
1818 && index.extension(&sley_index::INDEX_EXT_LINK)?.is_none()
1819 && !index.entries.iter().any(borrowed_entry_is_sparse_dir)
1820 {
1821 let (has_non_normal_stage, staged_gitlinks) =
1822 index_worktree_metadata_for_entries(&index.entries);
1823 if has_non_normal_stage {
1824 return diff_name_status_index_worktree_changes_from_snapshot(
1825 worktree_root,
1826 git_dir,
1827 format,
1828 );
1829 }
1830 let stat_cache =
1831 IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1832 let entries = diff_name_status_index_worktree_changes_for_borrowed_entries(
1833 worktree_root,
1834 format,
1835 &index.entries,
1836 &stat_cache,
1837 )?;
1838 return Ok(IndexWorktreeDiff {
1839 entries,
1840 staged_gitlinks,
1841 });
1842 }
1843 let index = expand_sparse_index_for_worktree_diff(
1844 sley_index::read_repository_index(git_dir, format)?,
1845 git_dir,
1846 format,
1847 )?;
1848 let (has_non_normal_stage, staged_gitlinks) =
1849 index_worktree_metadata_for_entries(&index.entries);
1850 if has_non_normal_stage {
1851 return diff_name_status_index_worktree_changes_from_snapshot(
1852 worktree_root,
1853 git_dir,
1854 format,
1855 );
1856 }
1857 let stat_cache =
1858 IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1859 let entries = diff_name_status_index_worktree_changes_for_entries(
1860 worktree_root,
1861 format,
1862 &index.entries,
1863 &stat_cache,
1864 )?;
1865 Ok(IndexWorktreeDiff {
1866 entries,
1867 staged_gitlinks,
1868 })
1869}
1870
1871fn borrowed_entry_is_sparse_dir(entry: &sley_index::IndexEntryRef<'_>) -> bool {
1872 entry.mode == sley_index::SPARSE_DIR_MODE && entry.is_skip_worktree()
1873}
1874
1875fn expand_sparse_index_for_worktree_diff(
1876 mut index: Index,
1877 git_dir: &Path,
1878 format: ObjectFormat,
1879) -> Result<Index> {
1880 if !index
1881 .entries
1882 .iter()
1883 .any(sley_index::IndexEntry::is_sparse_dir)
1884 {
1885 return Ok(index);
1886 }
1887
1888 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1889 let mut expanded = Vec::with_capacity(index.entries.len());
1890 for entry in std::mem::take(&mut index.entries) {
1891 if !entry.is_sparse_dir() {
1892 expanded.push(entry);
1893 continue;
1894 }
1895
1896 let dir_prefix = entry.path.as_bytes();
1897 for (rel_path, (mode, oid)) in flatten_tree(&db, format, &entry.oid)? {
1898 let mut path = dir_prefix.to_vec();
1899 path.extend_from_slice(&rel_path);
1900 let mut expanded_entry = sley_index::IndexEntry {
1901 ctime_seconds: 0,
1902 ctime_nanoseconds: 0,
1903 mtime_seconds: 0,
1904 mtime_nanoseconds: 0,
1905 dev: 0,
1906 ino: 0,
1907 mode,
1908 uid: 0,
1909 gid: 0,
1910 size: 0,
1911 oid,
1912 flags: 0,
1913 flags_extended: 0,
1914 path: BString::from(path),
1915 };
1916 expanded_entry.set_skip_worktree(true);
1917 expanded_entry.refresh_name_length();
1918 expanded.push(expanded_entry);
1919 }
1920 }
1921
1922 expanded.sort_by(|left, right| left.path.as_bytes().cmp(right.path.as_bytes()));
1923 index.entries = expanded;
1924 index.clear_sparse_extension()?;
1925 Ok(index)
1926}
1927
1928fn diff_name_status_index_worktree_changes_for_borrowed_entries(
1929 worktree_root: &Path,
1930 format: ObjectFormat,
1931 entries: &[sley_index::IndexEntryRef<'_>],
1932 stat_cache: &IndexStatCache,
1933) -> Result<Vec<NameStatusEntry>> {
1934 const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1935 let workers = std::thread::available_parallelism()
1936 .map(|count| count.get())
1937 .unwrap_or(1)
1938 .min(8);
1939 if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1940 return diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1941 worktree_root,
1942 format,
1943 entries,
1944 stat_cache,
1945 );
1946 }
1947 let chunk_size = entries.len().div_ceil(workers);
1948 std::thread::scope(|scope| {
1949 let mut handles = Vec::new();
1950 for chunk in entries.chunks(chunk_size) {
1951 handles.push(scope.spawn(move || {
1952 diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1953 worktree_root,
1954 format,
1955 chunk,
1956 stat_cache,
1957 )
1958 }));
1959 }
1960 let mut changes = Vec::new();
1961 for handle in handles {
1962 let chunk_changes = handle
1963 .join()
1964 .map_err(|_| GitError::Command("diff worker panicked".into()))??;
1965 changes.extend(chunk_changes);
1966 }
1967 Ok(changes)
1968 })
1969}
1970
1971fn diff_name_status_index_worktree_changes_for_entries(
1972 worktree_root: &Path,
1973 format: ObjectFormat,
1974 entries: &[sley_index::IndexEntry],
1975 stat_cache: &IndexStatCache,
1976) -> Result<Vec<NameStatusEntry>> {
1977 const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1978 let workers = std::thread::available_parallelism()
1979 .map(|count| count.get())
1980 .unwrap_or(1)
1981 .min(8);
1982 if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1983 return diff_name_status_index_worktree_changes_for_entry_chunk(
1984 worktree_root,
1985 format,
1986 entries,
1987 stat_cache,
1988 );
1989 }
1990 let chunk_size = entries.len().div_ceil(workers);
1991 std::thread::scope(|scope| {
1992 let mut handles = Vec::new();
1993 for chunk in entries.chunks(chunk_size) {
1994 handles.push(scope.spawn(move || {
1995 diff_name_status_index_worktree_changes_for_entry_chunk(
1996 worktree_root,
1997 format,
1998 chunk,
1999 stat_cache,
2000 )
2001 }));
2002 }
2003 let mut changes = Vec::new();
2004 for handle in handles {
2005 let chunk_changes = handle
2006 .join()
2007 .map_err(|_| GitError::Command("diff worker panicked".into()))??;
2008 changes.extend(chunk_changes);
2009 }
2010 Ok(changes)
2011 })
2012}
2013
2014fn diff_name_status_index_worktree_changes_for_entry_chunk(
2015 worktree_root: &Path,
2016 format: ObjectFormat,
2017 entries: &[sley_index::IndexEntry],
2018 stat_cache: &IndexStatCache,
2019) -> Result<Vec<NameStatusEntry>> {
2020 let mut changes = Vec::new();
2021 let mut path = PathBuf::from(worktree_root);
2022 for entry in entries {
2023 worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path.as_bytes());
2024 if let Some(change) = index_worktree_change_for_entry(&path, format, entry, stat_cache)? {
2025 changes.push(change);
2026 }
2027 }
2028 Ok(changes)
2029}
2030
2031fn diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
2032 worktree_root: &Path,
2033 format: ObjectFormat,
2034 entries: &[sley_index::IndexEntryRef<'_>],
2035 stat_cache: &IndexStatCache,
2036) -> Result<Vec<NameStatusEntry>> {
2037 let mut changes = Vec::new();
2038 let mut path = PathBuf::from(worktree_root);
2039 for entry in entries {
2040 worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path);
2041 if let Some(change) = index_worktree_change_for_entry(&path, format, entry, stat_cache)? {
2042 changes.push(change);
2043 }
2044 }
2045 Ok(changes)
2046}
2047
2048fn index_worktree_metadata_for_entries(
2049 entries: &[impl WorktreeIndexEntry],
2050) -> (bool, Vec<IndexGitlinkEntry>) {
2051 let mut needs_snapshot = false;
2052 let mut staged_gitlinks = Vec::new();
2053 for entry in entries {
2054 if entry.stage() != sley_index::Stage::Normal {
2055 needs_snapshot = true;
2056 }
2057 if entry.is_intent_to_add() {
2060 needs_snapshot = true;
2061 }
2062 if sley_index::is_gitlink(entry.mode()) {
2063 staged_gitlinks.push(IndexGitlinkEntry {
2064 path: BString::from_bytes(entry.git_path()),
2065 oid: entry.oid(),
2066 });
2067 }
2068 }
2069 (needs_snapshot, staged_gitlinks)
2070}
2071
2072fn diff_name_status_index_worktree_changes_from_snapshot(
2073 worktree_root: &Path,
2074 git_dir: &Path,
2075 format: ObjectFormat,
2076) -> Result<IndexWorktreeDiff> {
2077 let IndexSnapshot {
2078 entries: index,
2079 stat_cache,
2080 } = read_index_snapshot(git_dir, format)?;
2081 let intent_to_add_paths = read_intent_to_add_paths(git_dir, format)?;
2086 let unmerged = read_unmerged_stages(git_dir, format)?;
2093 let index_gitlinks = index_gitlinks(&index);
2094 let staged_gitlinks = index_gitlinks
2095 .iter()
2096 .map(|(path, oid)| IndexGitlinkEntry {
2097 path: BString::from_bytes(path),
2098 oid: *oid,
2099 })
2100 .collect();
2101 let mut changes = Vec::new();
2102 for (git_path, left) in &index {
2103 let conflict_stages = unmerged.get(git_path);
2107 let right = worktree_entry_for_path(
2108 worktree_root,
2109 format,
2110 git_path,
2111 &index_gitlinks,
2112 Some(&stat_cache),
2113 )?;
2114 if conflict_stages.is_some() {
2115 changes.push(NameStatusEntry {
2119 status: NameStatus::Unmerged,
2120 path: git_path.clone().into(),
2121 old_path: None,
2122 old_mode: None,
2123 new_mode: right.as_ref().map(|entry| entry.mode),
2124 old_oid: None,
2125 new_oid: None,
2126 });
2127 }
2128 let left = match conflict_stages {
2133 Some(stages) => match stages.ours.as_ref() {
2134 Some(ours) => ours,
2135 None => continue,
2136 },
2137 None => left,
2138 };
2139 if intent_to_add_paths.contains(git_path.as_slice()) {
2144 if let Some(right) = right {
2145 changes.push(NameStatusEntry {
2146 status: NameStatus::Added,
2147 path: git_path.clone().into(),
2148 old_path: None,
2149 old_mode: None,
2150 new_mode: Some(right.mode),
2151 old_oid: None,
2152 new_oid: Some(right.oid),
2153 });
2154 }
2155 continue;
2156 }
2157 let Some(right) = right else {
2158 changes.push(NameStatusEntry {
2159 status: NameStatus::Deleted,
2160 path: git_path.clone().into(),
2161 old_path: None,
2162 old_mode: Some(left.mode),
2163 new_mode: None,
2164 old_oid: Some(left.oid),
2165 new_oid: None,
2166 });
2167 continue;
2168 };
2169 if right != *left {
2170 changes.push(NameStatusEntry {
2171 status: NameStatus::Modified,
2172 path: git_path.clone().into(),
2173 old_path: None,
2174 old_mode: Some(left.mode),
2175 new_mode: Some(right.mode),
2176 old_oid: Some(left.oid),
2177 new_oid: Some(right.oid),
2178 });
2179 }
2180 }
2181 Ok(IndexWorktreeDiff {
2182 entries: changes,
2183 staged_gitlinks,
2184 })
2185}
2186
/// Per-path record for an unmerged index path, as built by
/// `read_unmerged_stages`.
struct ConflictStages {
    /// Mode and oid of the `Stage::Ours` entry for the path, or `None` when
    /// the path has no entry at that stage.
    ours: Option<TrackedEntry>,
}
2191
2192fn read_unmerged_stages(
2196 git_dir: &Path,
2197 format: ObjectFormat,
2198) -> Result<BTreeMap<Vec<u8>, ConflictStages>> {
2199 let index_path = sley_index::repository_index_path(git_dir);
2200 if !index_path.exists() {
2201 return Ok(BTreeMap::new());
2202 }
2203 let index = sley_index::read_repository_index(git_dir, format)?;
2204 let mut out: BTreeMap<Vec<u8>, ConflictStages> = BTreeMap::new();
2205 for entry in &index.entries {
2206 let stage = entry.stage();
2207 if stage == sley_index::Stage::Normal {
2208 continue;
2209 }
2210 let path = entry.path.clone().into_bytes();
2211 let slot = out.entry(path).or_insert(ConflictStages { ours: None });
2212 if stage == sley_index::Stage::Ours {
2213 slot.ours = Some(TrackedEntry {
2214 mode: entry.mode,
2215 oid: entry.oid,
2216 });
2217 }
2218 }
2219 Ok(out)
2220}
2221
2222fn apply_name_status_options_to_index_worktree_changes(
2223 mut changes: Vec<NameStatusEntry>,
2224 options: DiffNameStatusOptions,
2225) -> Result<Vec<NameStatusEntry>> {
2226 if options.detect_renames {
2227 changes = detect_exact_renames_from_changes(changes, options.rename_empty);
2228 } else if options.detect_copies {
2229 changes.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2230 }
2231 Ok(changes)
2232}
2233
2234fn detect_exact_renames_from_changes(
2235 changes: Vec<NameStatusEntry>,
2236 rename_empty: bool,
2237) -> Vec<NameStatusEntry> {
2238 let added = changes
2239 .iter()
2240 .enumerate()
2241 .filter(|(_, entry)| entry.status == NameStatus::Added)
2242 .collect::<Vec<_>>();
2243 let deleted = changes
2244 .iter()
2245 .enumerate()
2246 .filter(|(_, entry)| entry.status == NameStatus::Deleted)
2247 .collect::<Vec<_>>();
2248 let mut consumed_added = BTreeSet::new();
2249 let mut consumed_deleted = BTreeSet::new();
2250 let mut result = Vec::new();
2251
2252 for (deleted_index, deleted_entry) in deleted {
2253 let Some(old_oid) = deleted_entry.old_oid else {
2254 continue;
2255 };
2256 if !rename_empty && is_empty_blob_oid(&old_oid) {
2257 continue;
2258 }
2259 if let Some((added_index, added_entry)) = added.iter().find(|(added_index, added_entry)| {
2260 !consumed_added.contains(added_index) && added_entry.new_oid == Some(old_oid)
2261 }) {
2262 consumed_deleted.insert(deleted_index);
2263 consumed_added.insert(*added_index);
2264 result.push(NameStatusEntry {
2265 status: NameStatus::Renamed(100),
2266 path: added_entry.path.clone(),
2267 old_path: Some(deleted_entry.path.clone()),
2268 old_mode: deleted_entry.old_mode,
2269 new_mode: added_entry.new_mode,
2270 old_oid: deleted_entry.old_oid,
2271 new_oid: added_entry.new_oid,
2272 });
2273 }
2274 }
2275
2276 for (index, entry) in changes.into_iter().enumerate() {
2277 if consumed_added.contains(&index) || consumed_deleted.contains(&index) {
2278 continue;
2279 }
2280 result.push(entry);
2281 }
2282 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2283 result
2284}
2285
2286pub fn diff_name_status_index_worktree_for_diff_files_with_options(
2310 worktree_root: impl AsRef<Path>,
2311 git_dir: impl AsRef<Path>,
2312 format: ObjectFormat,
2313 options: DiffNameStatusOptions,
2314) -> Result<Vec<NameStatusEntry>> {
2315 let worktree_root = worktree_root.as_ref();
2316 let git_dir = git_dir.as_ref();
2317 let changes =
2318 diff_name_status_index_worktree_with_options(worktree_root, git_dir, format, options)?;
2319 augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
2320}
2321
2322pub fn diff_name_status_index_worktree_for_diff_files_with_rename_options(
2326 worktree_root: impl AsRef<Path>,
2327 git_dir: impl AsRef<Path>,
2328 format: ObjectFormat,
2329 options: RenameDetectionOptions,
2330) -> Result<Vec<NameStatusEntry>> {
2331 let worktree_root = worktree_root.as_ref();
2332 let git_dir = git_dir.as_ref();
2333 let changes = diff_name_status_index_worktree_with_rename_options(
2334 worktree_root,
2335 git_dir,
2336 format,
2337 options,
2338 )?;
2339 augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
2340}
2341
2342fn augment_with_stat_dirty_entries(
2349 worktree_root: &Path,
2350 git_dir: &Path,
2351 format: ObjectFormat,
2352 mut content_changes: Vec<NameStatusEntry>,
2353) -> Result<Vec<NameStatusEntry>> {
2354 let IndexSnapshot {
2355 entries: index,
2356 stat_cache,
2357 } = read_index_snapshot(git_dir, format)?;
2358 let already_reported: BTreeSet<&[u8]> = content_changes
2361 .iter()
2362 .map(|entry| entry.path.as_bytes())
2363 .collect();
2364 let mut extras = Vec::new();
2365 for (git_path, tracked) in &index {
2366 if already_reported.contains(git_path.as_slice()) {
2367 continue;
2368 }
2369 let Some(cached) = stat_cache.entry_for_git_path(git_path) else {
2370 continue;
2371 };
2372 if sley_index::is_gitlink(tracked.mode) {
2375 continue;
2376 }
2377 let path = worktree_path_for_repo_path(worktree_root, git_path);
2378 let Ok(metadata) = fs::symlink_metadata(&path) else {
2379 continue;
2382 };
2383 if !(metadata.is_file() || metadata.file_type().is_symlink()) {
2384 continue;
2385 }
2386 match stat_cache.index_entry_worktree_stat_verdict(cached, &metadata) {
2387 sley_index::StatVerdict::Clean => continue,
2388 sley_index::StatVerdict::Dirty => {}
2389 sley_index::StatVerdict::RacyNeedsContentCheck => {
2394 if worktree_oid_matches_index(worktree_root, git_path, &metadata, tracked, format)?
2395 {
2396 continue;
2397 }
2398 }
2399 }
2400 extras.push(NameStatusEntry {
2401 status: NameStatus::Modified,
2402 path: git_path.clone().into(),
2403 old_path: None,
2404 old_mode: Some(tracked.mode),
2405 new_mode: Some(tracked.mode),
2406 old_oid: Some(tracked.oid),
2407 new_oid: None,
2408 });
2409 }
2410 if !extras.is_empty() {
2411 content_changes.extend(extras);
2412 content_changes
2413 .sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2414 }
2415 Ok(content_changes)
2416}
2417
2418fn worktree_oid_matches_index(
2423 worktree_root: &Path,
2424 git_path: &[u8],
2425 metadata: &fs::Metadata,
2426 index_entry: &TrackedEntry,
2427 format: ObjectFormat,
2428) -> Result<bool> {
2429 let file_type = metadata.file_type();
2430 let path = worktree_path_for_repo_path(worktree_root, git_path);
2431 let body = if file_type.is_symlink() {
2432 symlink_target_bytes(&path)?
2433 } else {
2434 fs::read(&path)?
2435 };
2436 let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
2437 let mode = if file_type.is_symlink() {
2438 0o120000
2439 } else {
2440 file_mode(metadata)
2441 };
2442 Ok(oid == index_entry.oid && mode == index_entry.mode)
2443}
2444
2445pub fn diff_name_status_trees_with_options(
2446 db: &FileObjectDatabase,
2447 format: ObjectFormat,
2448 left_tree: &ObjectId,
2449 right_tree: &ObjectId,
2450 options: DiffNameStatusOptions,
2451) -> Result<Vec<NameStatusEntry>> {
2452 let needs_full_maps = options.detect_copies && options.find_copies_harder;
2457 let (left_entries, right_entries) = if needs_full_maps {
2458 collect_full_tree_pair(db, format, left_tree, right_tree)?
2459 } else {
2460 changed_tree_entries(db, format, left_tree, right_tree)?
2461 };
2462 diff_name_status_maps(
2463 &left_entries,
2464 &right_entries,
2465 left_entries.keys().chain(right_entries.keys()),
2466 options,
2467 )
2468}
2469
2470pub fn diff_name_status_empty_tree_with_options(
2471 db: &FileObjectDatabase,
2472 format: ObjectFormat,
2473 right_tree: &ObjectId,
2474 options: DiffNameStatusOptions,
2475) -> Result<Vec<NameStatusEntry>> {
2476 let left_entries = BTreeMap::new();
2477 let mut right_entries = BTreeMap::new();
2478 collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2479 diff_name_status_maps(&left_entries, &right_entries, right_entries.keys(), options)
2480}
2481
2482pub fn diff_name_status_trees_with_rename_options(
2490 db: &FileObjectDatabase,
2491 format: ObjectFormat,
2492 left_tree: &ObjectId,
2493 right_tree: &ObjectId,
2494 options: RenameDetectionOptions,
2495) -> Result<Vec<NameStatusEntry>> {
2496 let needs_full_maps = options.base.detect_copies && options.base.find_copies_harder;
2500 let (left_entries, right_entries) = if needs_full_maps {
2501 collect_full_tree_pair(db, format, left_tree, right_tree)?
2502 } else {
2503 changed_tree_entries(db, format, left_tree, right_tree)?
2504 };
2505 diff_name_status_maps_with_renames(
2506 &left_entries,
2507 &right_entries,
2508 left_entries.keys().chain(right_entries.keys()),
2509 options,
2510 |oid| read_blob_bytes(db, oid),
2511 )
2512}
2513
2514pub fn diff_name_status_empty_tree_with_rename_options(
2521 db: &FileObjectDatabase,
2522 format: ObjectFormat,
2523 right_tree: &ObjectId,
2524 options: RenameDetectionOptions,
2525) -> Result<Vec<NameStatusEntry>> {
2526 let left_entries = BTreeMap::new();
2527 let mut right_entries = BTreeMap::new();
2528 collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2529 diff_name_status_maps_with_renames(
2530 &left_entries,
2531 &right_entries,
2532 right_entries.keys(),
2533 options,
2534 |oid| read_blob_bytes(db, oid),
2535 )
2536}
2537
2538fn read_blob_bytes(db: &FileObjectDatabase, oid: &ObjectId) -> Option<Vec<u8>> {
2543 match db.read_object(oid) {
2544 Ok(object) if object.object_type == ObjectType::Blob => Some(object.body.clone()),
2545 _ => None,
2546 }
2547}
2548
2549fn raw_name_status_changes_for_unique_paths<'a>(
2552 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2553 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2554 paths: impl Iterator<Item = &'a Vec<u8>>,
2555) -> Vec<NameStatusEntry> {
2556 let mut changes = Vec::new();
2557 for path in paths {
2558 let left = left_entries.get(path);
2559 let right = right_entries.get(path);
2560 let status = match (left, right) {
2561 (None, Some(_)) => Some(NameStatus::Added),
2562 (Some(_), None) => Some(NameStatus::Deleted),
2563 (Some(left), Some(right)) if left != right => Some(NameStatus::Modified),
2564 _ => None,
2565 };
2566 if let Some(status) = status {
2567 changes.push(NameStatusEntry {
2568 status,
2569 path: path.clone().into(),
2570 old_path: None,
2571 old_mode: left.map(|entry| entry.mode),
2572 new_mode: right.map(|entry| entry.mode),
2573 old_oid: left.map(|entry| entry.oid),
2574 new_oid: right.map(|entry| entry.oid),
2575 });
2576 }
2577 }
2578 changes
2579}
2580
2581fn diff_name_status_maps<'a>(
2582 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2583 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2584 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2585 options: DiffNameStatusOptions,
2586) -> Result<Vec<NameStatusEntry>> {
2587 let paths = candidate_path_set(candidate_paths);
2588 diff_name_status_maps_for_path_set(left_entries, right_entries, &paths, options)
2589}
2590
2591fn diff_name_status_maps_for_path_set(
2592 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2593 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2594 candidate_paths: &BTreeSet<Vec<u8>>,
2595 options: DiffNameStatusOptions,
2596) -> Result<Vec<NameStatusEntry>> {
2597 diff_name_status_maps_for_unique_paths(
2598 left_entries,
2599 right_entries,
2600 candidate_paths.iter(),
2601 options,
2602 )
2603}
2604
2605fn diff_name_status_maps_for_unique_paths<'a>(
2606 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2607 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2608 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2609 options: DiffNameStatusOptions,
2610) -> Result<Vec<NameStatusEntry>> {
2611 let mut changes =
2612 raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2613 if options.detect_renames {
2614 changes = detect_exact_renames(changes, left_entries, right_entries, options.rename_empty);
2615 }
2616 if options.detect_copies {
2617 changes = detect_exact_copies(
2618 changes,
2619 left_entries,
2620 right_entries,
2621 options.find_copies_harder,
2622 options.rename_empty,
2623 );
2624 }
2625 Ok(changes)
2626}
2627
2628fn diff_name_status_maps_with_renames<'a>(
2637 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2638 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2639 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2640 options: RenameDetectionOptions,
2641 fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2642) -> Result<Vec<NameStatusEntry>> {
2643 let paths = candidate_path_set(candidate_paths);
2644 diff_name_status_maps_with_renames_for_path_set(
2645 left_entries,
2646 right_entries,
2647 &paths,
2648 options,
2649 fetch_blob,
2650 )
2651}
2652
2653fn diff_name_status_maps_with_renames_for_path_set(
2654 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2655 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2656 candidate_paths: &BTreeSet<Vec<u8>>,
2657 options: RenameDetectionOptions,
2658 fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2659) -> Result<Vec<NameStatusEntry>> {
2660 diff_name_status_maps_with_renames_for_unique_paths(
2661 left_entries,
2662 right_entries,
2663 candidate_paths.iter(),
2664 options,
2665 fetch_blob,
2666 )
2667}
2668
2669fn diff_name_status_maps_with_renames_for_unique_paths<'a>(
2670 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2671 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2672 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2673 options: RenameDetectionOptions,
2674 fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2675) -> Result<Vec<NameStatusEntry>> {
2676 let base = options.base;
2677 let mut changes =
2678 raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2679 if base.detect_renames {
2680 changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
2681 }
2682 if base.detect_renames && options.detect_inexact {
2686 changes = detect_inexact_renames(changes, &options, &fetch_blob);
2687 }
2688 if base.detect_copies {
2689 changes = detect_exact_copies(
2690 changes,
2691 left_entries,
2692 right_entries,
2693 base.find_copies_harder,
2694 base.rename_empty,
2695 );
2696 }
2697 if base.detect_copies && options.detect_inexact {
2698 changes = detect_inexact_copies(changes, left_entries, &options, &fetch_blob);
2699 }
2700 Ok(changes)
2701}
2702
2703fn detect_exact_renames(
2704 changes: Vec<NameStatusEntry>,
2705 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2706 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2707 rename_empty: bool,
2708) -> Vec<NameStatusEntry> {
2709 let added = changes
2710 .iter()
2711 .enumerate()
2712 .filter(|(_, entry)| entry.status == NameStatus::Added)
2713 .map(|(idx, entry)| (idx, entry.path.clone()))
2714 .collect::<Vec<_>>();
2715 let deleted = changes
2716 .iter()
2717 .filter(|entry| entry.status == NameStatus::Deleted)
2718 .map(|entry| entry.path.clone())
2719 .collect::<Vec<_>>();
2720 let mut consumed = BTreeSet::new();
2721 let mut renamed_old_paths = BTreeSet::new();
2722 let mut result = Vec::new();
2723
2724 for old_path in deleted {
2725 let Some(left) = left_entries.get(old_path.as_bytes()) else {
2726 continue;
2727 };
2728 if let Some((idx, new_path)) = added.iter().find(|(idx, new_path)| {
2729 !consumed.contains(idx)
2730 && right_entries.get(new_path.as_bytes()).is_some_and(|right| {
2731 right.oid == left.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2732 })
2733 }) {
2734 consumed.insert(*idx);
2735 renamed_old_paths.insert(old_path.clone());
2736 let right = right_entries.get(new_path.as_bytes());
2737 result.push(NameStatusEntry {
2738 status: NameStatus::Renamed(100),
2739 path: new_path.clone(),
2740 old_path: Some(old_path),
2741 old_mode: Some(left.mode),
2742 new_mode: right.map(|entry| entry.mode),
2743 old_oid: Some(left.oid),
2744 new_oid: right.map(|entry| entry.oid),
2745 });
2746 }
2747 }
2748
2749 for (idx, entry) in changes.into_iter().enumerate() {
2750 if entry.status == NameStatus::Added && consumed.contains(&idx) {
2751 continue;
2752 }
2753 if entry.status == NameStatus::Deleted && renamed_old_paths.contains(&entry.path) {
2754 continue;
2755 }
2756 result.push(entry);
2757 }
2758 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2759 result
2760}
2761
2762fn detect_exact_copies(
2763 changes: Vec<NameStatusEntry>,
2764 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2765 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2766 find_copies_harder: bool,
2767 rename_empty: bool,
2768) -> Vec<NameStatusEntry> {
2769 let changed_sources = changes
2770 .iter()
2771 .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
2772 .map(|entry| entry.path.clone())
2773 .collect::<BTreeSet<_>>();
2774 let source_paths = left_entries
2775 .keys()
2776 .filter(|path| find_copies_harder || changed_sources.contains(path.as_slice()))
2777 .cloned()
2778 .collect::<Vec<_>>();
2779
2780 let mut result = Vec::new();
2781 for entry in changes {
2782 if entry.status != NameStatus::Added {
2783 result.push(entry);
2784 continue;
2785 }
2786 let Some(right) = right_entries.get(entry.path.as_bytes()) else {
2787 result.push(entry);
2788 continue;
2789 };
2790 if let Some(old_path) = source_paths.iter().find(|old_path| {
2791 old_path.as_slice() != entry.path.as_bytes()
2792 && left_entries.get(*old_path).is_some_and(|left| {
2793 left.oid == right.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2794 })
2795 }) {
2796 result.push(NameStatusEntry {
2797 status: NameStatus::Copied(100),
2798 path: entry.path,
2799 old_path: Some(old_path.clone().into()),
2800 old_mode: left_entries
2801 .get(old_path.as_slice())
2802 .map(|entry| entry.mode),
2803 new_mode: entry.new_mode,
2804 old_oid: left_entries.get(old_path.as_slice()).map(|entry| entry.oid),
2805 new_oid: entry.new_oid,
2806 });
2807 } else {
2808 result.push(entry);
2809 }
2810 }
2811 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2812 result
2813}
2814
/// Identity of a rename source, copied out of the change list before that list is consumed.
#[derive(Debug, Clone)]
struct RenameSourceMeta {
    /// Path of the entry acting as the rename source.
    path: BString,
    /// The source entry's `old_mode`, if it had one.
    mode: Option<u32>,
    /// The source entry's `old_oid`, if it had one.
    oid: Option<ObjectId>,
}
2823
/// One candidate (source, destination) pairing considered by inexact rename detection.
struct ScoredPair {
    /// Position of the source within the collected list of deleted blobs.
    src: usize,
    /// Position of the destination within the collected list of added blobs.
    dst: usize,
    /// Similarity reported by `blob_similarity` for this pair.
    score: u8,
}
2834
2835fn detect_inexact_renames(
2846 changes: Vec<NameStatusEntry>,
2847 options: &RenameDetectionOptions,
2848 fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
2849) -> Vec<NameStatusEntry> {
2850 let threshold = options.rename_threshold;
2851 if threshold > 100 {
2853 return changes;
2854 }
2855
2856 let mut deleted: Vec<(usize, Vec<u8>)> = Vec::new();
2859 let mut added: Vec<(usize, Vec<u8>)> = Vec::new();
2860 for (idx, entry) in changes.iter().enumerate() {
2861 match entry.status {
2862 NameStatus::Deleted => {
2863 let Some(oid) = entry.old_oid.as_ref() else {
2864 continue;
2865 };
2866 if !options.base.rename_empty && is_empty_blob_oid(oid) {
2867 continue;
2868 }
2869 if let Some(bytes) = fetch_blob(oid) {
2870 deleted.push((idx, bytes));
2871 }
2872 }
2873 NameStatus::Added => {
2874 let Some(oid) = entry.new_oid.as_ref() else {
2875 continue;
2876 };
2877 if !options.base.rename_empty && is_empty_blob_oid(oid) {
2878 continue;
2879 }
2880 if let Some(bytes) = fetch_blob(oid) {
2881 added.push((idx, bytes));
2882 }
2883 }
2884 _ => {}
2885 }
2886 }
2887
2888 if deleted.is_empty() || added.is_empty() {
2889 return changes;
2890 }
2891
2892 let mut pairs: Vec<ScoredPair> = Vec::new();
2894 for (si, (_, src_bytes)) in deleted.iter().enumerate() {
2895 for (di, (_, dst_bytes)) in added.iter().enumerate() {
2896 let score = blob_similarity(src_bytes, dst_bytes);
2897 if score >= threshold {
2898 pairs.push(ScoredPair {
2899 src: si,
2900 dst: di,
2901 score,
2902 });
2903 }
2904 }
2905 }
2906 pairs.sort_by(|a, b| {
2909 b.score
2910 .cmp(&a.score)
2911 .then_with(|| a.src.cmp(&b.src))
2912 .then_with(|| a.dst.cmp(&b.dst))
2913 });
2914
2915 let mut src_used = vec![false; deleted.len()];
2917 let mut dst_used = vec![false; added.len()];
2918 let mut rename_of: BTreeMap<usize, (usize, u8)> = BTreeMap::new();
2920 for pair in pairs {
2921 if src_used[pair.src] || dst_used[pair.dst] {
2922 continue;
2923 }
2924 src_used[pair.src] = true;
2925 dst_used[pair.dst] = true;
2926 let src_change_idx = deleted[pair.src].0;
2927 let dst_change_idx = added[pair.dst].0;
2928 rename_of.insert(dst_change_idx, (src_change_idx, pair.score));
2929 }
2930
2931 if rename_of.is_empty() {
2932 return changes;
2933 }
2934
2935 let consumed_sources: BTreeSet<usize> =
2938 rename_of.values().map(|(src_idx, _)| *src_idx).collect();
2939 let source_meta: BTreeMap<usize, RenameSourceMeta> = consumed_sources
2940 .iter()
2941 .map(|&src_idx| {
2942 let src = &changes[src_idx];
2943 (
2944 src_idx,
2945 RenameSourceMeta {
2946 path: src.path.clone(),
2947 mode: src.old_mode,
2948 oid: src.old_oid,
2949 },
2950 )
2951 })
2952 .collect();
2953
2954 let mut result = Vec::with_capacity(changes.len());
2955 for (idx, entry) in changes.into_iter().enumerate() {
2956 if consumed_sources.contains(&idx) {
2957 continue;
2959 }
2960 if let Some((src_idx, score)) = rename_of.get(&idx) {
2961 let meta = source_meta
2965 .get(src_idx)
2966 .cloned()
2967 .unwrap_or(RenameSourceMeta {
2968 path: BString::default(),
2969 mode: None,
2970 oid: None,
2971 });
2972 result.push(NameStatusEntry {
2973 status: NameStatus::Renamed(*score),
2974 path: entry.path,
2975 old_path: Some(meta.path),
2976 old_mode: meta.mode,
2977 new_mode: entry.new_mode,
2978 old_oid: meta.oid,
2979 new_oid: entry.new_oid,
2980 });
2981 continue;
2982 }
2983 result.push(entry);
2984 }
2985
2986 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2987 result
2988}
2989
2990fn detect_inexact_copies(
3000 changes: Vec<NameStatusEntry>,
3001 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3002 options: &RenameDetectionOptions,
3003 fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
3004) -> Vec<NameStatusEntry> {
3005 let threshold = options.copy_threshold;
3006 if threshold > 100 {
3007 return changes;
3008 }
3009
3010 let changed_sources = changes
3011 .iter()
3012 .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
3013 .map(|entry| entry.path.clone())
3014 .collect::<BTreeSet<_>>();
3015 let mut sources: Vec<(Vec<u8>, &TrackedEntry, Vec<u8>)> = Vec::new();
3017 for (path, tracked) in left_entries {
3018 if !(options.base.find_copies_harder || changed_sources.contains(path.as_slice())) {
3019 continue;
3020 }
3021 if !options.base.rename_empty && is_empty_blob_oid(&tracked.oid) {
3022 continue;
3023 }
3024 if let Some(bytes) = fetch_blob(&tracked.oid) {
3025 sources.push((path.clone(), tracked, bytes));
3026 }
3027 }
3028 if sources.is_empty() {
3029 return changes;
3030 }
3031
3032 let mut result = Vec::with_capacity(changes.len());
3033 for entry in changes {
3034 if entry.status != NameStatus::Added {
3035 result.push(entry);
3036 continue;
3037 }
3038 let Some(new_oid) = entry.new_oid.as_ref() else {
3039 result.push(entry);
3040 continue;
3041 };
3042 let Some(dst_bytes) = fetch_blob(new_oid) else {
3043 result.push(entry);
3044 continue;
3045 };
3046
3047 let mut best: Option<(usize, u8)> = None;
3051 for (i, (src_path, _, src_bytes)) in sources.iter().enumerate() {
3052 if src_path.as_slice() == entry.path.as_bytes() {
3053 continue;
3054 }
3055 let score = blob_similarity(src_bytes, &dst_bytes);
3056 if score < threshold {
3057 continue;
3058 }
3059 match best {
3060 Some((_, best_score)) if best_score >= score => {}
3061 _ => best = Some((i, score)),
3062 }
3063 }
3064
3065 if let Some((src_idx, score)) = best {
3066 let (src_path, src_tracked, _) = &sources[src_idx];
3067 result.push(NameStatusEntry {
3068 status: NameStatus::Copied(score),
3069 path: entry.path,
3070 old_path: Some(src_path.clone().into()),
3071 old_mode: Some(src_tracked.mode),
3072 new_mode: entry.new_mode,
3073 old_oid: Some(src_tracked.oid),
3074 new_oid: entry.new_oid,
3075 });
3076 } else {
3077 result.push(entry);
3078 }
3079 }
3080 result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
3081 result
3082}
3083
3084fn is_empty_blob_oid(oid: &ObjectId) -> bool {
3085 object_id_for_bytes(oid.format(), "blob", b"").is_ok_and(|empty| empty == *oid)
3086}
3087
/// Upper bound, in bytes, on a single span hashed by `span_hash_counts`; a span is cut
/// at this length even when no newline has been seen.
const MAX_SPAN_BYTES: usize = 64;
3129
3130pub fn blob_similarity(a: &[u8], b: &[u8]) -> u8 {
3142 if a == b {
3144 return 100;
3145 }
3146 let max_size = a.len().max(b.len());
3147 if max_size == 0 {
3148 return 100;
3151 }
3152
3153 let src = span_hash_counts(a);
3154 let dst = span_hash_counts(b);
3155 let common = common_span_bytes(&src, &dst);
3156
3157 const MAX_SCORE: u64 = 60000;
3165 let internal = (common as u64 * MAX_SCORE) / max_size as u64;
3166 let score = internal * 100 / MAX_SCORE;
3167 score.min(100) as u8
3168}
3169
3170fn span_hash_counts(data: &[u8]) -> BTreeMap<u64, usize> {
3177 let mut counts: BTreeMap<u64, usize> = BTreeMap::new();
3178 let mut idx = 0usize;
3179 let len = data.len();
3180 while idx < len {
3181 let mut accum1: u32 = 0;
3186 let mut accum2: u32 = 0;
3187 let mut span_len = 0usize;
3188 loop {
3189 let c = data[idx] as u32;
3190 idx += 1;
3191 span_len += 1;
3192 accum1 = (accum1 << 7) ^ (accum2 >> 25);
3193 accum2 = (accum2 << 7) ^ (accum1 >> 25);
3194 accum1 = accum1.wrapping_add(c);
3195 let newline = c == u32::from(b'\n');
3196 if span_len >= MAX_SPAN_BYTES || newline || idx >= len {
3197 break;
3198 }
3199 }
3200 let hash = ((accum1 as u64) << 32) ^ (accum2 as u64) ^ ((span_len as u64) << 1);
3204 *counts.entry(hash).or_insert(0) += span_len;
3205 }
3206 counts
3207}
3208
3209pub fn count_changes(src: &[u8], dst: &[u8]) -> (usize, usize) {
3218 let src_counts = span_hash_counts(src);
3219 let dst_counts = span_hash_counts(dst);
3220 let copied = common_span_bytes(&src_counts, &dst_counts);
3221 (copied, dst.len() - copied)
3222}
3223
/// Sums, over every hash present in both maps, the smaller of the two byte counts.
///
/// The map with fewer keys is iterated and probed against the other one; the result is
/// the same whichever argument is larger.
fn common_span_bytes(src: &BTreeMap<u64, usize>, dst: &BTreeMap<u64, usize>) -> usize {
    let (probe, lookup) = if dst.len() < src.len() {
        (dst, src)
    } else {
        (src, dst)
    };
    probe
        .iter()
        .filter_map(|(hash, bytes)| lookup.get(hash).map(|other| *bytes.min(other)))
        .sum()
}
3239
/// Sort key for diff output: the raw bytes of the entry's (new) `path`.
fn diff_entry_sort_path(entry: &NameStatusEntry) -> &[u8] {
    entry.path.as_bytes()
}
3245
3246fn mark_unstaged_worktree_oids_unresolved(
3247 changes: Vec<NameStatusEntry>,
3248 index_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3249 worktree_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3250) -> Vec<NameStatusEntry> {
3251 changes
3252 .into_iter()
3253 .map(|mut entry| {
3254 let worktree_entry = worktree_entries.get(entry.path.as_bytes());
3255 if worktree_entry != index_entries.get(entry.path.as_bytes()) {
3256 entry.new_oid = None;
3257 }
3258 entry
3259 })
3260 .collect()
3261}
3262
/// Mode and object id recorded for a single tracked path.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// Git file mode of the entry.
    mode: u32,
    /// Object id of the entry's content.
    oid: ObjectId,
}
3268
/// Map from repository path bytes to the entry tracked at that path.
type TrackedEntryMap = BTreeMap<Vec<u8>, TrackedEntry>;

/// Two tracked-entry maps, returned as `(left, right)` by the tree comparison helpers.
type TrackedEntryPair = (TrackedEntryMap, TrackedEntryMap);
3275
/// Index contents together with the stat cache built from the same index read.
struct IndexSnapshot {
    /// Index entries keyed by path bytes.
    entries: BTreeMap<Vec<u8>, TrackedEntry>,
    /// Stat cache derived from the index and the index file's mtime.
    stat_cache: IndexStatCache,
}
3280
3281fn read_index_entries(
3282 git_dir: &Path,
3283 format: ObjectFormat,
3284) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3285 let index_path = sley_index::repository_index_path(git_dir);
3286 if !index_path.exists() {
3287 return Ok(BTreeMap::new());
3288 }
3289 let index = expand_sparse_index_for_worktree_diff(
3290 sley_index::read_repository_index(git_dir, format)?,
3291 git_dir,
3292 format,
3293 )?;
3294 Ok(index
3295 .entries
3296 .into_iter()
3297 .filter(|entry| entry.stage() == sley_index::Stage::Normal && !entry.is_intent_to_add())
3298 .map(|entry| {
3299 (
3300 entry.path.into_bytes(),
3301 TrackedEntry {
3302 mode: entry.mode,
3303 oid: entry.oid,
3304 },
3305 )
3306 })
3307 .collect())
3308}
3309
3310fn read_intent_to_add_paths(
3314 git_dir: &Path,
3315 format: ObjectFormat,
3316) -> Result<std::collections::HashSet<Vec<u8>>> {
3317 let index_path = sley_index::repository_index_path(git_dir);
3318 if !index_path.exists() {
3319 return Ok(std::collections::HashSet::new());
3320 }
3321 let index = expand_sparse_index_for_worktree_diff(
3322 sley_index::read_repository_index(git_dir, format)?,
3323 git_dir,
3324 format,
3325 )?;
3326 Ok(index
3327 .entries
3328 .iter()
3329 .filter(|entry| entry.stage() == sley_index::Stage::Normal && entry.is_intent_to_add())
3330 .map(|entry| entry.path.as_bytes().to_vec())
3331 .collect())
3332}
3333
3334fn read_index_snapshot(git_dir: &Path, format: ObjectFormat) -> Result<IndexSnapshot> {
3335 let index_path = sley_index::repository_index_path(git_dir);
3336 let index_metadata = match fs::metadata(&index_path) {
3337 Ok(metadata) => metadata,
3338 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3339 return Ok(IndexSnapshot {
3340 entries: BTreeMap::new(),
3341 stat_cache: IndexStatCache::default(),
3342 });
3343 }
3344 Err(err) => return Err(err.into()),
3345 };
3346 let index = expand_sparse_index_for_worktree_diff(
3347 sley_index::read_repository_index(git_dir, format)?,
3348 git_dir,
3349 format,
3350 )?;
3351 let stat_cache =
3352 IndexStatCache::from_index_mtime(&index, sley_index::file_mtime_parts(&index_metadata));
3353 let entries = index
3354 .entries
3355 .into_iter()
3356 .map(|entry| {
3357 (
3358 entry.path.into_bytes(),
3359 TrackedEntry {
3360 mode: entry.mode,
3361 oid: entry.oid,
3362 },
3363 )
3364 })
3365 .collect();
3366 Ok(IndexSnapshot {
3367 entries,
3368 stat_cache,
3369 })
3370}
3371
/// Read-only view of an index entry, implemented for both the owned and the borrowed
/// index entry types so worktree comparison code can be written once.
trait WorktreeIndexEntry {
    /// Repository path of the entry as raw bytes.
    fn git_path(&self) -> &[u8];
    /// Index stage of the entry.
    fn stage(&self) -> sley_index::Stage;
    /// Git file mode stored in the index.
    fn mode(&self) -> u32;
    /// Object id stored in the index.
    fn oid(&self) -> ObjectId;
    /// Whether the entry is flagged intent-to-add.
    fn is_intent_to_add(&self) -> bool;
    /// Whether the entry is flagged skip-worktree.
    fn is_skip_worktree(&self) -> bool;
    /// Whether `stat_cache` considers this entry reusable for a file with `metadata`.
    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool;
}
3381
/// Adapter for the owned index entry type.
///
/// Methods that share a name with an inherent method call it through the fully
/// qualified `sley_index::IndexEntry::` path.
impl WorktreeIndexEntry for sley_index::IndexEntry {
    fn git_path(&self) -> &[u8] {
        self.path.as_bytes()
    }

    fn stage(&self) -> sley_index::Stage {
        sley_index::IndexEntry::stage(self)
    }

    fn mode(&self) -> u32 {
        self.mode
    }

    fn oid(&self) -> ObjectId {
        self.oid
    }

    fn is_intent_to_add(&self) -> bool {
        sley_index::IndexEntry::is_intent_to_add(self)
    }

    fn is_skip_worktree(&self) -> bool {
        sley_index::IndexEntry::is_skip_worktree(self)
    }

    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
        // The cache lookup returns an `Option`; only whether it is `Some` matters here.
        stat_cache.reusable_index_entry(self, metadata).is_some()
    }
}
3411
/// Adapter for the borrowed index entry type.
///
/// Methods that share a name with an inherent method call it through the fully
/// qualified `sley_index::IndexEntryRef::` path.
impl WorktreeIndexEntry for sley_index::IndexEntryRef<'_> {
    fn git_path(&self) -> &[u8] {
        self.path
    }

    fn stage(&self) -> sley_index::Stage {
        sley_index::IndexEntryRef::stage(self)
    }

    fn mode(&self) -> u32 {
        self.mode
    }

    fn oid(&self) -> ObjectId {
        self.oid
    }

    fn is_intent_to_add(&self) -> bool {
        sley_index::IndexEntryRef::is_intent_to_add(self)
    }

    fn is_skip_worktree(&self) -> bool {
        sley_index::IndexEntryRef::is_skip_worktree(self)
    }

    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
        stat_cache.reusable_index_entry_ref(self, metadata)
    }
}
3441
3442fn tracked_entry_from_index(entry: &impl WorktreeIndexEntry) -> TrackedEntry {
3443 TrackedEntry {
3444 mode: entry.mode(),
3445 oid: entry.oid(),
3446 }
3447}
3448
3449fn head_tree_entries(
3450 git_dir: &Path,
3451 format: ObjectFormat,
3452 db: &FileObjectDatabase,
3453) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3454 let refs = FileRefStore::new(git_dir, format);
3455 let Some(head) = refs.read_ref("HEAD")? else {
3456 return Ok(BTreeMap::new());
3457 };
3458 let commit_oid = match head {
3459 RefTarget::Direct(oid) => Some(oid),
3460 RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
3461 Some(RefTarget::Direct(oid)) => Some(oid),
3462 _ => None,
3463 },
3464 };
3465 let Some(commit_oid) = commit_oid else {
3466 return Ok(BTreeMap::new());
3467 };
3468 let object = db.read_object(&commit_oid)?;
3469 if object.object_type != ObjectType::Commit {
3470 return Err(GitError::InvalidObject(format!(
3471 "HEAD {commit_oid} is not a commit"
3472 )));
3473 }
3474 let commit = Commit::parse_ref(format, &object.body)?;
3475 let mut entries = BTreeMap::new();
3476 collect_tree_entries(db, format, &commit.tree, Vec::new(), &mut entries)?;
3477 Ok(entries)
3478}
3479
3480fn collect_tree_entries(
3489 db: &FileObjectDatabase,
3490 format: ObjectFormat,
3491 tree_oid: &ObjectId,
3492 prefix: Vec<u8>,
3493 entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3494) -> Result<()> {
3495 for (rel_path, (mode, oid)) in flatten_tree(db, format, tree_oid)? {
3496 let path = join_tree_path(&prefix, &rel_path);
3497 entries.insert(path, TrackedEntry { mode, oid });
3498 }
3499 Ok(())
3500}
3501
/// Mode value that marks a tree entry as a subtree (directory); `merge_tree_entry`
/// compares entry modes against it to decide whether to recurse.
const TREE_ENTRY_MODE: u32 = 0o040000;
3504
3505fn read_tree_object(
3509 db: &FileObjectDatabase,
3510 format: ObjectFormat,
3511 tree_oid: &ObjectId,
3512) -> Result<Tree> {
3513 let object = db.read_object(tree_oid)?;
3514 if object.object_type != ObjectType::Tree {
3515 return Err(GitError::InvalidObject(format!(
3516 "expected tree {tree_oid}, found {}",
3517 object.object_type.as_str()
3518 )));
3519 }
3520 Tree::parse(format, &object.body)
3521}
3522
/// Joins `name` onto `prefix` with a `/` separator.
///
/// An empty `prefix` yields `name` alone, with no leading separator.
fn join_tree_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
    if prefix.is_empty() {
        return name.to_vec();
    }
    [prefix, &b"/"[..], name].concat()
}
3534
3535fn collect_full_tree_pair(
3539 db: &FileObjectDatabase,
3540 format: ObjectFormat,
3541 left_tree: &ObjectId,
3542 right_tree: &ObjectId,
3543) -> Result<TrackedEntryPair> {
3544 let mut left = BTreeMap::new();
3545 collect_tree_entries(db, format, left_tree, Vec::new(), &mut left)?;
3546 let mut right = BTreeMap::new();
3547 collect_tree_entries(db, format, right_tree, Vec::new(), &mut right)?;
3548 Ok((left, right))
3549}
3550
3551fn changed_tree_entries(
3567 db: &FileObjectDatabase,
3568 format: ObjectFormat,
3569 left_tree: &ObjectId,
3570 right_tree: &ObjectId,
3571) -> Result<TrackedEntryPair> {
3572 let mut left = BTreeMap::new();
3573 let mut right = BTreeMap::new();
3574 if left_tree != right_tree {
3576 diff_tree_pair(
3577 db,
3578 format,
3579 left_tree,
3580 right_tree,
3581 &[],
3582 &mut left,
3583 &mut right,
3584 )?;
3585 }
3586 Ok((left, right))
3587}
3588
3589fn diff_tree_pair(
3593 db: &FileObjectDatabase,
3594 format: ObjectFormat,
3595 left_tree: &ObjectId,
3596 right_tree: &ObjectId,
3597 prefix: &[u8],
3598 left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3599 right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3600) -> Result<()> {
3601 let left_entries = read_tree_object(db, format, left_tree)?.entries;
3602 let right_entries = read_tree_object(db, format, right_tree)?.entries;
3603
3604 let mut right_by_name: HashMap<&[u8], &TreeEntry> = HashMap::with_capacity(right_entries.len());
3609 for entry in &right_entries {
3610 right_by_name.insert(entry.name.as_bytes(), entry);
3611 }
3612
3613 for left_entry in &left_entries {
3614 match right_by_name.remove(left_entry.name.as_bytes()) {
3615 Some(right_entry) => {
3616 merge_tree_entry(
3617 db,
3618 format,
3619 prefix,
3620 Some(left_entry),
3621 Some(right_entry),
3622 left,
3623 right,
3624 )?;
3625 }
3626 None => {
3627 merge_tree_entry(db, format, prefix, Some(left_entry), None, left, right)?;
3628 }
3629 }
3630 }
3631 for right_entry in &right_entries {
3633 if right_by_name.contains_key(right_entry.name.as_bytes()) {
3634 merge_tree_entry(db, format, prefix, None, Some(right_entry), left, right)?;
3635 }
3636 }
3637 Ok(())
3638}
3639
3640fn merge_tree_entry(
3653 db: &FileObjectDatabase,
3654 format: ObjectFormat,
3655 prefix: &[u8],
3656 left_entry: Option<&TreeEntry>,
3657 right_entry: Option<&TreeEntry>,
3658 left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3659 right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3660) -> Result<()> {
3661 let left_is_tree = left_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3662 let right_is_tree = right_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3663
3664 if let (Some(left_entry), Some(right_entry)) = (left_entry, right_entry) {
3665 if left_is_tree && right_is_tree {
3666 if left_entry.oid == right_entry.oid {
3668 return Ok(());
3669 }
3670 let path = join_tree_path(prefix, left_entry.name.as_bytes());
3671 return diff_tree_pair(
3672 db,
3673 format,
3674 &left_entry.oid,
3675 &right_entry.oid,
3676 &path,
3677 left,
3678 right,
3679 );
3680 }
3681 if !left_is_tree && !right_is_tree {
3682 if left_entry.mode == right_entry.mode && left_entry.oid == right_entry.oid {
3686 return Ok(());
3687 }
3688 let path = join_tree_path(prefix, left_entry.name.as_bytes());
3689 left.insert(
3690 path.clone(),
3691 TrackedEntry {
3692 mode: left_entry.mode,
3693 oid: left_entry.oid,
3694 },
3695 );
3696 right.insert(
3697 path,
3698 TrackedEntry {
3699 mode: right_entry.mode,
3700 oid: right_entry.oid,
3701 },
3702 );
3703 return Ok(());
3704 }
3705 }
3708
3709 if let Some(left_entry) = left_entry {
3711 let path = join_tree_path(prefix, left_entry.name.as_bytes());
3712 if left_is_tree {
3713 collect_tree_entries(db, format, &left_entry.oid, path, left)?;
3714 } else {
3715 left.insert(
3716 path,
3717 TrackedEntry {
3718 mode: left_entry.mode,
3719 oid: left_entry.oid,
3720 },
3721 );
3722 }
3723 }
3724 if let Some(right_entry) = right_entry {
3726 let path = join_tree_path(prefix, right_entry.name.as_bytes());
3727 if right_is_tree {
3728 collect_tree_entries(db, format, &right_entry.oid, path, right)?;
3729 } else {
3730 right.insert(
3731 path,
3732 TrackedEntry {
3733 mode: right_entry.mode,
3734 oid: right_entry.oid,
3735 },
3736 );
3737 }
3738 }
3739 Ok(())
3740}
3741
3742fn index_gitlinks(index: &BTreeMap<Vec<u8>, TrackedEntry>) -> BTreeMap<Vec<u8>, ObjectId> {
3743 index
3744 .iter()
3745 .filter(|(_, entry)| sley_index::is_gitlink(entry.mode))
3746 .map(|(path, entry)| (path.clone(), entry.oid))
3747 .collect()
3748}
3749
/// Clones the given paths into an ordered, de-duplicated set.
fn candidate_path_set<'a>(candidate_paths: impl Iterator<Item = &'a Vec<u8>>) -> BTreeSet<Vec<u8>> {
    let mut unique = BTreeSet::new();
    unique.extend(candidate_paths.cloned());
    unique
}
3753
3754fn worktree_entries_for_path_set(
3755 worktree_root: &Path,
3756 format: ObjectFormat,
3757 candidates: &BTreeSet<Vec<u8>>,
3758 index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3759 stat_cache: Option<&IndexStatCache>,
3760) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3761 worktree_entries_for_unique_paths(
3762 worktree_root,
3763 format,
3764 candidates.iter(),
3765 index_gitlinks,
3766 stat_cache,
3767 )
3768}
3769
3770fn worktree_entries_for_unique_paths<'a>(
3771 worktree_root: &Path,
3772 format: ObjectFormat,
3773 candidates: impl Iterator<Item = &'a Vec<u8>>,
3774 index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3775 stat_cache: Option<&IndexStatCache>,
3776) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3777 let mut entries = BTreeMap::new();
3778 for git_path in candidates {
3779 if let Some(entry) =
3780 worktree_entry_for_path(worktree_root, format, git_path, index_gitlinks, stat_cache)?
3781 {
3782 entries.insert(git_path.clone(), entry);
3783 }
3784 }
3785 Ok(entries)
3786}
3787
3788fn worktree_entry_for_path(
3789 worktree_root: &Path,
3790 format: ObjectFormat,
3791 git_path: &[u8],
3792 index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3793 stat_cache: Option<&IndexStatCache>,
3794) -> Result<Option<TrackedEntry>> {
3795 let path = worktree_path_for_repo_path(worktree_root, git_path);
3796 let metadata = match fs::symlink_metadata(&path) {
3797 Ok(metadata) => metadata,
3798 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3799 Err(err) => return Err(GitError::Io(err.to_string())),
3800 };
3801 let file_type = metadata.file_type();
3802 if let Some(staged_oid) = index_gitlinks.get(git_path)
3803 && metadata.is_dir()
3804 {
3805 let oid = gitlink_head_oid(&path, format).unwrap_or(*staged_oid);
3806 return Ok(Some(TrackedEntry {
3807 mode: sley_index::GITLINK_MODE,
3808 oid,
3809 }));
3810 }
3811 if metadata.is_dir() {
3812 if let Some(oid) = gitlink_head_oid(&path, format) {
3813 return Ok(Some(TrackedEntry {
3814 mode: sley_index::GITLINK_MODE,
3815 oid,
3816 }));
3817 }
3818 return Ok(None);
3819 }
3820 if !(metadata.is_file() || file_type.is_symlink()) {
3821 return Ok(None);
3822 }
3823 if let Some(entry) = stat_cache.and_then(|cache| cache.reusable_entry(git_path, &metadata)) {
3824 return Ok(Some(tracked_entry_from_index(entry)));
3825 }
3826 let body = if file_type.is_symlink() {
3827 symlink_target_bytes(&path)?
3828 } else {
3829 fs::read(&path)?
3830 };
3831 let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3832 let mode = if file_type.is_symlink() {
3833 0o120000
3834 } else {
3835 file_mode(&metadata)
3836 };
3837 Ok(Some(TrackedEntry { mode, oid }))
3838}
3839
3840fn index_worktree_change_for_entry(
3841 path: &Path,
3842 format: ObjectFormat,
3843 index_entry: &impl WorktreeIndexEntry,
3844 stat_cache: &IndexStatCache,
3845) -> Result<Option<NameStatusEntry>> {
3846 let git_path = index_entry.git_path();
3847 let metadata = match fs::symlink_metadata(path) {
3848 Ok(metadata) => metadata,
3849 Err(err)
3850 if err.kind() == std::io::ErrorKind::NotFound && index_entry.is_skip_worktree() =>
3851 {
3852 return Ok(None);
3853 }
3854 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3855 return Ok(Some(index_worktree_deleted_entry(index_entry)));
3856 }
3857 Err(err) => return Err(GitError::Io(err.to_string())),
3858 };
3859 let file_type = metadata.file_type();
3860 let right = if metadata.is_dir() {
3861 if sley_index::is_gitlink(index_entry.mode()) {
3862 let oid = gitlink_head_oid(path, format).unwrap_or(index_entry.oid());
3863 Some(TrackedEntry {
3864 mode: sley_index::GITLINK_MODE,
3865 oid,
3866 })
3867 } else {
3868 gitlink_head_oid(path, format).map(|oid| TrackedEntry {
3869 mode: sley_index::GITLINK_MODE,
3870 oid,
3871 })
3872 }
3873 } else if metadata.is_file() || file_type.is_symlink() {
3874 if index_entry.reusable_with(stat_cache, &metadata) {
3875 return Ok(None);
3876 }
3877 let body = if file_type.is_symlink() {
3878 symlink_target_bytes(path)?
3879 } else {
3880 fs::read(path)?
3881 };
3882 let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3883 let mode = if file_type.is_symlink() {
3884 0o120000
3885 } else {
3886 file_mode(&metadata)
3887 };
3888 Some(TrackedEntry { mode, oid })
3889 } else {
3890 None
3891 };
3892 let Some(right) = right else {
3893 return Ok(Some(index_worktree_deleted_entry(index_entry)));
3894 };
3895 let left = tracked_entry_from_index(index_entry);
3896 if right == left {
3897 return Ok(None);
3898 }
3899 Ok(Some(NameStatusEntry {
3900 status: NameStatus::Modified,
3901 path: git_path.to_vec().into(),
3902 old_path: None,
3903 old_mode: Some(left.mode),
3904 new_mode: Some(right.mode),
3905 old_oid: Some(left.oid),
3906 new_oid: Some(right.oid),
3907 }))
3908}
3909
3910fn index_worktree_deleted_entry(index_entry: &impl WorktreeIndexEntry) -> NameStatusEntry {
3911 NameStatusEntry {
3912 status: NameStatus::Deleted,
3913 path: index_entry.git_path().to_vec().into(),
3914 old_path: None,
3915 old_mode: Some(index_entry.mode()),
3916 new_mode: None,
3917 old_oid: Some(index_entry.oid()),
3918 new_oid: None,
3919 }
3920}
3921
3922fn worktree_blob_cache_for_path_set(
3923 worktree_root: &Path,
3924 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3925 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3926 candidate_paths: &BTreeSet<Vec<u8>>,
3927 options: RenameDetectionOptions,
3928) -> Result<HashMap<ObjectId, Vec<u8>>> {
3929 worktree_blob_cache_for_unique_paths(
3930 worktree_root,
3931 left_entries,
3932 right_entries,
3933 candidate_paths.iter(),
3934 options,
3935 )
3936}
3937
3938fn worktree_blob_cache_for_unique_paths<'a>(
3939 worktree_root: &Path,
3940 left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3941 right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3942 candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
3943 options: RenameDetectionOptions,
3944) -> Result<HashMap<ObjectId, Vec<u8>>> {
3945 if !options.detect_inexact || !(options.base.detect_renames || options.base.detect_copies) {
3946 return Ok(HashMap::new());
3947 }
3948 let base = options.base;
3949 let mut changes =
3950 raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
3951 if base.detect_renames {
3952 changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
3953 }
3954 if base.detect_copies {
3955 changes = detect_exact_copies(
3956 changes,
3957 left_entries,
3958 right_entries,
3959 base.find_copies_harder,
3960 base.rename_empty,
3961 );
3962 }
3963 let has_rename_source = base.detect_renames
3964 && changes.iter().any(|entry| {
3965 entry.status == NameStatus::Deleted
3966 && entry
3967 .old_oid
3968 .as_ref()
3969 .is_some_and(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3970 });
3971 let has_copy_source = base.detect_copies
3972 && (base.find_copies_harder
3973 || changes
3974 .iter()
3975 .any(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified)));
3976 if !has_rename_source && !has_copy_source {
3977 return Ok(HashMap::new());
3978 }
3979 let candidate_oids = changes
3980 .iter()
3981 .filter(|entry| entry.status == NameStatus::Added)
3982 .filter_map(|entry| entry.new_oid)
3983 .filter(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3984 .collect::<BTreeSet<_>>();
3985 if candidate_oids.is_empty() {
3986 return Ok(HashMap::new());
3987 }
3988 let mut cache = HashMap::new();
3989 for (git_path, entry) in right_entries {
3990 if sley_index::is_gitlink(entry.mode) || !candidate_oids.contains(&entry.oid) {
3991 continue;
3992 }
3993 let path = worktree_path_for_repo_path(worktree_root, git_path);
3994 let body = if entry.mode == 0o120000 {
3995 symlink_target_bytes(&path)?
3996 } else {
3997 fs::read(&path)?
3998 };
3999 cache.entry(entry.oid).or_insert(body);
4000 }
4001 Ok(cache)
4002}
4003
4004fn cache_or_odb_blob(
4007 cache: &HashMap<ObjectId, Vec<u8>>,
4008 db: &FileObjectDatabase,
4009 oid: &ObjectId,
4010) -> Option<Vec<u8>> {
4011 if let Some(bytes) = cache.get(oid) {
4012 return Some(bytes.clone());
4013 }
4014 read_blob_bytes(db, oid)
4015}
4016
/// Maps a repository path (raw bytes) to a filesystem path below `worktree_root`.
///
/// On Unix the bytes are used verbatim, so non-UTF-8 names survive the conversion.
#[cfg(unix)]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    worktree_root.join(OsStr::from_bytes(path))
}
4026
/// Writes the filesystem path for a repository path into `out`, replacing its contents.
///
/// Same mapping as `worktree_path_for_repo_path`, but into a caller-supplied `PathBuf`.
#[cfg(unix)]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    out.clear();
    out.extend([worktree_root.as_os_str(), OsStr::from_bytes(path)]);
}
4036
/// Maps a repository path (raw bytes) to a filesystem path below `worktree_root`.
///
/// On non-Unix targets the bytes are converted through `repo_path_to_path`.
#[cfg(not(unix))]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
    let mut out = worktree_root.to_path_buf();
    out.push(repo_path_to_path(path));
    out
}
4041
/// Writes the filesystem path for a repository path into `out`, replacing its contents.
///
/// On non-Unix targets the bytes are converted through `repo_path_to_path`.
#[cfg(not(unix))]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
    let relative = repo_path_to_path(path);
    out.clear();
    out.push(worktree_root);
    out.push(relative);
}
4048
/// Converts `/`-separated repository path bytes into a relative `PathBuf`.
///
/// The bytes are decoded lossily as UTF-8 and empty components are dropped.
#[cfg(not(unix))]
fn repo_path_to_path(path: &[u8]) -> PathBuf {
    String::from_utf8_lossy(path)
        .split('/')
        .filter(|component| !component.is_empty())
        .collect()
}
4059
/// Maps a regular file's permissions to the git mode it is tracked under.
///
/// Git stores only two regular-file modes and picks between them by the *owner*
/// execute bit alone: `0o100755` when it is set, `0o100644` otherwise. Group and other
/// execute bits are ignored, so a file that is executable only for group or others does
/// not show up as a mode change.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;

    /// Owner execute permission bit.
    const OWNER_EXECUTE: u32 = 0o100;

    if metadata.permissions().mode() & OWNER_EXECUTE != 0 {
        0o100755
    } else {
        0o100644
    }
}
4069
/// Non-Unix variant: the metadata is ignored and every regular file is reported as
/// `0o100644`.
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
    0o100644
}
4074
4075#[cfg(unix)]
4076fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
4077 use std::os::unix::ffi::OsStrExt;
4078 let target = fs::read_link(path)?;
4079 Ok(target.as_os_str().as_bytes().to_vec())
4080}
4081
/// Returns the target of the symlink at `path` as bytes, with `\` rewritten to `/`.
///
/// The target is converted lossily to UTF-8 before the separators are rewritten.
///
/// # Errors
///
/// Propagates the failure when the link cannot be read.
#[cfg(not(unix))]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    let target = fs::read_link(path)?;
    let normalized: String = target
        .to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect();
    Ok(normalized.into_bytes())
}
4087
/// One line of a hunk body, tagged with its role.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HunkLine {
    /// A line carried as context.
    Context(Vec<u8>),
    /// A line inserted by the patch.
    Insert(Vec<u8>),
    /// A line deleted by the patch.
    Delete(Vec<u8>),
}

impl HunkLine {
    /// Returns the bytes stored in the line, whichever variant it is.
    pub fn content(&self) -> &[u8] {
        let (Self::Context(bytes) | Self::Insert(bytes) | Self::Delete(bytes)) = self;
        bytes
    }
}
4122
/// One hunk of a file patch: its old/new ranges plus the body lines.
// NOTE(review): the range fields presumably mirror the `@@ -old_start,old_len
// +new_start,new_len @@` header — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// Start of the range on the old side.
    pub old_start: usize,
    /// Length of the range on the old side.
    pub old_len: usize,
    /// Start of the range on the new side.
    pub new_start: usize,
    /// Length of the range on the new side.
    pub new_len: usize,
    /// Body lines of the hunk, in order.
    pub lines: Vec<HunkLine>,
    /// Presumably set when the old side's last line has no trailing newline — TODO confirm.
    pub old_no_newline: bool,
    /// Presumably set when the new side's last line has no trailing newline — TODO confirm.
    pub new_no_newline: bool,
}
4141
/// The parsed patch for one file: header information plus its hunks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
    /// Old-side path taken from the `diff --git`, `rename from`/`copy from`
    /// or `--- ` header; `None` when the `--- ` header names `/dev/null`.
    pub old_path: Option<Vec<u8>>,
    /// New-side path taken from the `diff --git`, `rename to`/`copy to`
    /// or `+++ ` header; `None` when the `+++ ` header names `/dev/null`.
    pub new_path: Option<Vec<u8>>,
    /// Octal mode from an `old mode` or `deleted file mode` header line.
    pub old_mode: Option<u32>,
    /// Octal mode from a `new mode` or `new file mode` header line.
    pub new_mode: Option<u32>,
    /// Hunks in the order they appear in the patch.
    pub hunks: Vec<Hunk>,
    /// Set by a `new file mode` line or a `--- /dev/null` header.
    pub is_new: bool,
    /// Set by a `deleted file mode` line or a `+++ /dev/null` header.
    pub is_delete: bool,
    /// Set by a `rename from` or `rename to` line.
    pub is_rename: bool,
    /// Set by a `copy from` or `copy to` line.
    pub is_copy: bool,
    /// Value of a `similarity index` line (0..=100), if one parsed.
    pub similarity: Option<u8>,
    /// Value of a `dissimilarity index` line (0..=100), if one parsed.
    pub dissimilarity: Option<u8>,
}
4167
/// Result of applying a [`FilePatch`] to a base blob.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApplyOutcome {
    /// Every hunk was placed; carries the resulting file bytes.
    Applied(Vec<u8>),
    /// At least one hunk could not be located in the base.
    Rejected,
}
4176
/// Context-line limit consulted by `apply_one_hunk` before it starts
/// dropping context lines from a hunk that failed to match.
///
/// With the value `usize::MAX`, the `leading <= MIN_FUZZ_CONTEXT &&
/// trailing <= MIN_FUZZ_CONTEXT` check is always true, so a hunk that does
/// not match with its full context is rejected and no reduction happens.
const MIN_FUZZ_CONTEXT: usize = usize::MAX;
4188
4189pub fn parse_unified_patch(input: &[u8]) -> Result<Vec<FilePatch>> {
4197 parse_unified_patch_with_recount(input, false)
4198}
4199
4200pub fn parse_unified_patch_with_recount(input: &[u8], recount: bool) -> Result<Vec<FilePatch>> {
4203 let lines = split_patch_lines(input);
4204 let mut parser = PatchParser {
4205 lines: &lines,
4206 index: 0,
4207 recount,
4208 };
4209 parser.parse()
4210}
4211
4212pub fn apply_file_patch(base: &[u8], patch: &FilePatch) -> ApplyOutcome {
4237 if patch.is_delete && patch.hunks.is_empty() {
4239 return ApplyOutcome::Applied(Vec::new());
4240 }
4241 let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
4244
4245 let mut image = split_blob_lines(base_for_match);
4249
4250 let mut running_offset: isize = 0;
4253
4254 for hunk in &patch.hunks {
4255 match apply_one_hunk(&mut image, hunk, running_offset) {
4256 Some(drift) => running_offset += drift,
4257 None => return ApplyOutcome::Rejected,
4258 }
4259 }
4260
4261 ApplyOutcome::Applied(join_lines(&image))
4262}
4263
/// Applies one hunk to `image` in place.
///
/// `running_offset` is added to the hunk's header position to obtain the
/// line where the search starts. On success the matched preimage lines are
/// replaced by the postimage and the function returns how far the match was
/// from the expected position (`pos - expected`). Returns `None` when the
/// hunk cannot be located.
fn apply_one_hunk(image: &mut Vec<Line>, hunk: &Hunk, running_offset: isize) -> Option<isize> {
    // preimage = context + deleted lines (what must be found in the image);
    // postimage = context + inserted lines (what replaces it).
    let mut preimage: Vec<Line> = Vec::new();
    let mut postimage: Vec<Line> = Vec::new();
    // `leading` counts context lines before the first change, `trailing`
    // counts context lines after the last change.
    let mut leading = 0usize; let mut trailing = 0usize; let mut seen_change = false;
    for hl in &hunk.lines {
        match hl {
            HunkLine::Context(bytes) => {
                preimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                postimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                if !seen_change {
                    leading += 1;
                }
                trailing += 1;
            }
            HunkLine::Delete(bytes) => {
                preimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                seen_change = true;
                // A change resets the run of trailing context.
                trailing = 0;
            }
            HunkLine::Insert(bytes) => {
                postimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                seen_change = true;
                trailing = 0;
            }
        }
    }

    // Propagate the "no newline at end of file" markers to the last line of
    // the respective side.
    if hunk.old_no_newline
        && let Some(last) = preimage.last_mut()
    {
        last.no_newline = true;
    }
    if hunk.new_no_newline
        && let Some(last) = postimage.last_mut()
    {
        last.no_newline = true;
    }

    // A hunk that starts at line 0/1 must match at the top of the image; a
    // hunk without trailing context must match at the very end.
    let mut match_beginning = hunk.old_start <= 1;
    let mut match_end = trailing == 0;

    let mut expected = expected_position(hunk, running_offset);
    // Remember the unmodified expectation: the returned drift is measured
    // against it even if `expected` is adjusted below.
    let hunk_expected = expected;

    loop {
        if let Some(pos) = find_hunk_pos(image, &preimage, expected, match_beginning, match_end) {
            let take = preimage.len();
            let replacement: Vec<Line> = postimage.clone();
            image.splice(pos..pos + take, replacement);
            return Some(pos as isize - hunk_expected);
        }

        // With MIN_FUZZ_CONTEXT == usize::MAX this condition always holds, so
        // a failed match is rejected here and the relaxation steps below are
        // currently never reached.
        #[allow(clippy::absurd_extreme_comparisons)]
        if leading <= MIN_FUZZ_CONTEXT && trailing <= MIN_FUZZ_CONTEXT {
            return None;
        }

        // First relaxation: stop insisting on the beginning/end anchors.
        if match_beginning || match_end {
            match_beginning = false;
            match_end = false;
            continue;
        }

        // Second relaxation: drop one context line from the longer side
        // (from both sides when they are equally long).
        if leading >= trailing {
            preimage.remove(0);
            postimage.remove(0);
            expected -= 1;
            leading -= 1;
        }
        if trailing > leading {
            preimage.pop();
            postimage.pop();
            trailing -= 1;
        }
    }
}
4390
/// One line of a file image used while applying hunks.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Line {
    /// Line bytes without the terminating `\n`.
    content: Vec<u8>,
    /// `true` when the line is not followed by `\n` (only emitted for a
    /// final unterminated line when splitting a blob).
    no_newline: bool,
}
4397
4398fn split_blob_lines(data: &[u8]) -> Vec<Line> {
4403 let mut lines = Vec::new();
4404 let mut start = 0usize;
4405 while start < data.len() {
4406 match data[start..].iter().position(|&b| b == b'\n') {
4407 Some(rel) => {
4408 let end = start + rel;
4409 lines.push(Line {
4410 content: data[start..end].to_vec(),
4411 no_newline: false,
4412 });
4413 start = end + 1;
4414 }
4415 None => {
4416 lines.push(Line {
4417 content: data[start..].to_vec(),
4418 no_newline: true,
4419 });
4420 start = data.len();
4421 }
4422 }
4423 }
4424 lines
4425}
4426
4427fn join_lines(lines: &[Line]) -> Vec<u8> {
4429 let mut out = Vec::new();
4430 for line in lines {
4431 out.extend_from_slice(&line.content);
4432 if !line.no_newline {
4433 out.push(b'\n');
4434 }
4435 }
4436 out
4437}
4438
4439fn expected_position(hunk: &Hunk, running_offset: isize) -> isize {
4442 let base = if hunk.old_start == 0 {
4444 0
4445 } else {
4446 hunk.old_start as isize - 1
4447 };
4448 base + running_offset
4449}
4450
/// Searches `image` for a position where `preimage` matches.
///
/// The search starts at line 0 when `match_beginning` is set, at
/// `image.len() - preimage.len()` when `match_end` is set, and at `expected`
/// otherwise (clamped into `0..=image.len()`). From there it alternates one
/// step forwards and one step backwards until both directions are exhausted.
/// Returns the first matching index, or `None`.
fn find_hunk_pos(
    image: &[Line],
    preimage: &[Line],
    expected: isize,
    match_beginning: bool,
    match_end: bool,
) -> Option<usize> {
    let line_nr = image.len();
    let pre_nr = preimage.len();

    let mut line: isize = if match_beginning {
        0
    } else if match_end {
        line_nr as isize - pre_nr as isize
    } else {
        expected
    };
    // Clamp the starting line into the image.
    if line < 0 {
        line = 0;
    }
    if line as usize > line_nr {
        line = line_nr as isize;
    }

    let start = line as usize;
    let mut backwards = start;
    let mut forwards = start;
    let mut current = start;

    // Parity of `i` selects the direction of the next step: even moves
    // forwards, odd moves backwards.
    let mut i: u64 = 0;
    loop {
        if preimage_matches_at(image, preimage, current, match_beginning, match_end) {
            return Some(current);
        }

        // Pick the next candidate; when the chosen direction is exhausted,
        // flip the parity and retry with the other direction.
        loop {
            if backwards == 0 && forwards == line_nr {
                // Both directions exhausted: no match anywhere.
                return None;
            }
            if i & 1 == 1 {
                if backwards == 0 {
                    i += 1;
                    continue;
                }
                backwards -= 1;
                current = backwards;
            } else {
                if forwards == line_nr {
                    i += 1;
                    continue;
                }
                forwards += 1;
                current = forwards;
            }
            break;
        }
        i += 1;
    }
}
4522
4523fn preimage_matches_at(
4532 image: &[Line],
4533 preimage: &[Line],
4534 pos: usize,
4535 match_beginning: bool,
4536 match_end: bool,
4537) -> bool {
4538 if match_beginning && pos != 0 {
4539 return false;
4540 }
4541 if pos + preimage.len() > image.len() {
4543 return false;
4544 }
4545 if match_end && pos + preimage.len() != image.len() {
4546 return false;
4547 }
4548 for (i, pre) in preimage.iter().enumerate() {
4549 let img = &image[pos + i];
4550 if img.content != pre.content {
4551 return false;
4552 }
4553 if pre.no_newline != img.no_newline {
4559 return false;
4560 }
4561 }
4562 true
4563}
4564
/// Splits patch text into borrowed lines at every `\n`.
///
/// The newline is not part of the returned slices (a preceding `\r` is
/// kept). A final segment without a newline is still returned; empty input
/// yields no lines.
fn split_patch_lines(input: &[u8]) -> Vec<&[u8]> {
    input
        .split_inclusive(|&byte| byte == b'\n')
        .map(|chunk| chunk.strip_suffix(b"\n").unwrap_or(chunk))
        .collect()
}
4586
/// Cursor-based parser over the lines of a unified diff.
struct PatchParser<'a> {
    /// All patch lines, without their terminating newlines.
    lines: &'a [&'a [u8]],
    /// Index of the next line to examine.
    index: usize,
    /// When `true`, hunk header counts are ignored and recomputed from the
    /// hunk body.
    recount: bool,
}
4592
impl<'a> PatchParser<'a> {
    /// Parses all remaining lines into file patches.
    ///
    /// A file patch starts at a `diff --git ` line or at a bare `--- `
    /// header. Unrecognized lines between patches are skipped. A hunk header
    /// that appears before any file header is an error.
    fn parse(&mut self) -> Result<Vec<FilePatch>> {
        let mut patches = Vec::new();
        while self.index < self.lines.len() {
            let line = self.lines[self.index];
            if line.starts_with(b"diff --git ") {
                patches.push(self.parse_file(Some(line))?);
            } else if line.starts_with(b"--- ") {
                patches.push(self.parse_file(None)?);
            } else if line.starts_with(b"@@ ") {
                return Err(GitError::InvalidFormat(
                    "hunk header encountered before any file header".to_string(),
                ));
            } else {
                self.index += 1;
            }
        }
        Ok(patches)
    }

    /// Parses one file's patch: extended headers, `---`/`+++` headers and
    /// all following hunks.
    ///
    /// `diff_line` is the `diff --git ` line at the cursor, or `None` when
    /// the patch starts directly with a `--- ` header.
    fn parse_file(&mut self, diff_line: Option<&[u8]>) -> Result<FilePatch> {
        let mut patch = FilePatch {
            old_path: None,
            new_path: None,
            old_mode: None,
            new_mode: None,
            hunks: Vec::new(),
            is_new: false,
            is_delete: false,
            is_rename: false,
            is_copy: false,
            similarity: None,
            dissimilarity: None,
        };
        if let Some(diff_line) = diff_line {
            // Seed the paths from the `diff --git` line; later headers may
            // override them.
            if let Some((a, b)) = parse_diff_git_paths(diff_line) {
                patch.old_path = Some(a);
                patch.new_path = Some(b);
            }
            self.index += 1;
        }

        // Extended header section: runs until the `--- ` header, the first
        // hunk, or the next file's `diff --git ` line.
        while self.index < self.lines.len() {
            let line = self.lines[self.index];
            if line.starts_with(b"--- ") {
                self.parse_old_file_header(line, &mut patch);
                self.index += 1;
                break;
            } else if line.starts_with(b"@@ ") {
                break;
            } else if line.starts_with(b"diff --git ") {
                // Header-only patch (e.g. a pure mode change or rename).
                return Ok(patch);
            } else if let Some(rest) = strip_prefix(line, b"old mode ") {
                patch.old_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"new mode ") {
                patch.new_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"new file mode ") {
                patch.is_new = true;
                patch.new_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"deleted file mode ") {
                patch.is_delete = true;
                patch.old_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"rename from ") {
                patch.is_rename = true;
                patch.old_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"rename to ") {
                patch.is_rename = true;
                patch.new_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"copy from ") {
                patch.is_copy = true;
                patch.old_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"copy to ") {
                patch.is_copy = true;
                patch.new_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"similarity index ") {
                patch.similarity = parse_percent(rest);
            } else if let Some(rest) = strip_prefix(line, b"dissimilarity index ") {
                patch.dissimilarity = parse_percent(rest);
            } else {
                // Unknown header line: skip it.
                self.index += 1;
                continue;
            }
            self.index += 1;
        }

        if self.index < self.lines.len() && self.lines[self.index].starts_with(b"+++ ") {
            self.parse_new_file_header(self.lines[self.index], &mut patch);
            self.index += 1;
        }

        // Hunk section: ends at the next file's header.
        while self.index < self.lines.len() {
            let line = self.lines[self.index];
            if line.starts_with(b"@@ ") {
                let hunk = self.parse_hunk()?;
                patch.hunks.push(hunk);
            } else if line.starts_with(b"diff --git ") {
                break;
            } else if line.starts_with(b"--- ") {
                break;
            } else {
                self.index += 1;
            }
        }

        Ok(patch)
    }

    /// Applies a `--- ` header line to `patch`.
    ///
    /// `/dev/null` marks the patch as a new file and clears the old path.
    /// Any other path replaces `old_path`, except that for renames/copies an
    /// already recorded old path is kept.
    fn parse_old_file_header(&self, line: &[u8], patch: &mut FilePatch) {
        let rest = strip_prefix(line, b"--- ").unwrap_or(line);
        let path = strip_header_path(rest);
        match path {
            HeaderPath::DevNull => {
                patch.is_new = true;
                patch.old_path = None;
            }
            HeaderPath::Path(p) => {
                if patch.old_path.is_none() || !(patch.is_rename || patch.is_copy) {
                    patch.old_path = Some(p);
                }
            }
        }
    }

    /// Applies a `+++ ` header line to `patch`.
    ///
    /// `/dev/null` marks the patch as a deletion and clears the new path.
    /// Any other path replaces `new_path`, except that for renames/copies an
    /// already recorded new path is kept.
    fn parse_new_file_header(&self, line: &[u8], patch: &mut FilePatch) {
        let rest = strip_prefix(line, b"+++ ").unwrap_or(line);
        let path = strip_header_path(rest);
        match path {
            HeaderPath::DevNull => {
                patch.is_delete = true;
                patch.new_path = None;
            }
            HeaderPath::Path(p) => {
                if patch.new_path.is_none() || !(patch.is_rename || patch.is_copy) {
                    patch.new_path = Some(p);
                }
            }
        }
    }

    /// Parses the hunk whose `@@ ` header is at the cursor.
    ///
    /// Without recount, the body ends once the header's old/new line counts
    /// are satisfied, and a count mismatch is an error. With recount, the
    /// body ends at the next header-looking line and the counts stored in
    /// the returned hunk are the observed ones.
    fn parse_hunk(&mut self) -> Result<Hunk> {
        let header = self.lines[self.index];
        let (old_start, old_len, new_start, new_len) = parse_hunk_header(header)?;
        self.index += 1;

        let mut hunk = Hunk {
            old_start,
            old_len,
            new_start,
            new_len,
            lines: Vec::new(),
            old_no_newline: false,
            new_no_newline: false,
        };
        let mut old_seen = 0usize;
        let mut new_seen = 0usize;

        while self.index < self.lines.len() {
            if !self.recount && old_seen >= old_len && new_seen >= new_len {
                // Both sides have the number of lines the header declared.
                break;
            }
            let line = self.lines[self.index];
            if self.recount
                && (line.starts_with(b"@@ ")
                    || line.starts_with(b"diff --git ")
                    || line.starts_with(b"diff a/")
                    || line.starts_with(b"--- "))
            {
                break;
            }
            if line.is_empty() {
                // A completely empty line is treated as an empty context
                // line (its leading space was lost).
                hunk.lines.push(HunkLine::Context(Vec::new()));
                old_seen += 1;
                new_seen += 1;
                self.index += 1;
                continue;
            }
            match line[0] {
                b' ' => {
                    hunk.lines.push(HunkLine::Context(line[1..].to_vec()));
                    old_seen += 1;
                    new_seen += 1;
                }
                b'+' => {
                    hunk.lines.push(HunkLine::Insert(line[1..].to_vec()));
                    new_seen += 1;
                }
                b'-' => {
                    hunk.lines.push(HunkLine::Delete(line[1..].to_vec()));
                    old_seen += 1;
                }
                b'\\' => {
                    // Marker line: applies to the line just before it and
                    // does not count towards either side.
                    self.mark_no_newline(&mut hunk);
                    self.index += 1;
                    continue;
                }
                _ => {
                    // Anything else ends the hunk body.
                    break;
                }
            }
            self.index += 1;
        }

        // A marker may directly follow the last counted line, after the loop
        // already stopped on the satisfied counts.
        if self.index < self.lines.len() && self.lines[self.index].starts_with(b"\\") {
            self.mark_no_newline(&mut hunk);
            self.index += 1;
        }

        if self.recount {
            hunk.old_len = old_seen;
            hunk.new_len = new_seen;
        } else if old_seen != old_len || new_seen != new_len {
            return Err(GitError::InvalidFormat(format!(
                "hunk body line counts mismatch: header declared -{old_len},+{new_len} \
                 but body had -{old_seen},+{new_seen}"
            )));
        }

        Ok(hunk)
    }

    /// Records a `\` marker line against the side(s) of the most recent
    /// hunk line: context affects both sides, an insertion the new side, a
    /// deletion the old side. A marker before any line is ignored.
    fn mark_no_newline(&self, hunk: &mut Hunk) {
        match hunk.lines.last() {
            Some(HunkLine::Context(_)) => {
                hunk.old_no_newline = true;
                hunk.new_no_newline = true;
            }
            Some(HunkLine::Insert(_)) => hunk.new_no_newline = true,
            Some(HunkLine::Delete(_)) => hunk.old_no_newline = true,
            None => {}
        }
    }
}
4854
/// Path named by a `--- ` or `+++ ` header line.
enum HeaderPath {
    /// The header named `/dev/null`.
    DevNull,
    /// A real path, with a leading `a/` or `b/` already removed.
    Path(Vec<u8>),
}
4859
4860fn strip_header_path(rest: &[u8]) -> HeaderPath {
4864 let path = match rest.iter().position(|&b| b == b'\t') {
4866 Some(tab) => &rest[..tab],
4867 None => rest,
4868 };
4869 let path = trim_ascii_end(path);
4870 if path == b"/dev/null" {
4871 return HeaderPath::DevNull;
4872 }
4873 let stripped = if path.starts_with(b"a/") || path.starts_with(b"b/") {
4875 &path[2..]
4876 } else {
4877 path
4878 };
4879 HeaderPath::Path(stripped.to_vec())
4880}
4881
/// Extracts the old and new paths from a `diff --git a/<old> b/<new>` line.
///
/// Returns `None` for quoted names, for lines whose first name does not
/// start with `a/`, and when no ` b/` separator is present.
///
/// The separator is ambiguous when a path itself contains ` b/` (for
/// example a directory called `x b`). Because both names are identical
/// unless the patch is a rename or copy, the split that yields two equal
/// names is preferred. Only when no such split exists does the function
/// fall back to the first ` b/` occurrence.
fn parse_diff_git_paths(line: &[u8]) -> Option<(Vec<u8>, Vec<u8>)> {
    const SEPARATOR: &[u8] = b" b/";

    let rest = line.strip_prefix(b"diff --git ")?;
    if rest.first() == Some(&b'"') {
        // Quoted (C-style escaped) names are not handled here.
        return None;
    }
    let names = rest.strip_prefix(b"a/")?;

    // Two equal names around the separator force the split to sit exactly in
    // the middle, so only that one position needs to be checked.
    let symmetric = names
        .len()
        .checked_sub(SEPARATOR.len())
        .filter(|payload| payload % 2 == 0)
        .map(|payload| payload / 2)
        .filter(|&mid| {
            names[mid..].starts_with(SEPARATOR) && names[..mid] == names[mid + SEPARATOR.len()..]
        });

    let split = symmetric.or_else(|| {
        names
            .windows(SEPARATOR.len())
            .position(|window| window == SEPARATOR)
    })?;

    let old = &names[..split];
    let new = &names[split + SEPARATOR.len()..];
    Some((old.to_vec(), new.to_vec()))
}
4903
4904fn parse_hunk_header(line: &[u8]) -> Result<(usize, usize, usize, usize)> {
4907 let err = || GitError::InvalidFormat(format!("malformed hunk header: {}", lossy(line)));
4908 let rest = strip_prefix(line, b"@@ ").ok_or_else(err)?;
4909 let close = find_subslice(rest, b" @@").ok_or_else(err)?;
4911 let ranges = &rest[..close];
4912 let mut parts = ranges.split(|&b| b == b' ').filter(|p| !p.is_empty());
4913 let old = parts.next().ok_or_else(err)?;
4914 let new = parts.next().ok_or_else(err)?;
4915 let old = strip_prefix(old, b"-").ok_or_else(err)?;
4916 let new = strip_prefix(new, b"+").ok_or_else(err)?;
4917 let (old_start, old_len) = parse_range(old).ok_or_else(err)?;
4918 let (new_start, new_len) = parse_range(new).ok_or_else(err)?;
4919 Ok((old_start, old_len, new_start, new_len))
4920}
4921
4922fn parse_range(range: &[u8]) -> Option<(usize, usize)> {
4924 match range.iter().position(|&b| b == b',') {
4925 Some(comma) => {
4926 let start = parse_usize(&range[..comma])?;
4927 let len = parse_usize(&range[comma + 1..])?;
4928 Some((start, len))
4929 }
4930 None => Some((parse_usize(range)?, 1)),
4931 }
4932}
4933
/// Parses a non-empty run of ASCII decimal digits into a `usize`.
///
/// Returns `None` for empty input, any non-digit byte (including signs) or
/// a value that overflows `usize`.
fn parse_usize(bytes: &[u8]) -> Option<usize> {
    if bytes.is_empty() {
        return None;
    }
    bytes.iter().try_fold(0usize, |acc, &digit| {
        if !digit.is_ascii_digit() {
            return None;
        }
        acc.checked_mul(10)?.checked_add(usize::from(digit - b'0'))
    })
}
4947
4948fn parse_octal(bytes: &[u8]) -> Option<u32> {
4949 let trimmed = trim_ascii_end(bytes);
4950 if trimmed.is_empty() {
4951 return None;
4952 }
4953 let mut value: u32 = 0;
4954 for &b in trimmed {
4955 if !(b'0'..=b'7').contains(&b) {
4956 return None;
4957 }
4958 value = value.checked_mul(8)?.checked_add((b - b'0') as u32)?;
4959 }
4960 Some(value)
4961}
4962
4963fn parse_percent(bytes: &[u8]) -> Option<u8> {
4964 let trimmed = trim_ascii_end(bytes)
4965 .strip_suffix(b"%")
4966 .unwrap_or(trim_ascii_end(bytes));
4967 let value = parse_usize(trimmed)?;
4968 u8::try_from(value).ok().filter(|value| *value <= 100)
4969}
4970
/// Returns the part of `line` after `prefix`, or `None` when `line` does
/// not start with `prefix`.
fn strip_prefix<'b>(line: &'b [u8], prefix: &[u8]) -> Option<&'b [u8]> {
    // Resolves to the inherent slice method, not to this function.
    line.strip_prefix(prefix)
}
4978
/// Index of the first occurrence of `needle` in `haystack`.
///
/// An empty needle, or one longer than the haystack, never matches.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let width = needle.len();
    if width == 0 || haystack.len() < width {
        return None;
    }
    (0..=haystack.len() - width).find(|&start| haystack[start..].starts_with(needle))
}
4987
/// Removes trailing spaces and carriage returns (other whitespace such as
/// tabs is kept).
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
    let keep = bytes
        .iter()
        .rposition(|&byte| byte != b' ' && byte != b'\r')
        .map_or(0, |last| last + 1);
    &bytes[..keep]
}
4995
/// Renders bytes as an owned string for messages, substituting U+FFFD for
/// invalid UTF-8 sequences.
fn lossy(bytes: &[u8]) -> String {
    let text = String::from_utf8_lossy(bytes);
    text.to_string()
}
4999
/// Flattened tree: full `/`-joined path bytes mapped to `(mode, object id)`
/// for every non-directory entry, ordered by path.
pub type MergeEntryMap = BTreeMap<Vec<u8>, (u32, ObjectId)>;
5023
/// How a conflicting content merge is resolved automatically.
///
/// In `merge_entry_maps`, when a content merge conflicts (and the file
/// modes do not), a favor other than `None` makes the merge take the chosen
/// side's whole entry for that path instead of reporting a conflict.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MergeFavor {
    /// Report the conflict.
    None,
    /// Take our side's entry.
    Ours,
    /// Take their side's entry.
    Theirs,
}
5035
/// Options controlling a three-way tree merge.
pub struct MergeTreesOptions<'a> {
    /// Label for our side, used in conflict markers and conflict metadata.
    pub ours_label: &'a str,
    /// Label for their side, used in conflict markers and conflict metadata.
    pub theirs_label: &'a str,
    /// Label passed to the blob merge as the base label.
    pub ancestor_label: &'a str,
    /// Automatic resolution applied to conflicting content merges.
    pub favor: MergeFavor,
    /// Whether rename detection runs before paths are merged.
    pub detect_renames: bool,
    /// Threshold handed to rename detection.
    /// NOTE(review): presumably a similarity percentage — confirm against
    /// the rename detector.
    pub rename_threshold: u8,
    /// Directory-rename handling; only consulted when rename detection
    /// produced per-side results.
    pub directory_renames: DirectoryRenames,
    /// Conflict marker style passed to the blob merge.
    pub style: ConflictStyle,
}
5062
/// Directory-rename handling mode for tree merges.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
pub enum DirectoryRenames {
    /// Directory renames are not computed or applied (the default).
    #[default]
    False,
    /// Directory renames are applied; each re-homed path is reported with a
    /// `MergeInfoMessage::DirRenameApplied` message.
    True,
    /// Directory renames are applied and the re-homed paths are additionally
    /// tracked as conflict candidates.
    /// NOTE(review): the code that consumes that set is outside this view.
    Conflict,
}
5076
5077impl Default for MergeTreesOptions<'_> {
5078 fn default() -> Self {
5079 Self {
5080 ours_label: "ours",
5081 theirs_label: "theirs",
5082 ancestor_label: "merged common ancestors",
5083 favor: MergeFavor::None,
5084 detect_renames: false,
5085 rename_threshold: DEFAULT_RENAME_THRESHOLD,
5086 directory_renames: DirectoryRenames::False,
5087 style: ConflictStyle::Merge,
5088 }
5089 }
5090}
5091
/// Classification of a conflict recorded for a merged path.
///
/// NOTE(review): several variants are produced by code outside this view;
/// their descriptions below are limited to what the field names state.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeConflictKind {
    /// Both sides changed the path incompatibly. `add_add` is `true` when
    /// the path has no base entry.
    Content { add_add: bool },
    /// One side deleted a path that exists in the base while the other side
    /// still has a differing entry.
    ModifyDelete {
        /// Label of the side that deleted the path.
        deleted_in: String,
        /// Label of the side that kept (modified) the path.
        modified_in: String,
    },
    /// Content conflict at a path that is the destination of a rename.
    RenameContent {
        /// Source path of the rename.
        old_path: Vec<u8>,
    },
    /// Add/add content conflict at a path that both sides re-homed to from
    /// different original paths.
    RenameRenameTwoToOne {
        /// Original path on our side.
        ours_path: Vec<u8>,
        /// Original path on their side.
        theirs_path: Vec<u8>,
    },
    /// NOTE(review): presumably one source renamed to different
    /// destinations on each side — confirm with the producing code.
    RenameRenameOneToTwo {
        old_path: Vec<u8>,
        ours_path: Vec<u8>,
        theirs_path: Vec<u8>,
        ours_label: String,
        theirs_label: String,
    },
    /// NOTE(review): produced outside this view; semantics not confirmed.
    RenameRenameOneToTwoStage,
    /// NOTE(review): presumably a directory rename whose source directory
    /// was split across several destinations — confirm.
    DirRenameSplit {
        source_dir: Vec<u8>,
    },
    /// NOTE(review): presumably a rename on one side versus a deletion on
    /// the other — confirm with the producing code.
    RenameDelete {
        old_path: Vec<u8>,
        renamed_in: String,
        deleted_in: String,
    },
    /// NOTE(review): presumably a file/directory collision — confirm.
    FileDirectory {
        original_path: Vec<u8>,
        moved_from: String,
    },
    /// NOTE(review): presumably a path whose location is disputed because
    /// of a directory rename — confirm.
    DirRenameLocation {
        old_path: Vec<u8>,
        renamed_from: Option<Vec<u8>>,
        added_in: String,
        dir_renamed_in: String,
    },
    /// NOTE(review): presumably several paths implicitly renamed onto the
    /// same destination by a directory rename — confirm.
    DirRenameImplicitCollision {
        sources: Vec<Vec<u8>>,
    },
}
5192
/// Merge outcome for a single path.
#[derive(Debug, Clone)]
pub struct MergedPath {
    /// Path in the merged result.
    pub path: Vec<u8>,
    /// Base/ours/theirs entries that took part in the merge of this path.
    pub stages: MergeStages,
    /// `(mode, oid)` recorded for the path in the merged result, or `None`
    /// when the path is absent from it.
    pub result: Option<(u32, ObjectId)>,
    /// `(mode, bytes)` to materialize for a conflicted path (for content
    /// conflicts, the merged text including conflict markers).
    pub worktree: Option<(u32, Vec<u8>)>,
    /// The conflict recorded for this path, if any.
    pub conflict: Option<MergeConflictKind>,
    /// `true` when a content-level merge was run for this path.
    pub auto_merged: bool,
}
5216
5217impl MergedPath {
5218 pub fn is_clean(&self) -> bool {
5220 self.conflict.is_none()
5221 }
5222}
5223
/// The `(mode, oid)` entries of one path on each side of a three-way merge;
/// `None` where the side has no entry for the path.
#[derive(Debug, Clone, Default)]
pub struct MergeStages {
    /// Entry in the common ancestor.
    pub base: Option<(u32, ObjectId)>,
    /// Entry on our side.
    pub ours: Option<(u32, ObjectId)>,
    /// Entry on their side.
    pub theirs: Option<(u32, ObjectId)>,
}
5231
/// Overall result of a tree merge.
///
/// NOTE(review): the code that builds this value is outside this view; the
/// field notes follow the names and the visible bookkeeping in
/// `merge_entry_maps`.
#[derive(Debug, Clone)]
pub struct MergeTreesResult {
    /// Object id of the resulting tree.
    pub tree: ObjectId,
    /// Per-path outcomes, clean and conflicted alike.
    pub paths: Vec<MergedPath>,
    /// `false` once any conflict was recorded.
    pub clean: bool,
    /// Old paths (rename sources and pre-directory-rename locations)
    /// collected during the merge — presumably for the caller to remove.
    pub cleanup_paths: Vec<Vec<u8>>,
    /// Informational messages gathered during the merge.
    pub info_messages: Vec<MergeInfoMessage>,
}
5250
5251impl MergeTreesResult {
5252 pub fn conflicts(&self) -> impl Iterator<Item = &MergedPath> {
5254 self.paths.iter().filter(|entry| entry.conflict.is_some())
5255 }
5256}
5257
/// Informational (non-fatal) messages produced during a tree merge.
///
/// NOTE(review): only `DirRenameApplied` is produced inside the visible
/// part of `merge_entry_maps`; the other variants are described by their
/// field names only.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeInfoMessage {
    /// NOTE(review): presumably a directory rename that was not applied to
    /// `path` because the other side renamed it again — confirm.
    DirRenameSkippedDueToRerename {
        old_dir: Vec<u8>,
        path: Vec<u8>,
        new_dir: Vec<u8>,
    },
    /// A path was re-homed by a directory rename (emitted when directory
    /// renames are enabled with `DirectoryRenames::True`).
    DirRenameApplied {
        /// Location of the path before re-homing.
        old_path: Vec<u8>,
        /// Location of the path after re-homing.
        new_path: Vec<u8>,
        /// Earlier rename source of the path, when one was recorded.
        renamed_from: Option<Vec<u8>>,
        /// Label of the side that added the path.
        added_in: String,
        /// Label of the side that renamed the directory.
        dir_renamed_in: String,
    },
    /// NOTE(review): presumably the conflict-mode counterpart of
    /// `DirRenameApplied` — confirm.
    DirRenameLocationConflict {
        old_path: Vec<u8>,
        new_path: Vec<u8>,
        renamed_from: Option<Vec<u8>>,
        added_in: String,
        dir_renamed_in: String,
    },
    /// NOTE(review): presumably reports a rename/delete conflict — confirm.
    RenameDeleteConflict {
        old_path: Vec<u8>,
        new_path: Vec<u8>,
        renamed_in: String,
        deleted_in: String,
    },
}
5298
5299pub fn flatten_tree(
5302 reader: &impl ObjectReader,
5303 format: ObjectFormat,
5304 tree_oid: &ObjectId,
5305) -> Result<MergeEntryMap> {
5306 let mut entries = BTreeMap::new();
5307 if *tree_oid == empty_tree_oid(format)? {
5308 return Ok(entries);
5309 }
5310 collect_flat_tree(reader, format, tree_oid, Vec::new(), &mut entries)?;
5311 Ok(entries)
5312}
5313
5314fn collect_flat_tree(
5315 reader: &impl ObjectReader,
5316 format: ObjectFormat,
5317 tree_oid: &ObjectId,
5318 prefix: Vec<u8>,
5319 entries: &mut MergeEntryMap,
5320) -> Result<()> {
5321 let object = reader.read_object(tree_oid)?;
5322 if object.object_type != ObjectType::Tree {
5323 return Err(GitError::InvalidObject(format!(
5324 "expected tree {}, found {}",
5325 tree_oid,
5326 object.object_type.as_str()
5327 )));
5328 }
5329 for entry in TreeEntries::new(format, &object.body) {
5330 let entry = entry?;
5331 let mut path = prefix.clone();
5332 if !path.is_empty() {
5333 path.push(b'/');
5334 }
5335 path.extend_from_slice(entry.name);
5336 if entry.mode == 0o040000 {
5337 collect_flat_tree(reader, format, &entry.oid, path, entries)?;
5338 } else {
5339 entries.insert(path, (entry.mode, entry.oid));
5340 }
5341 }
5342 Ok(())
5343}
5344
/// `true` for the two regular-file modes (`0o100644` and `0o100755`), the
/// only modes for which a content merge is attempted.
pub fn is_mergeable_file_mode(mode: u32) -> bool {
    matches!(mode, 0o100644 | 0o100755)
}
5350
5351pub fn merge_trees(
5364 db: &FileObjectDatabase,
5365 format: ObjectFormat,
5366 base: Option<&ObjectId>,
5367 ours: &ObjectId,
5368 theirs: &ObjectId,
5369 options: &MergeTreesOptions<'_>,
5370) -> Result<MergeTreesResult> {
5371 let base_map = match base {
5372 Some(tree) => flatten_tree(db, format, tree)?,
5373 None => MergeEntryMap::new(),
5374 };
5375 let ours_map = flatten_tree(db, format, ours)?;
5376 let theirs_map = flatten_tree(db, format, theirs)?;
5377 merge_entry_maps(db, format, &base_map, &ours_map, &theirs_map, options)
5378}
5379
5380pub fn merge_entry_maps(
5384 db: &FileObjectDatabase,
5385 format: ObjectFormat,
5386 base_map: &MergeEntryMap,
5387 ours_map: &MergeEntryMap,
5388 theirs_map: &MergeEntryMap,
5389 options: &MergeTreesOptions<'_>,
5390) -> Result<MergeTreesResult> {
5391 let (mut renames, side_renames) = if options.detect_renames {
5398 let (renames, ours_side, theirs_side) =
5399 detect_merge_renames(db, format, base_map, ours_map, theirs_map, options)?;
5400 (renames, Some((ours_side, theirs_side)))
5401 } else {
5402 (MergeRenames::default(), None)
5403 };
5404
5405 let (mut eff_base, mut eff_ours, mut eff_theirs) =
5407 apply_merge_renames(base_map, ours_map, theirs_map, &renames);
5408
5409 let mut dir_rename_dirty = false;
5417 let mut rehomed_paths: BTreeMap<Vec<u8>, RehomeSides> = BTreeMap::new();
5418 let mut dir_rename_two_to_one: Vec<DirRenameTwoToOne> = Vec::new();
5419 let mut dir_rename_collisions: Vec<DirRenameCollision> = Vec::new();
5420 let mut dir_rename_splits: BTreeSet<Vec<u8>> = BTreeSet::new();
5421 let mut info_messages = Vec::new();
5422 let mut cleanup_paths: BTreeSet<Vec<u8>> = renames
5423 .dest_to_source
5424 .values()
5425 .map(|rename| rename.source.clone())
5426 .collect();
5427 if options.directory_renames != DirectoryRenames::False
5428 && let Some((ours_side, theirs_side)) = &side_renames
5429 {
5430 let dir_renames = compute_directory_renames(ours_map, theirs_map, ours_side, theirs_side);
5431 let outcome = apply_directory_renames(
5432 base_map,
5433 &eff_base,
5434 &eff_ours,
5435 &eff_theirs,
5436 ours_side,
5437 theirs_side,
5438 &dir_renames,
5439 &renames.dest_to_source,
5440 );
5441 eff_base = outcome.base;
5442 eff_ours = outcome.ours;
5443 eff_theirs = outcome.theirs;
5444 rehomed_paths = outcome.rehomed;
5445 dir_rename_collisions = outcome.collisions;
5446 dir_rename_splits = outcome.splits;
5447 info_messages = outcome.info_messages;
5448 dir_rename_dirty = outcome.dirty;
5449 remap_rename_destinations(&mut renames, &rehomed_paths);
5450 drop_collapsed_rename_rename_conflicts(&mut renames);
5451 dir_rename_two_to_one = collect_dir_rename_two_to_one(&renames, &rehomed_paths);
5452 }
5453 for info in rehomed_paths
5454 .values()
5455 .flat_map(|sides| [&sides.ours, &sides.theirs])
5456 .flatten()
5457 {
5458 cleanup_paths.insert(info.old_path.clone());
5459 }
5460 if options.directory_renames == DirectoryRenames::True {
5461 for (dest, sides) in &rehomed_paths {
5462 for info in [&sides.ours, &sides.theirs].into_iter().flatten() {
5463 let (added_in, dir_renamed_in) = if info.added_on_ours {
5464 (
5465 options.ours_label.to_string(),
5466 options.theirs_label.to_string(),
5467 )
5468 } else {
5469 (
5470 options.theirs_label.to_string(),
5471 options.ours_label.to_string(),
5472 )
5473 };
5474 info_messages.push(MergeInfoMessage::DirRenameApplied {
5475 old_path: info.old_path.clone(),
5476 new_path: dest.clone(),
5477 renamed_from: info.renamed_from.clone(),
5478 added_in,
5479 dir_renamed_in,
5480 });
5481 }
5482 }
5483 }
5484 let dir_rename_conflict_paths: BTreeMap<Vec<u8>, RehomeSides> =
5487 if options.directory_renames == DirectoryRenames::Conflict {
5488 rehomed_paths.clone()
5489 } else {
5490 BTreeMap::new()
5491 };
5492
5493 let mut all_paths = BTreeSet::new();
5494 all_paths.extend(eff_base.keys().cloned());
5495 all_paths.extend(eff_ours.keys().cloned());
5496 all_paths.extend(eff_theirs.keys().cloned());
5497
5498 let mut paths: Vec<MergedPath> = Vec::new();
5499 let mut leaves: MergeEntryMap = BTreeMap::new();
5500 let mut clean = true;
5501
5502 for path in all_paths {
5503 let base = eff_base.get(&path).cloned();
5504 let ours = eff_ours.get(&path).cloned();
5505 let theirs = eff_theirs.get(&path).cloned();
5506 let rename = renames.dest_to_source.get(&path);
5507 let old_path = rename.map(|r| r.source.clone());
5508
5509 if ours == theirs {
5511 if let Some(entry) = ours {
5512 leaves.insert(path.clone(), entry);
5513 }
5514 paths.push(clean_path(path, ours));
5515 continue;
5516 }
5517 if ours == base {
5518 if let Some(entry) = &theirs {
5519 leaves.insert(path.clone(), *entry);
5520 }
5521 paths.push(clean_path(path, theirs));
5522 continue;
5523 }
5524 if theirs == base {
5525 if let Some(entry) = &ours {
5526 leaves.insert(path.clone(), *entry);
5527 }
5528 paths.push(clean_path(path, ours));
5529 continue;
5530 }
5531
5532 let content_mergeable = matches!(&ours, Some((mode, _)) if is_mergeable_file_mode(*mode))
5534 && matches!(&theirs, Some((mode, _)) if is_mergeable_file_mode(*mode))
5535 && match &base {
5536 Some((mode, _)) => is_mergeable_file_mode(*mode),
5537 None => true,
5538 };
5539
5540 if let (true, Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
5541 (content_mergeable, &ours, &theirs)
5542 {
5543 let add_add = base.is_none();
5544 let base_bytes = match &base {
5545 Some((_, oid)) => merge_blob_bytes(db, oid)?,
5546 None => Vec::new(),
5547 };
5548 let ours_bytes = merge_blob_bytes(db, ours_oid)?;
5549 let theirs_bytes = merge_blob_bytes(db, theirs_oid)?;
5550 let rehome = rehomed_paths.get(&path);
5555 let (ours_label, theirs_label) = match rename {
5556 Some(MergeRename { source, side }) => {
5557 let (ours_path, theirs_path) = match side {
5558 RenameSide::Theirs => (source.as_slice(), path.as_slice()),
5560 RenameSide::Ours => (path.as_slice(), source.as_slice()),
5562 };
5563 (
5564 qualify_label(options.ours_label, ours_path),
5565 qualify_label(options.theirs_label, theirs_path),
5566 )
5567 }
5568 None => {
5569 let ours_path = rehome
5570 .and_then(|info| info.ours.as_ref())
5571 .map_or(path.as_slice(), |info| info.old_path.as_slice());
5572 let theirs_path = rehome
5573 .and_then(|info| info.theirs.as_ref())
5574 .map_or(path.as_slice(), |info| info.old_path.as_slice());
5575 if ours_path != path.as_slice() || theirs_path != path.as_slice() {
5576 (
5577 qualify_label(options.ours_label, ours_path),
5578 qualify_label(options.theirs_label, theirs_path),
5579 )
5580 } else {
5581 (
5582 options.ours_label.to_string(),
5583 options.theirs_label.to_string(),
5584 )
5585 }
5586 }
5587 };
5588 let result = merge_blobs(
5589 &base_bytes,
5590 &ours_bytes,
5591 &theirs_bytes,
5592 &MergeBlobOptions {
5593 ours_label: &ours_label,
5594 theirs_label: &theirs_label,
5595 base_label: options.ancestor_label,
5596 style: options.style,
5597 },
5598 );
5599
5600 let base_mode = base.as_ref().map(|(mode, _)| *mode);
5601 let (resolved_mode, mode_conflict) =
5602 merge_file_modes(base_mode, *ours_mode, *theirs_mode);
5603
5604 if !result.conflicted && !mode_conflict {
5605 let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content))?;
5606 leaves.insert(path.clone(), (resolved_mode, oid));
5607 paths.push(clean_path_auto(path, Some((resolved_mode, oid)), true));
5608 } else if options.favor != MergeFavor::None && !mode_conflict {
5609 let chosen = if options.favor == MergeFavor::Ours {
5610 ours
5611 } else {
5612 theirs
5613 };
5614 if let Some(entry) = chosen {
5615 leaves.insert(path.clone(), entry);
5616 }
5617 paths.push(clean_path_auto(path, chosen, true));
5618 } else {
5619 clean = false;
5620 let oid =
5621 db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
5622 leaves.insert(path.clone(), (resolved_mode, oid));
5623 let worktree_mode = if *ours_mode == *theirs_mode {
5624 *ours_mode
5625 } else {
5626 0o100644
5627 };
5628 let conflict = if let Some(old) = &old_path {
5629 MergeConflictKind::RenameContent {
5630 old_path: old.clone(),
5631 }
5632 } else if add_add {
5633 match rehome.and_then(|info| Some((info.ours.as_ref()?, info.theirs.as_ref()?)))
5634 {
5635 Some((ours_info, theirs_info)) => MergeConflictKind::RenameRenameTwoToOne {
5636 ours_path: ours_info.old_path.clone(),
5637 theirs_path: theirs_info.old_path.clone(),
5638 },
5639 None => MergeConflictKind::Content { add_add },
5640 }
5641 } else {
5642 MergeConflictKind::Content { add_add }
5643 };
5644 paths.push(MergedPath {
5645 path: path.clone(),
5646 stages: stages_for(&base, &ours, &theirs),
5647 result: Some((resolved_mode, oid)),
5648 worktree: Some((worktree_mode, result.content)),
5649 conflict: Some(conflict),
5650 auto_merged: true,
5651 });
5652 }
5653 } else if base.is_some() && (ours.is_none() || theirs.is_none()) {
5654 clean = false;
5656 let (deleted_in, modified_in, surviving) = if ours.is_none() {
5657 (
5658 options.ours_label.to_string(),
5659 options.theirs_label.to_string(),
5660 theirs,
5661 )
5662 } else {
5663 (
5664 options.theirs_label.to_string(),
5665 options.ours_label.to_string(),
5666 ours,
5667 )
5668 };
5669 let worktree = match &surviving {
5670 Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
5671 None => None,
5672 };
5673 if let Some(entry) = surviving {
5674 leaves.insert(path.clone(), entry);
5675 }
5676 paths.push(MergedPath {
5677 path: path.clone(),
5678 stages: stages_for(&base, &ours, &theirs),
5679 result: surviving,
5680 worktree,
5681 conflict: Some(MergeConflictKind::ModifyDelete {
5682 deleted_in,
5683 modified_in,
5684 }),
5685 auto_merged: false,
5686 });
5687 } else {
5688 clean = false;
5691 let add_add = base.is_none();
5692 let surviving = ours.or(theirs);
5693 let worktree = match &surviving {
5694 Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
5695 None => None,
5696 };
5697 if let Some(entry) = surviving {
5698 leaves.insert(path.clone(), entry);
5699 }
5700 paths.push(MergedPath {
5701 path: path.clone(),
5702 stages: stages_for(&base, &ours, &theirs),
5703 result: surviving,
5704 worktree,
5705 conflict: Some(MergeConflictKind::Content { add_add }),
5706 auto_merged: false,
5707 });
5708 }
5709 }
5710
5711 if !renames.rename_rename_one_to_two.is_empty() {
5712 apply_rename_rename_one_to_two_conflicts(
5713 db,
5714 base_map,
5715 &eff_ours,
5716 &eff_theirs,
5717 &renames.rename_rename_one_to_two,
5718 &mut paths,
5719 &mut leaves,
5720 options,
5721 )?;
5722 clean = false;
5723 }
5724
5725 if !dir_rename_two_to_one.is_empty() {
5726 apply_dir_rename_two_to_one_conflicts(
5727 db,
5728 &eff_ours,
5729 &eff_theirs,
5730 &dir_rename_two_to_one,
5731 &mut paths,
5732 &mut leaves,
5733 options,
5734 )?;
5735 clean = false;
5736 }
5737
5738 if !renames.rename_deletes.is_empty() {
5743 for (dest, rd) in &renames.rename_deletes {
5744 let Some(slot) = paths.iter_mut().find(|p| &p.path == dest) else {
5746 continue;
5747 };
5748 if slot.conflict.is_some() {
5749 continue;
5750 }
5751 let base_entry = base_map.get(&rd.source).copied();
5752 let renamed_entry = slot.result;
5753 let (ours_stage, theirs_stage) = match rd.side {
5756 RenameSide::Ours => (renamed_entry, None),
5757 RenameSide::Theirs => (None, renamed_entry),
5758 };
5759 let (renamed_in, deleted_in) = match rd.side {
5760 RenameSide::Ours => (
5761 options.ours_label.to_string(),
5762 options.theirs_label.to_string(),
5763 ),
5764 RenameSide::Theirs => (
5765 options.theirs_label.to_string(),
5766 options.ours_label.to_string(),
5767 ),
5768 };
5769 let worktree = match &renamed_entry {
5770 Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
5771 None => None,
5772 };
5773 slot.stages = MergeStages {
5774 base: base_entry,
5775 ours: ours_stage,
5776 theirs: theirs_stage,
5777 };
5778 slot.worktree = worktree;
5779 slot.conflict = Some(MergeConflictKind::RenameDelete {
5780 old_path: rd.source.clone(),
5781 renamed_in,
5782 deleted_in,
5783 });
5784 clean = false;
5785 }
5786 }
5787
5788 if dir_rename_dirty {
5795 clean = false;
5796 }
5797 for collision in &dir_rename_collisions {
5803 clean = false;
5804 if let Some(slot) = paths.iter_mut().find(|p| p.path == collision.dest)
5805 && slot.conflict.is_none()
5806 {
5807 slot.conflict = Some(MergeConflictKind::DirRenameImplicitCollision {
5808 sources: collision.sources.clone(),
5809 });
5810 } else if !paths.iter().any(|p| p.path == collision.dest) {
5811 paths.push(MergedPath {
5812 path: collision.dest.clone(),
5813 stages: MergeStages::default(),
5814 result: None,
5815 worktree: None,
5816 conflict: Some(MergeConflictKind::DirRenameImplicitCollision {
5817 sources: collision.sources.clone(),
5818 }),
5819 auto_merged: false,
5820 });
5821 }
5822 }
5823 for source_dir in &dir_rename_splits {
5824 clean = false;
5825 paths.push(MergedPath {
5826 path: source_dir.clone(),
5827 stages: MergeStages::default(),
5828 result: None,
5829 worktree: None,
5830 conflict: Some(MergeConflictKind::DirRenameSplit {
5831 source_dir: source_dir.clone(),
5832 }),
5833 auto_merged: false,
5834 });
5835 }
5836 if !dir_rename_conflict_paths.is_empty() {
5837 clean = false;
5838 for (dest, infos) in &dir_rename_conflict_paths {
5839 for info in [&infos.ours, &infos.theirs].into_iter().flatten() {
5840 let (added_in, dir_renamed_in) = if info.added_on_ours {
5841 (
5843 options.ours_label.to_string(),
5844 options.theirs_label.to_string(),
5845 )
5846 } else {
5847 (
5848 options.theirs_label.to_string(),
5849 options.ours_label.to_string(),
5850 )
5851 };
5852 if let Some(slot) = paths.iter_mut().find(|p| &p.path == dest)
5853 && slot.conflict.is_none()
5854 {
5855 slot.conflict = Some(MergeConflictKind::DirRenameLocation {
5856 old_path: info.old_path.clone(),
5857 renamed_from: info.renamed_from.clone(),
5858 added_in,
5859 dir_renamed_in,
5860 });
5861 } else {
5862 info_messages.push(MergeInfoMessage::DirRenameLocationConflict {
5863 old_path: info.old_path.clone(),
5864 new_path: dest.clone(),
5865 renamed_from: info.renamed_from.clone(),
5866 added_in,
5867 dir_renamed_in,
5868 });
5869 }
5870 }
5871 }
5872 }
5873
5874 resolve_directory_file_conflicts(
5882 db,
5883 &mut paths,
5884 &mut leaves,
5885 &mut clean,
5886 &eff_ours,
5887 &eff_theirs,
5888 options,
5889 &mut info_messages,
5890 )?;
5891
5892 let tree = write_merged_tree(db, &leaves)?;
5893
5894 cleanup_paths.retain(|path| !leaves.contains_key(path));
5895
5896 Ok(MergeTreesResult {
5897 tree,
5898 paths,
5899 clean,
5900 cleanup_paths: cleanup_paths.into_iter().collect(),
5901 info_messages,
5902 })
5903}
5904
/// Returns a copy of `branch` in which every `/` has been replaced by `_`.
///
/// All other characters are passed through unchanged.
fn flatten_branch_label(branch: &str) -> String {
    branch
        .chars()
        .map(|ch| if ch == '/' { '_' } else { ch })
        .collect()
}
5911
5912fn unique_df_path(
5916 path: &[u8],
5917 branch: &str,
5918 leaves: &MergeEntryMap,
5919 paths: &[MergedPath],
5920) -> Vec<u8> {
5921 let mut base = path.to_vec();
5922 base.push(b'~');
5923 base.extend_from_slice(flatten_branch_label(branch).as_bytes());
5924 let taken = |candidate: &[u8]| {
5925 leaves.contains_key(candidate) || paths.iter().any(|p| p.path == candidate)
5926 };
5927 if !taken(&base) {
5928 return base;
5929 }
5930 let mut suffix = 0usize;
5931 loop {
5932 let mut candidate = base.clone();
5933 candidate.push(b'_');
5934 candidate.extend_from_slice(suffix.to_string().as_bytes());
5935 if !taken(&candidate) {
5936 return candidate;
5937 }
5938 suffix += 1;
5939 }
5940}
5941
/// Relocates file entries whose path is also needed as a directory.
///
/// A key of `leaves` collides when it is identical to a directory prefix of
/// some other key (for example `a` alongside `a/b`). Each colliding entry is
/// removed from `leaves`, re-inserted under a fresh name produced by
/// [`unique_df_path`], and recorded in `paths` as a
/// `MergeConflictKind::FileDirectory` conflict. `clean` is set to `false` for
/// every relocation.
///
/// When no key in `leaves` contains a `/`, the function returns immediately
/// and leaves `paths` untouched (in particular, unsorted by this function).
/// Otherwise `paths` is sorted by path before returning.
///
/// # Errors
///
/// Propagates any error from [`merge_worktree_bytes`] while reading the
/// relocated entry's content.
#[allow(clippy::too_many_arguments)]
fn resolve_directory_file_conflicts(
    db: &FileObjectDatabase,
    paths: &mut Vec<MergedPath>,
    leaves: &mut MergeEntryMap,
    clean: &mut bool,
    eff_ours: &MergeEntryMap,
    eff_theirs: &MergeEntryMap,
    options: &MergeTreesOptions<'_>,
    info_messages: &mut Vec<MergeInfoMessage>,
) -> Result<()> {
    // Collect every proper directory prefix of every leaf path: for `a/b/c`
    // this records `a` and `a/b`.
    let mut directory_prefixes: BTreeSet<Vec<u8>> = BTreeSet::new();
    for key in leaves.keys() {
        let mut idx = 0;
        while let Some(pos) = key[idx..].iter().position(|b| *b == b'/') {
            let end = idx + pos;
            directory_prefixes.insert(key[..end].to_vec());
            idx = end + 1;
        }
    }
    if directory_prefixes.is_empty() {
        return Ok(());
    }

    // Leaf paths that are simultaneously a directory prefix of another leaf.
    // Collected up front because `leaves` is mutated in the loop below.
    let colliding: Vec<Vec<u8>> = leaves
        .keys()
        .filter(|key| directory_prefixes.contains(*key))
        .cloned()
        .collect();

    for original in colliding {
        let Some(entry) = leaves.remove(&original) else {
            continue;
        };
        let moved_bytes = merge_worktree_bytes(db, entry.0, &entry.1)?;
        let ours_has_file = eff_ours.contains_key(&original);
        let theirs_has_file = eff_theirs.contains_key(&original);
        // Attribute the file to "ours" unless only "theirs" has it at this path.
        let from_ours = ours_has_file || !theirs_has_file;
        let branch = if from_ours {
            options.ours_label
        } else {
            options.theirs_label
        };
        let new_path = unique_df_path(&original, branch, leaves, paths);
        leaves.insert(new_path.clone(), entry);
        *clean = false;

        if let Some(slot) = paths.iter_mut().find(|p| p.path == original) {
            // The slot's conflict is overwritten below; if it was a
            // rename/delete, keep that information as an info message.
            if let Some(MergeConflictKind::RenameDelete {
                old_path,
                renamed_in,
                deleted_in,
            }) = &slot.conflict
            {
                info_messages.push(MergeInfoMessage::RenameDeleteConflict {
                    old_path: old_path.clone(),
                    new_path: original.clone(),
                    renamed_in: renamed_in.clone(),
                    deleted_in: deleted_in.clone(),
                });
            }
            slot.path = new_path.clone();
            slot.result = Some(entry);
            // Only synthesize stages when the slot carried none at all.
            if slot.stages.base.is_none()
                && slot.stages.ours.is_none()
                && slot.stages.theirs.is_none()
            {
                slot.stages = MergeStages {
                    base: None,
                    ours: if from_ours { Some(entry) } else { None },
                    theirs: if from_ours { None } else { Some(entry) },
                };
            }
            slot.worktree = Some((entry.0, moved_bytes));
            slot.conflict = Some(MergeConflictKind::FileDirectory {
                original_path: original.clone(),
                moved_from: branch.to_string(),
            });
        } else {
            // No existing record for this path: create one for the moved file.
            paths.push(MergedPath {
                path: new_path.clone(),
                stages: MergeStages {
                    base: None,
                    ours: if from_ours { Some(entry) } else { None },
                    theirs: if from_ours { None } else { Some(entry) },
                },
                result: Some(entry),
                worktree: Some((entry.0, moved_bytes)),
                conflict: Some(MergeConflictKind::FileDirectory {
                    original_path: original.clone(),
                    moved_from: branch.to_string(),
                }),
                auto_merged: false,
            });
        }
    }

    // Slot paths were rewritten above, so restore path order.
    paths.sort_by(|a, b| a.path.cmp(&b.path));
    Ok(())
}
6065
6066fn clean_path(path: Vec<u8>, result: Option<(u32, ObjectId)>) -> MergedPath {
6068 clean_path_auto(path, result, false)
6069}
6070
6071fn clean_path_auto(
6074 path: Vec<u8>,
6075 result: Option<(u32, ObjectId)>,
6076 auto_merged: bool,
6077) -> MergedPath {
6078 MergedPath {
6079 path,
6080 stages: MergeStages::default(),
6081 result,
6082 worktree: None,
6083 conflict: None,
6084 auto_merged,
6085 }
6086}
6087
6088fn stages_for(
6090 base: &Option<(u32, ObjectId)>,
6091 ours: &Option<(u32, ObjectId)>,
6092 theirs: &Option<(u32, ObjectId)>,
6093) -> MergeStages {
6094 MergeStages {
6095 base: *base,
6096 ours: *ours,
6097 theirs: *theirs,
6098 }
6099}
6100
6101fn merge_blob_bytes(reader: &impl ObjectReader, oid: &ObjectId) -> Result<Vec<u8>> {
6103 let object = reader.read_object(oid)?;
6104 if object.object_type != ObjectType::Blob {
6105 return Err(GitError::InvalidObject(format!(
6106 "expected blob {}, found {}",
6107 oid,
6108 object.object_type.as_str()
6109 )));
6110 }
6111 Ok(object.body.clone())
6112}
6113
6114fn merge_worktree_bytes(reader: &impl ObjectReader, mode: u32, oid: &ObjectId) -> Result<Vec<u8>> {
6115 if sley_index::is_gitlink(mode) {
6116 Ok(Vec::new())
6117 } else {
6118 merge_blob_bytes(reader, oid)
6119 }
6120}
6121
/// Three-way merges a file mode.
///
/// Returns `(mode, conflict)`:
/// * both sides agree: that mode, no conflict;
/// * exactly one side still equals `base`: the other side's mode, no conflict;
/// * otherwise (both changed differently, or there is no base): `ours` with
///   the conflict flag set.
fn merge_file_modes(base: Option<u32>, ours: u32, theirs: u32) -> (u32, bool) {
    if ours == theirs || base == Some(theirs) {
        // Either nothing to reconcile, or only our side moved away from base.
        (ours, false)
    } else if base == Some(ours) {
        // Only their side moved away from base.
        (theirs, false)
    } else {
        (ours, true)
    }
}
6134
6135fn write_merged_tree(db: &FileObjectDatabase, leaves: &MergeEntryMap) -> Result<ObjectId> {
6138 let mut root = MergeTreeNode::default();
6139 for (path, (mode, oid)) in leaves {
6140 root.insert(path, *mode, *oid);
6141 }
6142 root.write(db)
6143}
6144
/// One directory level of an in-memory tree under construction.
#[derive(Default)]
struct MergeTreeNode {
    /// Entries stored directly at this level, keyed by single path component,
    /// each with its `(mode, oid)`.
    blobs: BTreeMap<Vec<u8>, (u32, ObjectId)>,
    /// Child directories, keyed by single path component.
    subtrees: BTreeMap<Vec<u8>, MergeTreeNode>,
}
6150
6151impl MergeTreeNode {
6152 fn insert(&mut self, path: &[u8], mode: u32, oid: ObjectId) {
6153 match path.iter().position(|byte| *byte == b'/') {
6154 Some(slash) => {
6155 let component = path[..slash].to_vec();
6156 let rest = &path[slash + 1..];
6157 self.subtrees
6158 .entry(component)
6159 .or_default()
6160 .insert(rest, mode, oid);
6161 }
6162 None => {
6163 self.blobs.insert(path.to_vec(), (mode, oid));
6164 }
6165 }
6166 }
6167
6168 fn write(&self, db: &FileObjectDatabase) -> Result<ObjectId> {
6169 let mut entries: Vec<TreeEntry> = Vec::new();
6170 for (name, (mode, oid)) in &self.blobs {
6171 entries.push(TreeEntry {
6172 mode: *mode,
6173 name: BString::from(name.clone()),
6174 oid: *oid,
6175 });
6176 }
6177 for (name, subtree) in &self.subtrees {
6178 let oid = subtree.write(db)?;
6179 entries.push(TreeEntry {
6180 mode: 0o040000,
6181 name: BString::from(name.clone()),
6182 oid,
6183 });
6184 }
6185 entries.sort_by_key(merge_tree_sort_key);
6186 let tree = Tree { entries };
6187 db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
6188 }
6189}
6190
6191fn merge_tree_sort_key(entry: &TreeEntry) -> Vec<u8> {
6192 let mut key = entry.name.as_bytes().to_vec();
6193 if entry.mode == 0o040000 {
6194 key.push(b'/');
6195 }
6196 key
6197}
6198
/// Identifies which side of the merge an observation belongs to.
#[derive(Clone, Copy, PartialEq, Eq)]
enum RenameSide {
    /// The "ours" side.
    Ours,
    /// The "theirs" side.
    Theirs,
}
6207
/// A file rename recorded against its destination path
/// (see `MergeRenames::dest_to_source`).
#[derive(Clone)]
struct MergeRename {
    /// Path the file had before the rename.
    source: Vec<u8>,
    /// Side on which the rename was detected.
    side: RenameSide,
}
6214
/// A rename on one side whose source path is absent from the other side
/// (see `MergeRenames::rename_deletes`).
#[derive(Clone)]
struct RenameDelete {
    /// Path the file had before the rename.
    source: Vec<u8>,
    /// Side on which the rename was detected.
    side: RenameSide,
}
6225
/// Rename information gathered from both sides of a merge.
#[derive(Default)]
struct MergeRenames {
    /// Rename destination -> source and side, for renames whose source still
    /// exists on the other side and whose destination does not.
    dest_to_source: BTreeMap<Vec<u8>, MergeRename>,
    /// Rename destination -> source and side, for renames whose source exists
    /// in the base but on the other side neither source nor destination exists.
    rename_deletes: BTreeMap<Vec<u8>, RenameDelete>,
    /// Source path -> the two differing destinations, for sources that both
    /// sides renamed to different paths.
    rename_rename_one_to_two: BTreeMap<Vec<u8>, RenameRenameOneToTwo>,
}
6241
/// The two different destinations chosen for a single source path.
#[derive(Clone)]
struct RenameRenameOneToTwo {
    /// Destination chosen on the "ours" side.
    ours_dest: Vec<u8>,
    /// Destination chosen on the "theirs" side.
    theirs_dest: Vec<u8>,
}
6247
/// All renames detected on one side relative to the base.
struct SideRenames {
    /// `(old_path, new_path)` for every detected rename, in detection order.
    pairs: Vec<(Vec<u8>, Vec<u8>)>,
}
6255
6256fn detect_merge_renames(
6266 db: &FileObjectDatabase,
6267 format: ObjectFormat,
6268 base_map: &MergeEntryMap,
6269 ours_map: &MergeEntryMap,
6270 theirs_map: &MergeEntryMap,
6271 options: &MergeTreesOptions<'_>,
6272) -> Result<(MergeRenames, SideRenames, SideRenames)> {
6273 let mut renames = MergeRenames::default();
6274
6275 let ours_side = collect_side_renames(
6277 db,
6278 format,
6279 base_map,
6280 ours_map,
6281 theirs_map,
6282 RenameSide::Ours,
6283 options.rename_threshold,
6284 &mut renames,
6285 )?;
6286 let theirs_side = collect_side_renames(
6288 db,
6289 format,
6290 base_map,
6291 theirs_map,
6292 ours_map,
6293 RenameSide::Theirs,
6294 options.rename_threshold,
6295 &mut renames,
6296 )?;
6297
6298 collect_rename_rename_one_to_two(&mut renames, &ours_side, &theirs_side);
6299
6300 Ok((renames, ours_side, theirs_side))
6301}
6302
6303fn collect_rename_rename_one_to_two(
6304 renames: &mut MergeRenames,
6305 ours_side: &SideRenames,
6306 theirs_side: &SideRenames,
6307) {
6308 let ours_by_source: BTreeMap<&[u8], &[u8]> = ours_side
6309 .pairs
6310 .iter()
6311 .map(|(old, new)| (old.as_slice(), new.as_slice()))
6312 .collect();
6313 for (old, theirs_new) in &theirs_side.pairs {
6314 let Some(ours_new) = ours_by_source.get(old.as_slice()) else {
6315 continue;
6316 };
6317 if *ours_new == theirs_new.as_slice() {
6318 continue;
6319 }
6320 renames.rename_deletes.remove(*ours_new);
6321 renames.rename_deletes.remove(theirs_new);
6322 renames.dest_to_source.remove(*ours_new);
6323 renames.dest_to_source.remove(theirs_new);
6324 renames.rename_rename_one_to_two.insert(
6325 old.clone(),
6326 RenameRenameOneToTwo {
6327 ours_dest: (*ours_new).to_vec(),
6328 theirs_dest: theirs_new.clone(),
6329 },
6330 );
6331 }
6332}
6333
6334#[allow(clippy::too_many_arguments)]
6339fn collect_side_renames(
6340 db: &FileObjectDatabase,
6341 format: ObjectFormat,
6342 base_map: &MergeEntryMap,
6343 side_map: &MergeEntryMap,
6344 other_map: &MergeEntryMap,
6345 side: RenameSide,
6346 threshold: u8,
6347 renames: &mut MergeRenames,
6348) -> Result<SideRenames> {
6349 let base_tree = entry_map_as_tracked(base_map);
6352 let side_tree = entry_map_as_tracked(side_map);
6353 let options = RenameDetectionOptions {
6354 base: DiffNameStatusOptions {
6355 detect_renames: true,
6356 detect_copies: false,
6357 find_copies_harder: false,
6358 rename_empty: false,
6359 },
6360 detect_inexact: true,
6361 rename_threshold: threshold,
6362 copy_threshold: threshold,
6363 };
6364 let changes = diff_name_status_maps_with_renames(
6365 &base_tree,
6366 &side_tree,
6367 base_tree.keys().chain(side_tree.keys()),
6368 options,
6369 |oid| merge_blob_bytes(db, oid).ok(),
6370 )?;
6371
6372 let mut pairs = Vec::new();
6373 for change in changes {
6374 let NameStatus::Renamed(_) = change.status else {
6375 continue;
6376 };
6377 let Some(old_path) = change.old_path.as_ref() else {
6378 continue;
6379 };
6380 let old = old_path.as_bytes().to_vec();
6381 let new = change.path.as_bytes().to_vec();
6382 pairs.push((old.clone(), new.clone()));
6384
6385 if !other_map.contains_key(&old) {
6390 if base_map.contains_key(&old) && !other_map.contains_key(&new) {
6395 renames
6396 .rename_deletes
6397 .entry(new.clone())
6398 .or_insert(RenameDelete {
6399 source: old.clone(),
6400 side,
6401 });
6402 }
6403 continue;
6404 }
6405 if other_map.contains_key(&new) {
6408 continue;
6409 }
6410 renames
6414 .dest_to_source
6415 .entry(new)
6416 .or_insert(MergeRename { source: old, side });
6417 }
6418
6419 let _ = format;
6420 Ok(SideRenames { pairs })
6421}
6422
6423fn apply_merge_renames(
6429 base_map: &MergeEntryMap,
6430 ours_map: &MergeEntryMap,
6431 theirs_map: &MergeEntryMap,
6432 renames: &MergeRenames,
6433) -> (MergeEntryMap, MergeEntryMap, MergeEntryMap) {
6434 if renames.dest_to_source.is_empty() {
6435 return (base_map.clone(), ours_map.clone(), theirs_map.clone());
6436 }
6437 let mut base = base_map.clone();
6438 let mut ours = ours_map.clone();
6439 let mut theirs = theirs_map.clone();
6440
6441 for (new, rename) in &renames.dest_to_source {
6442 let old = &rename.source;
6443 if let Some(entry) = base.remove(old) {
6445 base.entry(new.clone()).or_insert(entry);
6446 }
6447 for side in [&mut ours, &mut theirs] {
6449 if let Some(entry) = side.remove(old) {
6450 side.entry(new.clone()).or_insert(entry);
6451 }
6452 }
6453 }
6454 (base, ours, theirs)
6455}
6456
/// Returns everything before the last `/` in `path`, or `None` when `path`
/// contains no `/`.
///
/// A path whose only `/` is its first byte yields `Some` of an empty slice.
fn parent_dir(path: &[u8]) -> Option<&[u8]> {
    let last_slash = path.iter().rposition(|&byte| byte == b'/')?;
    Some(&path[..last_slash])
}
6463
/// Rewrites `path` by replacing its leading `old_dir` with `new_dir`.
///
/// The first `old_dir.len()` bytes of `path` are dropped and `new_dir` is put
/// in their place. When `new_dir` is empty, the separator that followed
/// `old_dir` is dropped as well so the result has no leading `/`.
///
/// # Panics
///
/// Panics if `path` is shorter than the number of bytes to be dropped.
fn apply_dir_rename(old_dir: &[u8], new_dir: &[u8], path: &[u8]) -> Vec<u8> {
    let skip = old_dir.len() + usize::from(new_dir.is_empty());
    let tail = &path[skip..];
    [new_dir, tail].concat()
}
6479
6480fn check_dir_renamed<'a>(
6484 path: &[u8],
6485 dir_renames: &'a BTreeMap<Vec<u8>, Vec<u8>>,
6486) -> Option<(&'a [u8], &'a [u8])> {
6487 let mut cur = parent_dir(path);
6488 while let Some(dir) = cur {
6489 if let Some((old_dir, new_dir)) = dir_renames.get_key_value(dir) {
6490 return Some((old_dir.as_slice(), new_dir.as_slice()));
6491 }
6492 cur = parent_dir(dir);
6493 }
6494 None
6495}
6496
/// Directory renames inferred for each side of the merge.
struct DirectoryRenameMaps {
    /// Old directory -> new directory as inferred for "ours", excluding
    /// directories that also appear as a key for "theirs".
    ours: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Old directory -> new directory as inferred for "theirs", excluding
    /// directories that also appear as a key for "ours".
    theirs: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Directories on "ours" whose best rename target was tied.
    ours_split: BTreeSet<Vec<u8>>,
    /// Directories on "theirs" whose best rename target was tied.
    theirs_split: BTreeSet<Vec<u8>>,
}
6512
6513fn compute_directory_renames(
6522 ours_map: &MergeEntryMap,
6523 theirs_map: &MergeEntryMap,
6524 ours_side: &SideRenames,
6525 theirs_side: &SideRenames,
6526) -> DirectoryRenameMaps {
6527 let ours = compute_side_dir_renames(&ours_side.pairs, ours_map);
6528 let theirs = compute_side_dir_renames(&theirs_side.pairs, theirs_map);
6529
6530 let mut ours_map_out = ours.renames;
6534 let mut theirs_map_out = theirs.renames;
6535 let dup: Vec<Vec<u8>> = ours_map_out
6536 .keys()
6537 .filter(|k| theirs_map_out.contains_key(*k))
6538 .cloned()
6539 .collect();
6540 for k in dup {
6541 ours_map_out.remove(&k);
6542 theirs_map_out.remove(&k);
6543 }
6544
6545 DirectoryRenameMaps {
6546 ours: ours_map_out,
6547 theirs: theirs_map_out,
6548 ours_split: ours.split,
6549 theirs_split: theirs.split,
6550 }
6551}
6552
/// Directory renames inferred for a single side.
struct SideDirRenames {
    /// Old directory -> the single most frequent new directory, for
    /// directories that no longer contain any path on this side.
    renames: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Old directories whose highest rename count was shared by more than one
    /// target.
    split: BTreeSet<Vec<u8>>,
}
6558
6559fn compute_side_dir_renames(
6562 pairs: &[(Vec<u8>, Vec<u8>)],
6563 side_map: &MergeEntryMap,
6564) -> SideDirRenames {
6565 let mut counts: BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>> = BTreeMap::new();
6573 for (old, new) in pairs {
6574 update_dir_rename_counts(&mut counts, old, new);
6575 }
6576
6577 let mut renames = BTreeMap::new();
6578 let mut split = BTreeSet::new();
6579 for (old_dir, targets) in counts {
6580 let mut max = 0usize;
6581 let mut bad_max = 0usize;
6582 let mut best: Option<Vec<u8>> = None;
6583 for (target, count) in &targets {
6584 if *count == max {
6585 bad_max = max;
6586 } else if *count > max {
6587 max = *count;
6588 best = Some(target.clone());
6589 }
6590 }
6591 if max == 0 {
6592 continue;
6593 }
6594 if bad_max == max {
6595 split.insert(old_dir);
6596 continue;
6597 }
6598 if let Some(best) = best
6603 && directory_fully_removed(&old_dir, side_map)
6604 {
6605 renames.insert(old_dir, best);
6606 }
6607 }
6608
6609 SideDirRenames { renames, split }
6610}
6611
6612fn update_dir_rename_counts(
6618 counts: &mut BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>>,
6619 old: &[u8],
6620 new: &[u8],
6621) {
6622 let mut old_dir = old.to_vec();
6624 let mut new_dir = new.to_vec();
6625 let mut first = true;
6626 loop {
6627 let old_has = dir_munge(&mut old_dir);
6630 let new_has = dir_munge(&mut new_dir);
6631
6632 if !first {
6636 let old_sub = trailing_component(old, &old_dir);
6637 let new_sub = trailing_component(new, &new_dir);
6638 if old_sub != new_sub {
6639 break;
6640 }
6641 }
6642
6643 if old_dir == new_dir {
6644 break;
6647 }
6648 *counts
6649 .entry(old_dir.clone())
6650 .or_default()
6651 .entry(new_dir.clone())
6652 .or_default() += 1;
6653
6654 first = false;
6655 if old_dir.is_empty() || new_dir.is_empty() {
6657 break;
6658 }
6659 if !old_has || !new_has {
6662 break;
6663 }
6664 }
6665}
6666
/// Truncates `buf` in place to the part before its last `/`.
///
/// Returns `true` if a `/` was found. When there is none, `buf` is emptied and
/// `false` is returned.
fn dir_munge(buf: &mut Vec<u8>) -> bool {
    let last_slash = buf.iter().rposition(|&byte| byte == b'/');
    buf.truncate(last_slash.unwrap_or(0));
    last_slash.is_some()
}
6682
/// Returns the part of `full` that follows `dir` and one separator byte.
///
/// With an empty `dir`, all of `full` is returned. Only the length of `dir` is
/// used; its content is not compared against `full`.
///
/// # Panics
///
/// Panics if `dir` is non-empty and `full` is shorter than `dir.len() + 1`.
fn trailing_component<'a>(full: &'a [u8], dir: &[u8]) -> &'a [u8] {
    match dir.len() {
        0 => full,
        dir_len => &full[dir_len + 1..],
    }
}
6694
6695fn directory_fully_removed(dir: &[u8], side_map: &MergeEntryMap) -> bool {
6698 let mut prefix = dir.to_vec();
6699 prefix.push(b'/');
6700 for path in side_map.keys() {
6701 if path.starts_with(&prefix) {
6702 return false;
6703 }
6704 }
6705 true
6706}
6707
/// A planned relocation of one path because its directory was renamed.
struct DirRenameMove {
    /// Path the entry currently has.
    from: Vec<u8>,
    /// Path the entry should get after applying the directory rename.
    to: Vec<u8>,
    /// If `from` is itself the destination of a file rename on the same side,
    /// the source of that rename.
    renamed_from: Option<Vec<u8>>,
}
6722
/// Two renamed files, one per side, that ended up at the same destination
/// after a directory rename was applied.
struct DirRenameTwoToOne {
    /// The shared destination path.
    dest: Vec<u8>,
    /// Rename source of the file on the "ours" side.
    ours_source: Vec<u8>,
    /// Rename source of the file on the "theirs" side.
    theirs_source: Vec<u8>,
    /// Path of the "ours" file before any directory-rename relocation
    /// (`dest` itself when that file was not relocated).
    ours_label_path: Vec<u8>,
    /// Path of the "theirs" file before any directory-rename relocation
    /// (`dest` itself when that file was not relocated).
    theirs_label_path: Vec<u8>,
}
6730
/// Describes a path that was relocated by a directory rename.
#[derive(Clone)]
struct RehomeInfo {
    /// Path the entry had before it was relocated.
    old_path: Vec<u8>,
    /// Source of the file rename that produced `old_path`, if there was one.
    renamed_from: Option<Vec<u8>>,
    /// `true` when the relocation was applied for the "ours" side.
    added_on_ours: bool,
}
6743
/// Relocation records for one destination path, kept per side.
#[derive(Clone, Default)]
struct RehomeSides {
    /// Relocation performed for the "ours" side, if any.
    ours: Option<RehomeInfo>,
    /// Relocation performed for the "theirs" side, if any.
    theirs: Option<RehomeInfo>,
}
6750
/// A directory-rename relocation that could not be applied because its
/// destination was contested.
struct DirRenameCollision {
    /// Destination path that was contested.
    dest: Vec<u8>,
    /// Original paths that would have been moved to `dest`.
    sources: Vec<Vec<u8>>,
}
6761
/// Everything produced by [`apply_directory_renames`].
struct DirRenameOutcome {
    /// Base map after relocations.
    base: MergeEntryMap,
    /// "Ours" map after relocations.
    ours: MergeEntryMap,
    /// "Theirs" map after relocations.
    theirs: MergeEntryMap,
    /// Destination path -> per-side record of what was relocated there.
    rehomed: BTreeMap<Vec<u8>, RehomeSides>,
    /// Relocations that were skipped because the destination was contested.
    collisions: Vec<DirRenameCollision>,
    /// Split directories that at least one candidate path fell under.
    splits: BTreeSet<Vec<u8>>,
    /// Set when any split or collision was encountered.
    dirty: bool,
    /// Informational messages gathered while planning.
    info_messages: Vec<MergeInfoMessage>,
}
6783
6784#[allow(clippy::too_many_arguments)]
6799fn apply_directory_renames(
6800 base_map: &MergeEntryMap,
6801 eff_base: &MergeEntryMap,
6802 eff_ours: &MergeEntryMap,
6803 eff_theirs: &MergeEntryMap,
6804 ours_side: &SideRenames,
6805 theirs_side: &SideRenames,
6806 dir_renames: &DirectoryRenameMaps,
6807 file_rename_dests: &BTreeMap<Vec<u8>, MergeRename>,
6808) -> DirRenameOutcome {
6809 let mut base = eff_base.clone();
6810 let mut ours = eff_ours.clone();
6811 let mut theirs = eff_theirs.clone();
6812 let mut rehomed = BTreeMap::new();
6813 let mut collisions = Vec::new();
6814 let mut splits = BTreeSet::new();
6815 let mut info_messages = Vec::new();
6816 let mut dirty = false;
6817
6818 let ours_excl = exclusion_dirs(&dir_renames.ours);
6822 let theirs_excl = exclusion_dirs(&dir_renames.theirs);
6823
6824 let ours_moves = plan_rehome(
6828 base_map,
6829 &ours,
6830 ours_side,
6831 &dir_renames.theirs,
6832 &ours_excl,
6833 &dir_renames.theirs_split,
6834 &mut collisions,
6835 &mut splits,
6836 &mut info_messages,
6837 &mut dirty,
6838 );
6839 let theirs_moves = plan_rehome(
6840 base_map,
6841 &theirs,
6842 theirs_side,
6843 &dir_renames.ours,
6844 &theirs_excl,
6845 &dir_renames.ours_split,
6846 &mut collisions,
6847 &mut splits,
6848 &mut info_messages,
6849 &mut dirty,
6850 );
6851
6852 apply_rehome_moves(
6853 base_map,
6854 file_rename_dests,
6855 &mut base,
6856 &mut ours,
6857 &mut theirs,
6858 ours_moves,
6859 true,
6860 &mut rehomed,
6861 &mut collisions,
6862 &mut dirty,
6863 );
6864 apply_rehome_moves(
6865 base_map,
6866 file_rename_dests,
6867 &mut base,
6868 &mut ours,
6869 &mut theirs,
6870 theirs_moves,
6871 false,
6872 &mut rehomed,
6873 &mut collisions,
6874 &mut dirty,
6875 );
6876
6877 DirRenameOutcome {
6878 base,
6879 ours,
6880 theirs,
6881 rehomed,
6882 collisions,
6883 splits,
6884 dirty,
6885 info_messages,
6886 }
6887}
6888
/// Returns the set of old directories (the keys) of `side_dir_renames`.
fn exclusion_dirs(side_dir_renames: &BTreeMap<Vec<u8>, Vec<u8>>) -> BTreeSet<Vec<u8>> {
    let mut dirs = BTreeSet::new();
    for old_dir in side_dir_renames.keys() {
        dirs.insert(old_dir.clone());
    }
    dirs
}
6895
/// Plans which paths of `side` should be relocated because the other side
/// renamed their directory.
///
/// * `renamer_dirs` maps old directory -> new directory for the renames to
///   follow.
/// * `exclusions` lists directories that must not be used as a relocation
///   target (see the skip rule in the body).
/// * `split_dirs` lists directories whose rename target was ambiguous.
///
/// Candidates are the paths of `side` that are either absent from `base_map`
/// or are the destination of a rename in `side_renames`.
///
/// Side effects: split directories that a candidate falls under are added to
/// `splits`; skipped relocations are reported in `info_messages`;
/// destinations claimed by more than one candidate are appended to
/// `collisions`; `dirty` is set for splits and collisions.
///
/// Returns the relocations whose destination is claimed by exactly one
/// candidate.
#[allow(clippy::too_many_arguments)]
fn plan_rehome(
    base_map: &MergeEntryMap,
    side: &MergeEntryMap,
    side_renames: &SideRenames,
    renamer_dirs: &BTreeMap<Vec<u8>, Vec<u8>>,
    exclusions: &BTreeSet<Vec<u8>>,
    split_dirs: &BTreeSet<Vec<u8>>,
    collisions: &mut Vec<DirRenameCollision>,
    splits: &mut BTreeSet<Vec<u8>>,
    info_messages: &mut Vec<MergeInfoMessage>,
    dirty: &mut bool,
) -> Vec<DirRenameMove> {
    if renamer_dirs.is_empty() && split_dirs.is_empty() {
        return Vec::new();
    }

    // Rename destination -> rename source for this side's file renames.
    let side_rename_src: BTreeMap<&[u8], &[u8]> = side_renames
        .pairs
        .iter()
        .map(|(o, n)| (n.as_slice(), o.as_slice()))
        .collect();

    // Paths new on this side, or arrived at via a rename on this side.
    let candidates: Vec<Vec<u8>> = side
        .keys()
        .filter(|p| !base_map.contains_key(*p) || side_rename_src.contains_key(p.as_slice()))
        .cloned()
        .collect();

    // Destination -> every move that wants to land there.
    let mut planned: BTreeMap<Vec<u8>, Vec<DirRenameMove>> = BTreeMap::new();
    for path in candidates {
        // A path under an ambiguously renamed directory is not moved at all.
        if let Some(split_dir) = check_dir_split(&path, split_dirs) {
            splits.insert(split_dir.to_vec());
            *dirty = true;
            continue;
        }
        let Some((old_dir, new_dir)) = check_dir_renamed(&path, renamer_dirs) else {
            continue;
        };
        // Skip when the target directory is itself excluded, or lies inside an
        // excluded directory while this side has no plain addition under
        // `old_dir`.
        let new_dir_is_exclusion = exclusions.contains(new_dir);
        let new_dir_inside_exclusion = exclusions
            .iter()
            .any(|dir| directory_contains_proper(dir, new_dir));
        if new_dir_is_exclusion
            || (new_dir_inside_exclusion
                && !side_has_pure_add_under_dir(side, base_map, &side_rename_src, old_dir))
        {
            info_messages.push(MergeInfoMessage::DirRenameSkippedDueToRerename {
                old_dir: old_dir.to_vec(),
                path: path.clone(),
                new_dir: new_dir.to_vec(),
            });
            continue;
        }
        let dest = apply_dir_rename(old_dir, new_dir, &path);
        if dest == path {
            continue;
        }
        let renamed_from = side_rename_src.get(path.as_slice()).map(|s| s.to_vec());
        planned
            .entry(dest.clone())
            .or_default()
            .push(DirRenameMove {
                from: path,
                to: dest,
                renamed_from,
            });
    }

    let mut moves = Vec::new();
    for (dest, group) in planned {
        // Several sources competing for one destination: report, do not move.
        if group.len() > 1 {
            *dirty = true;
            collisions.push(DirRenameCollision {
                dest,
                sources: group.into_iter().map(|m| m.from).collect(),
            });
            continue;
        }
        moves.push(group.into_iter().next().expect("non-empty"));
    }
    moves
}
7002
7003fn check_dir_split<'a>(path: &[u8], split_dirs: &'a BTreeSet<Vec<u8>>) -> Option<&'a [u8]> {
7004 let mut dir = parent_dir(path)?;
7005 loop {
7006 if let Some(split_dir) = split_dirs.get(dir) {
7007 return Some(split_dir);
7008 }
7009 dir = parent_dir(dir)?;
7010 }
7011}
7012
/// Returns `true` when `child` is `parent`, then `/`, then possibly more
/// bytes.
///
/// An empty `parent` never contains anything, and a directory does not
/// contain itself.
fn directory_contains_proper(parent: &[u8], child: &[u8]) -> bool {
    if parent.is_empty() {
        return false;
    }
    match child.strip_prefix(parent) {
        Some(rest) => rest.first() == Some(&b'/'),
        None => false,
    }
}
7019
7020fn side_has_pure_add_under_dir(
7021 side: &MergeEntryMap,
7022 base_map: &MergeEntryMap,
7023 side_rename_src: &BTreeMap<&[u8], &[u8]>,
7024 dir: &[u8],
7025) -> bool {
7026 side.keys().any(|path| {
7027 path_is_under_dir(path, dir)
7028 && !base_map.contains_key(path)
7029 && !side_rename_src.contains_key(path.as_slice())
7030 })
7031}
7032
/// Returns `true` when `path` starts with the non-empty `dir` immediately
/// followed by `/`.
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    if dir.is_empty() || !path.starts_with(dir) {
        return false;
    }
    path.get(dir.len()) == Some(&b'/')
}
7036
/// Applies planned directory-rename relocations for one side.
///
/// `side_is_ours` selects which of `ours` / `theirs` is "this side". For each
/// move, the destination is checked first. If it is blocked, the move is
/// recorded in `collisions`, `dirty` is set, and no map is changed. Otherwise
/// the entry at `mv.from` is moved to `mv.to`, and a [`RehomeInfo`] is stored
/// in `rehomed` under the destination if at least one map actually held an
/// entry at `mv.from`.
///
/// `original_base` is consulted for what the base held at the source and
/// destination paths; `file_rename_dests` tells whether the destination is
/// already the target of a file rename made by the other side.
#[allow(clippy::too_many_arguments)]
fn apply_rehome_moves(
    original_base: &MergeEntryMap,
    file_rename_dests: &BTreeMap<Vec<u8>, MergeRename>,
    base: &mut MergeEntryMap,
    ours: &mut MergeEntryMap,
    theirs: &mut MergeEntryMap,
    moves: Vec<DirRenameMove>,
    side_is_ours: bool,
    rehomed: &mut BTreeMap<Vec<u8>, RehomeSides>,
    collisions: &mut Vec<DirRenameCollision>,
    dirty: &mut bool,
) {
    for mv in moves {
        // This side already has a file at the destination, or paths beneath it.
        let occupied_on_this_side = if side_is_ours {
            ours.contains_key(&mv.to) || map_has_directory_at(ours, &mv.to)
        } else {
            theirs.contains_key(&mv.to) || map_has_directory_at(theirs, &mv.to)
        };
        // The destination is the target of a file rename made by the OTHER side.
        let occupied_by_cross_rename =
            file_rename_dests
                .get(&mv.to)
                .is_some_and(|rename| match (side_is_ours, rename.side) {
                    (true, RenameSide::Theirs) | (false, RenameSide::Ours) => true,
                    (true, RenameSide::Ours) | (false, RenameSide::Theirs) => false,
                });
        let base_entry_at_dest = original_base.get(&mv.to).copied();
        let base_entry_at_source = original_base.get(&mv.from).copied();
        let other_side_entry_at_dest = if side_is_ours {
            theirs.get(&mv.to).copied()
        } else {
            ours.get(&mv.to).copied()
        };
        let other_side_entry_at_source = if side_is_ours {
            theirs.get(&mv.from).copied()
        } else {
            ours.get(&mv.from).copied()
        };
        let base_entry_for_shifted_source = base_entry_at_source.or(base_entry_at_dest);
        // True when the move would put a renamed file back onto the path it
        // was renamed from, that path exists in the base, and the other side
        // holds an entry differing from the base at the destination or source.
        let rename_back_to_modified_source = mv
            .renamed_from
            .as_ref()
            .is_some_and(|source| source == &mv.to)
            && base_entry_at_dest.is_some()
            && (other_side_entry_at_dest.is_some_and(|entry| Some(entry) != base_entry_at_dest)
                || other_side_entry_at_source
                    .is_some_and(|entry| Some(entry) != base_entry_for_shifted_source));
        // Blocked: the base already has the destination (unless this is the
        // rename-back case above), or this side occupies it for a reason other
        // than a cross-side rename.
        if ((base_entry_at_dest.is_some() && !rename_back_to_modified_source)
            || (occupied_on_this_side && !occupied_by_cross_rename))
            && mv.to != mv.from
        {
            *dirty = true;
            collisions.push(DirRenameCollision {
                dest: mv.to.clone(),
                sources: vec![mv.from.clone()],
            });
            continue;
        }
        let mut moved = false;
        if occupied_by_cross_rename {
            // Only this side's entry is carried over to the destination; the
            // source path is dropped from the base and the other side.
            base.remove(&mv.from);
            if side_is_ours {
                if let Some(entry) = ours.remove(&mv.from) {
                    ours.insert(mv.to.clone(), entry);
                    moved = true;
                }
                theirs.remove(&mv.from);
            } else {
                ours.remove(&mv.from);
                if let Some(entry) = theirs.remove(&mv.from) {
                    theirs.insert(mv.to.clone(), entry);
                    moved = true;
                }
            }
        } else {
            // Move whatever each of the three maps holds at the source path.
            for m in [&mut *base, &mut *ours, &mut *theirs] {
                if let Some(entry) = m.remove(&mv.from) {
                    m.insert(mv.to.clone(), entry);
                    moved = true;
                }
            }
        }
        if moved {
            let info = RehomeInfo {
                old_path: mv.from.clone(),
                renamed_from: mv.renamed_from.clone(),
                added_on_ours: side_is_ours,
            };
            let entry = rehomed.entry(mv.to.clone()).or_default();
            if side_is_ours {
                entry.ours = Some(info);
            } else {
                entry.theirs = Some(info);
            }
        }
    }
}
7149
7150fn collect_dir_rename_two_to_one(
7151 renames: &MergeRenames,
7152 rehomed: &BTreeMap<Vec<u8>, RehomeSides>,
7153) -> Vec<DirRenameTwoToOne> {
7154 let mut conflicts = Vec::new();
7155 for (dest, sides) in rehomed {
7156 let Some(file_rename) = renames.dest_to_source.get(dest) else {
7157 continue;
7158 };
7159 match file_rename.side {
7160 RenameSide::Ours => {
7161 let Some(info) = sides.theirs.as_ref() else {
7162 continue;
7163 };
7164 let Some(theirs_source) = info.renamed_from.as_ref() else {
7165 continue;
7166 };
7167 conflicts.push(DirRenameTwoToOne {
7168 dest: dest.clone(),
7169 ours_source: file_rename.source.clone(),
7170 theirs_source: theirs_source.clone(),
7171 ours_label_path: dest.clone(),
7172 theirs_label_path: info.old_path.clone(),
7173 });
7174 }
7175 RenameSide::Theirs => {
7176 let Some(info) = sides.ours.as_ref() else {
7177 continue;
7178 };
7179 let Some(ours_source) = info.renamed_from.as_ref() else {
7180 continue;
7181 };
7182 conflicts.push(DirRenameTwoToOne {
7183 dest: dest.clone(),
7184 ours_source: ours_source.clone(),
7185 theirs_source: file_rename.source.clone(),
7186 ours_label_path: info.old_path.clone(),
7187 theirs_label_path: dest.clone(),
7188 });
7189 }
7190 }
7191 }
7192 conflicts
7193}
7194
7195fn map_has_directory_at(map: &MergeEntryMap, path: &[u8]) -> bool {
7196 let mut prefix = path.to_vec();
7197 prefix.push(b'/');
7198 map.keys().any(|candidate| candidate.starts_with(&prefix))
7199}
7200
7201fn remap_rename_destinations(renames: &mut MergeRenames, rehomed: &BTreeMap<Vec<u8>, RehomeSides>) {
7202 if rehomed.is_empty() {
7203 return;
7204 }
7205 let mut remapped_deletes = BTreeMap::new();
7206 for (dest, rd) in std::mem::take(&mut renames.rename_deletes) {
7207 let new_dest = rehomed
7208 .iter()
7209 .find_map(|(new_dest, sides)| {
7210 let moved = sides
7211 .ours
7212 .as_ref()
7213 .is_some_and(|info| info.old_path == dest)
7214 || sides
7215 .theirs
7216 .as_ref()
7217 .is_some_and(|info| info.old_path == dest);
7218 moved.then(|| new_dest.clone())
7219 })
7220 .unwrap_or(dest);
7221 remapped_deletes.insert(new_dest, rd);
7222 }
7223 renames.rename_deletes = remapped_deletes;
7224
7225 for rename in renames.rename_rename_one_to_two.values_mut() {
7226 for (dest, sides) in rehomed {
7227 if sides
7228 .ours
7229 .as_ref()
7230 .is_some_and(|info| info.old_path == rename.ours_dest)
7231 {
7232 rename.ours_dest = dest.clone();
7233 }
7234 if sides
7235 .theirs
7236 .as_ref()
7237 .is_some_and(|info| info.old_path == rename.theirs_dest)
7238 {
7239 rename.theirs_dest = dest.clone();
7240 }
7241 }
7242 }
7243}
7244
7245fn drop_collapsed_rename_rename_conflicts(renames: &mut MergeRenames) {
7246 renames
7247 .rename_rename_one_to_two
7248 .retain(|_, rename| rename.ours_dest != rename.theirs_dest);
7249}
7250
7251fn apply_dir_rename_two_to_one_conflicts(
7252 db: &FileObjectDatabase,
7253 eff_ours: &MergeEntryMap,
7254 eff_theirs: &MergeEntryMap,
7255 conflicts: &[DirRenameTwoToOne],
7256 paths: &mut [MergedPath],
7257 leaves: &mut MergeEntryMap,
7258 options: &MergeTreesOptions<'_>,
7259) -> Result<()> {
7260 for conflict in conflicts {
7261 let Some(slot) = paths.iter_mut().find(|path| path.path == conflict.dest) else {
7262 continue;
7263 };
7264 let ours_entry = eff_ours.get(&conflict.dest).copied();
7265 let theirs_entry = eff_theirs.get(&conflict.dest).copied();
7266 let (Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
7267 (ours_entry, theirs_entry)
7268 else {
7269 continue;
7270 };
7271 let ours_bytes = merge_blob_bytes(db, &ours_oid)?;
7272 let theirs_bytes = merge_blob_bytes(db, &theirs_oid)?;
7273 let (resolved_mode, mode_conflict) = merge_file_modes(None, ours_mode, theirs_mode);
7274 let result = if is_mergeable_file_mode(ours_mode) && is_mergeable_file_mode(theirs_mode) {
7275 merge_blobs(
7276 &[],
7277 &ours_bytes,
7278 &theirs_bytes,
7279 &MergeBlobOptions {
7280 ours_label: &qualify_label(options.ours_label, &conflict.ours_label_path),
7281 theirs_label: &qualify_label(options.theirs_label, &conflict.theirs_label_path),
7282 base_label: options.ancestor_label,
7283 style: options.style,
7284 },
7285 )
7286 } else {
7287 MergeBlobResult {
7288 content: ours_bytes.clone(),
7289 conflicted: true,
7290 }
7291 };
7292 let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
7293 leaves.insert(conflict.dest.clone(), (resolved_mode, oid));
7294 slot.stages = MergeStages {
7295 base: None,
7296 ours: ours_entry,
7297 theirs: theirs_entry,
7298 };
7299 slot.result = Some((resolved_mode, oid));
7300 slot.worktree = Some((
7301 if ours_mode == theirs_mode {
7302 ours_mode
7303 } else {
7304 0o100644
7305 },
7306 result.content,
7307 ));
7308 slot.conflict = Some(MergeConflictKind::RenameRenameTwoToOne {
7309 ours_path: conflict.ours_source.clone(),
7310 theirs_path: conflict.theirs_source.clone(),
7311 });
7312 slot.auto_merged = !mode_conflict;
7313 }
7314 Ok(())
7315}
7316
7317#[allow(clippy::too_many_arguments)]
7318fn apply_rename_rename_one_to_two_conflicts(
7319 db: &FileObjectDatabase,
7320 base_map: &MergeEntryMap,
7321 eff_ours: &MergeEntryMap,
7322 eff_theirs: &MergeEntryMap,
7323 conflicts: &BTreeMap<Vec<u8>, RenameRenameOneToTwo>,
7324 paths: &mut Vec<MergedPath>,
7325 leaves: &mut MergeEntryMap,
7326 options: &MergeTreesOptions<'_>,
7327) -> Result<()> {
7328 for (old_path, conflict) in conflicts {
7329 let base_entry = base_map.get(old_path).copied();
7330 let ours_entry = eff_ours.get(&conflict.ours_dest).copied();
7331 let theirs_entry = eff_theirs.get(&conflict.theirs_dest).copied();
7332 let theirs_add_at_ours_dest = eff_theirs.get(&conflict.ours_dest).copied();
7333 let ours_add_at_theirs_dest = eff_ours.get(&conflict.theirs_dest).copied();
7334
7335 leaves.remove(old_path);
7336 leaves.remove(&conflict.ours_dest);
7337 leaves.remove(&conflict.theirs_dest);
7338 paths.retain(|path| {
7339 path.path != *old_path
7340 && path.path != conflict.ours_dest
7341 && path.path != conflict.theirs_dest
7342 });
7343
7344 paths.push(MergedPath {
7345 path: old_path.clone(),
7346 stages: MergeStages {
7347 base: base_entry,
7348 ours: None,
7349 theirs: None,
7350 },
7351 result: None,
7352 worktree: None,
7353 conflict: Some(MergeConflictKind::RenameRenameOneToTwo {
7354 old_path: old_path.clone(),
7355 ours_path: conflict.ours_dest.clone(),
7356 theirs_path: conflict.theirs_dest.clone(),
7357 ours_label: options.ours_label.to_string(),
7358 theirs_label: options.theirs_label.to_string(),
7359 }),
7360 auto_merged: false,
7361 });
7362
7363 let ours_worktree = match ours_entry {
7364 Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
7365 None => None,
7366 };
7367 paths.push(MergedPath {
7368 path: conflict.ours_dest.clone(),
7369 stages: MergeStages {
7370 base: None,
7371 ours: ours_entry,
7372 theirs: theirs_add_at_ours_dest,
7373 },
7374 result: None,
7375 worktree: ours_worktree,
7376 conflict: Some(MergeConflictKind::RenameRenameOneToTwoStage),
7377 auto_merged: false,
7378 });
7379
7380 let theirs_worktree = match theirs_entry {
7381 Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
7382 None => None,
7383 };
7384 paths.push(MergedPath {
7385 path: conflict.theirs_dest.clone(),
7386 stages: MergeStages {
7387 base: None,
7388 ours: ours_add_at_theirs_dest,
7389 theirs: theirs_entry,
7390 },
7391 result: None,
7392 worktree: theirs_worktree,
7393 conflict: Some(MergeConflictKind::RenameRenameOneToTwoStage),
7394 auto_merged: false,
7395 });
7396 }
7397 Ok(())
7398}
7399
/// Builds `"<label>:<path>"`, rendering `path` with lossy UTF-8 conversion so
/// invalid byte sequences become U+FFFD.
fn qualify_label(label: &str, path: &[u8]) -> String {
    let rendered_path = String::from_utf8_lossy(path);
    let mut qualified = String::with_capacity(label.len() + 1 + rendered_path.len());
    qualified.push_str(label);
    qualified.push(':');
    qualified.push_str(&rendered_path);
    qualified
}
7405
7406fn entry_map_as_tracked(map: &MergeEntryMap) -> BTreeMap<Vec<u8>, TrackedEntry> {
7409 map.iter()
7410 .map(|(path, (mode, oid))| {
7411 (
7412 path.clone(),
7413 TrackedEntry {
7414 mode: *mode,
7415 oid: *oid,
7416 },
7417 )
7418 })
7419 .collect()
7420}
7421
7422#[cfg(test)]
7423mod tests {
7424 use super::*;
7425 use sley_formats::RepositoryLayout;
7426 use sley_object::TreeEntry;
7427 use sley_odb::ObjectWriter;
7428 use std::path::PathBuf;
7429 use std::sync::atomic::{AtomicU64, Ordering};
7430
/// Counter that `temp_root` increments on every call so each temporary
/// directory name created by this test module is distinct within the process.
static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
7432
/// Stages one blob in the index of a repository created by
/// `RepositoryLayout::init_at` and expects `diff_name_status_head_worktree`
/// to report it as `A\thello.txt`.
#[test]
fn name_status_reports_added_from_index() {
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
    let blob = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = db
        .write_object(blob)
        .expect("test operation should succeed");

    let staged_entry = sley_index::IndexEntry {
        ctime_seconds: 0,
        ctime_nanoseconds: 0,
        mtime_seconds: 0,
        mtime_nanoseconds: 0,
        dev: 0,
        ino: 0,
        mode: 0o100644,
        uid: 0,
        gid: 0,
        size: 6,
        oid,
        flags: "hello.txt".len() as u16,
        flags_extended: 0,
        path: BString::from(b"hello.txt"),
    };
    let index = Index {
        version: 2,
        entries: vec![staged_entry],
        extensions: Vec::new(),
        checksum: None,
    };
    let index_bytes = index
        .write_v2_sha1()
        .expect("test operation should succeed");
    fs::write(layout.git_dir.join("index"), index_bytes).expect("test operation should succeed");
    fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");

    let reported = diff_name_status_head_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(reported[0].line(), "A\thello.txt");

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7476
/// Writes an index holding a single gitlink entry at `deps/sub` and expects
/// the index-vs-worktree diff to return one entry and to list that gitlink,
/// with its path and oid, in `staged_gitlinks`.
#[test]
fn index_worktree_diff_returns_staged_gitlinks() {
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "1111111111111111111111111111111111111111",
    )
    .expect("test operation should succeed");

    let gitlink_entry = sley_index::IndexEntry {
        ctime_seconds: 0,
        ctime_nanoseconds: 0,
        mtime_seconds: 0,
        mtime_nanoseconds: 0,
        dev: 0,
        ino: 0,
        mode: sley_index::GITLINK_MODE,
        uid: 0,
        gid: 0,
        size: 0,
        oid,
        flags: "deps/sub".len() as u16,
        flags_extended: 0,
        path: BString::from(b"deps/sub"),
    };
    let index = Index {
        version: 2,
        entries: vec![gitlink_entry],
        extensions: Vec::new(),
        checksum: None,
    };
    let index_bytes = index
        .write_v2_sha1()
        .expect("test operation should succeed");
    fs::write(layout.git_dir.join("index"), index_bytes).expect("test operation should succeed");

    let diff = diff_name_status_index_worktree_with_options_and_gitlinks(
        &root,
        &layout.git_dir,
        ObjectFormat::Sha1,
        DiffNameStatusOptions::default(),
    )
    .expect("test operation should succeed");

    assert_eq!(diff.entries.len(), 1);
    let staged = &diff.staged_gitlinks;
    assert_eq!(staged.len(), 1);
    let link = &staged[0];
    assert_eq!(link.path.as_bytes(), b"deps/sub");
    assert_eq!(link.oid, oid);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7531
/// With one clean tracked file and an untracked symlink whose target does not
/// exist, the index-vs-worktree diff must succeed and report no changes.
#[cfg(unix)]
#[test]
fn index_worktree_diff_ignores_untracked_dangling_symlink() {
    use std::os::unix::fs::symlink;

    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
    let blob = EncodedObject::new(ObjectType::Blob, b"clean\n".to_vec());
    let oid = db
        .write_object(blob)
        .expect("test operation should succeed");

    let tracked_entry = sley_index::IndexEntry {
        ctime_seconds: 0,
        ctime_nanoseconds: 0,
        mtime_seconds: 0,
        mtime_nanoseconds: 0,
        dev: 0,
        ino: 0,
        mode: 0o100644,
        uid: 0,
        gid: 0,
        size: 6,
        oid,
        flags: "tracked.txt".len() as u16,
        flags_extended: 0,
        path: BString::from(b"tracked.txt"),
    };
    let index = Index {
        version: 2,
        entries: vec![tracked_entry],
        extensions: Vec::new(),
        checksum: None,
    };
    let index_bytes = index
        .write_v2_sha1()
        .expect("test operation should succeed");
    fs::write(layout.git_dir.join("index"), index_bytes).expect("test operation should succeed");
    fs::write(root.join("tracked.txt"), b"clean\n").expect("test operation should succeed");
    symlink("missing-target", root.join("untracked-link"))
        .expect("test operation should succeed");

    let diff_options = DiffNameStatusOptions {
        detect_renames: false,
        detect_copies: false,
        find_copies_harder: false,
        rename_empty: true,
    };
    let reported = diff_name_status_index_worktree_with_options(
        &root,
        &layout.git_dir,
        ObjectFormat::Sha1,
        diff_options,
    )
    .expect("untracked dangling symlink should be ignored");
    assert!(reported.is_empty());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7591
/// Builds an index entry whose mtime, mode and size are copied from the
/// worktree file but whose oid is bogus, then expects the diff to report no
/// change: a matching stat entry is trusted rather than re-hashing the file.
#[test]
fn index_worktree_diff_trusts_non_racy_stat_cache() {
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");

    let tracked_file = root.join("tracked.txt");
    fs::write(&tracked_file, b"clean\n").expect("test operation should succeed");
    let stat = fs::symlink_metadata(&tracked_file).expect("test operation should succeed");
    let (mtime_seconds, mtime_nanoseconds) =
        sley_index::file_mtime_parts(&stat).expect("test operation should succeed");
    let bogus_oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "1111111111111111111111111111111111111111",
    )
    .expect("test operation should succeed");

    let cached_entry = sley_index::IndexEntry {
        ctime_seconds: 0,
        ctime_nanoseconds: 0,
        mtime_seconds: mtime_seconds as u32,
        mtime_nanoseconds: mtime_nanoseconds as u32,
        dev: 0,
        ino: 0,
        mode: sley_index::worktree_metadata_mode(&stat),
        uid: 0,
        gid: 0,
        size: stat.len() as u32,
        oid: bogus_oid,
        flags: "tracked.txt".len() as u16,
        flags_extended: 0,
        path: BString::from(b"tracked.txt"),
    };
    let index = Index {
        version: 2,
        entries: vec![cached_entry],
        extensions: Vec::new(),
        checksum: None,
    };

    // NOTE(review): the pause presumably makes the index file's timestamp later
    // than the worktree file's, so the entry is not treated as racy — confirm.
    std::thread::sleep(std::time::Duration::from_millis(1100));
    let index_bytes = index
        .write_v2_sha1()
        .expect("test operation should succeed");
    fs::write(layout.git_dir.join("index"), index_bytes).expect("test operation should succeed");

    let reported = diff_name_status_index_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert!(
        reported.is_empty(),
        "a clean non-racy stat match must reuse the cached index oid"
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7645
7646 fn temp_root() -> PathBuf {
7647 let path = std::env::temp_dir().join(format!(
7648 "sley-diff-{}-{}",
7649 std::process::id(),
7650 TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
7651 ));
7652 fs::create_dir_all(&path).expect("test operation should succeed");
7653 path
7654 }
7655
7656 fn merge_opts() -> MergeBlobOptions<'static> {
7659 MergeBlobOptions {
7660 ours_label: "ours",
7661 theirs_label: "theirs",
7662 base_label: "base",
7663 style: ConflictStyle::Merge,
7664 }
7665 }
7666
/// `split_lines` keeps the trailing `\n` inside each line's `content`, flags
/// it via `has_newline`, and yields nothing for empty input.
#[test]
fn split_lines_preserves_content_and_newlines() {
    let parsed = split_lines(b"a\nb\nc\n");
    assert_eq!(parsed.len(), 3);

    let (first, last) = (&parsed[0], &parsed[2]);
    assert_eq!(first.content, b"a\n");
    assert!(first.has_newline);
    assert_eq!(last.content, b"c\n");
    assert!(last.has_newline);

    assert!(split_lines(b"").is_empty());
}
7677
/// A final line without `\n` is reported with `has_newline == false` and
/// compares unequal to the same text terminated by a newline.
#[test]
fn split_lines_tracks_missing_final_newline() {
    let unterminated = split_lines(b"a\nb");
    assert_eq!(unterminated.len(), 2);
    assert!(unterminated[0].has_newline);

    let tail = unterminated[1];
    assert!(!tail.has_newline);
    assert_eq!(tail.content, b"b");
    assert_eq!(tail.bytes_without_newline(), b"b");

    let terminated = split_lines(b"b\n");
    assert_ne!(tail, terminated[0]);
}
7690
/// Replacing the middle line yields equal / delete / insert / equal.
#[test]
fn myers_replace_single_line() {
    let before = split_lines(b"a\nb\nc\n");
    let after = split_lines(b"a\nx\nc\n");
    let expected = vec![
        DiffOp::Equal(1),
        DiffOp::Delete(1),
        DiffOp::Insert(1),
        DiffOp::Equal(1),
    ];
    assert_eq!(myers_diff_lines(&before, &after), expected);
}
7705
/// Identical inputs collapse into one `Equal` run covering every line.
#[test]
fn myers_identical_is_single_equal() {
    let text = b"a\nb\nc\n";
    let left = split_lines(text);
    let right = split_lines(text);
    let ops = myers_diff_lines(&left, &right);
    assert_eq!(ops, vec![DiffOp::Equal(3)]);
}
7712
/// Diffing against an empty side is a single insert or delete run, and
/// removing one interior line is reported as a lone `Delete` between equals.
#[test]
fn myers_pure_insert_and_delete() {
    let nothing = split_lines(b"");
    let pair = split_lines(b"a\nb\n");
    assert_eq!(myers_diff_lines(&nothing, &pair), vec![DiffOp::Insert(2)]);
    assert_eq!(myers_diff_lines(&pair, &nothing), vec![DiffOp::Delete(2)]);

    let four_lines = split_lines(b"a\nb\nc\nd\n");
    let three_lines = split_lines(b"a\nc\nd\n");
    let expected = vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)];
    assert_eq!(myers_diff_lines(&four_lines, &three_lines), expected);
}
7727
/// Replays the edit script over `old`/`new`, checking that `Equal` runs pair
/// identical lines, that the replay rebuilds `new`, and that the script
/// contains exactly four edited lines.
#[test]
fn myers_reconstructs_new_and_is_minimal() {
    let old = split_lines(b"the\nquick\nbrown\nfox\n");
    let new = split_lines(b"the\nlazy\nbrown\ncat\n");
    let ops = myers_diff_lines(&old, &new);

    let mut old_pos = 0usize;
    let mut new_pos = 0usize;
    let mut edit_count = 0usize;
    let mut rebuilt: Vec<u8> = Vec::new();
    for op in &ops {
        match *op {
            DiffOp::Equal(count) => {
                let kept_old = &old[old_pos..old_pos + count];
                let kept_new = &new[new_pos..new_pos + count];
                for (old_line, new_line) in kept_old.iter().zip(kept_new) {
                    assert_eq!(old_line, new_line);
                    rebuilt.extend_from_slice(old_line.content);
                }
                old_pos += count;
                new_pos += count;
            }
            DiffOp::Delete(count) => {
                old_pos += count;
                edit_count += count;
            }
            DiffOp::Insert(count) => {
                for added in &new[new_pos..new_pos + count] {
                    rebuilt.extend_from_slice(added.content);
                }
                new_pos += count;
                edit_count += count;
            }
        }
    }

    assert_eq!(rebuilt, b"the\nlazy\nbrown\ncat\n");
    assert_eq!(edit_count, 4);
}
7765
/// Edits to the first line (ours) and last line (theirs) merge cleanly and
/// both appear in the result.
#[test]
fn merge_non_overlapping_changes_is_clean() {
    let merged = merge_blobs(
        b"a\nb\nc\nd\ne\n",
        b"A\nb\nc\nd\ne\n",
        b"a\nb\nc\nd\nE\n",
        &merge_opts(),
    );
    assert!(!merged.conflicted);
    assert_eq!(merged.content, b"A\nb\nc\nd\nE\n");
}
7775
/// When both sides make the same edit, the merge is clean and contains it once.
#[test]
fn merge_identical_changes_no_conflict() {
    let shared_edit = b"a\nX\nc\n";
    let merged = merge_blobs(b"a\nb\nc\n", shared_edit, shared_edit, &merge_opts());
    assert!(!merged.conflicted);
    assert_eq!(merged.content, b"a\nX\nc\n");
}
7785
/// Conflicting edits to the same line produce labelled `<<<<<<<` / `=======`
/// / `>>>>>>>` markers around the two versions, byte for byte.
#[test]
fn merge_overlapping_change_emits_exact_markers() {
    let merged = merge_blobs(
        b"a\nb\nc\n",
        b"a\nOURS\nc\n",
        b"a\nTHEIRS\nc\n",
        &merge_opts(),
    );
    assert!(merged.conflicted);

    let expected = b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\nc\n".to_vec();
    assert_eq!(merged.content, expected);
}
7798
/// With `ConflictStyle::Diff3` the conflict block also carries a
/// `||||||| base` section holding the base version of the line.
#[test]
fn merge_diff3_style_includes_base_section() {
    let mut options = merge_opts();
    options.style = ConflictStyle::Diff3;

    let merged = merge_blobs(b"a\nb\nc\n", b"a\nOURS\nc\n", b"a\nTHEIRS\nc\n", &options);
    assert!(merged.conflicted);

    let expected =
        b"a\n<<<<<<< ours\nOURS\n||||||| base\nb\n=======\nTHEIRS\n>>>>>>> theirs\nc\n".to_vec();
    assert_eq!(merged.content, expected);
}
7816
/// Empty labels produce bare marker lines with no trailing space.
#[test]
fn merge_empty_label_omits_trailing_space() {
    let unlabeled = MergeBlobOptions {
        style: ConflictStyle::Merge,
        base_label: "",
        ours_label: "",
        theirs_label: "",
    };
    let merged = merge_blobs(b"a\nb\nc\n", b"a\nOURS\nc\n", b"a\nTHEIRS\nc\n", &unlabeled);
    assert!(merged.conflicted);

    let expected = b"a\n<<<<<<<\nOURS\n=======\nTHEIRS\n>>>>>>>\nc\n".to_vec();
    assert_eq!(merged.content, expected);
}
7836
/// Two different additions over an empty base conflict as one whole-file block.
#[test]
fn merge_add_add_empty_base_conflicts() {
    let merged = merge_blobs(b"", b"x\ny\n", b"p\nq\n", &merge_opts());
    assert!(merged.conflicted);

    let expected = b"<<<<<<< ours\nx\ny\n=======\np\nq\n>>>>>>> theirs\n".to_vec();
    assert_eq!(merged.content, expected);
}
7846
/// Identical additions over an empty base merge cleanly.
#[test]
fn merge_add_add_empty_base_identical_is_clean() {
    let added = b"x\ny\n";
    let merged = merge_blobs(b"", added, added, &merge_opts());
    assert!(!merged.conflicted);
    assert_eq!(merged.content, b"x\ny\n");
}
7853
/// A line deleted by ours and untouched by theirs stays deleted.
#[test]
fn merge_deletion_one_side_takes_deletion() {
    let base = b"a\nb\nc\n";
    let merged = merge_blobs(base, b"a\nc\n", base, &merge_opts());
    assert!(!merged.conflicted);
    assert_eq!(merged.content, b"a\nc\n");
}
7861
/// Deleting a line on one side while the other modifies it is a conflict whose
/// ours section is empty.
#[test]
fn merge_deletion_vs_modification_conflicts() {
    let merged = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nB!\nc\n", &merge_opts());
    assert!(merged.conflicted);

    let expected = b"a\n<<<<<<< ours\n=======\nB!\n>>>>>>> theirs\nc\n".to_vec();
    assert_eq!(merged.content, expected);
}
7873
/// When the conflicting last lines lack a trailing newline, each marker still
/// begins on its own line in the expected output.
#[test]
fn merge_missing_final_newline_marker_starts_on_own_line() {
    let merged = merge_blobs(b"a\nb", b"a\nOURS", b"a\nTHEIRS", &merge_opts());
    assert!(merged.conflicted);

    let expected = b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\n".to_vec();
    assert_eq!(merged.content, expected);
}
7888
/// Ours drops the final newline and theirs is unchanged: the clean result
/// keeps the newline-less ending.
#[test]
fn merge_clean_preserves_missing_final_newline() {
    let base = b"a\nb\n";
    let merged = merge_blobs(base, b"a\nb", base, &merge_opts());
    assert!(!merged.conflicted);
    assert_eq!(merged.content, b"a\nb");
}
7897
/// Both sides appending the same line yields a clean merge with it once.
#[test]
fn merge_both_append_identical_tail_is_clean() {
    let appended = b"a\nz\n";
    let merged = merge_blobs(b"a\n", appended, appended, &merge_opts());
    assert!(!merged.conflicted);
    assert_eq!(merged.content, b"a\nz\n");
}
7904
/// If ours is byte-identical to base, the merge result is exactly theirs,
/// even when theirs repeats lines that also occur in base.
#[test]
fn merge_when_ours_equals_base_yields_theirs() {
    let unchanged = b"b\na\n";
    let incoming = b"b\nb\nc\na\nc\n";
    let merged = merge_blobs(unchanged, unchanged, incoming, &merge_opts());
    assert!(!merged.conflicted);
    assert_eq!(merged.content, incoming.to_vec());
}
7915 fn applied(outcome: ApplyOutcome) -> Vec<u8> {
7916 match outcome {
7917 ApplyOutcome::Applied(bytes) => bytes,
7918 ApplyOutcome::Rejected => panic!("expected Applied, got Rejected"),
7919 }
7920 }
7921
/// A patch with two `diff --git` sections parses into two file patches with
/// the expected paths, hunk ranges and hunk lines.
#[test]
fn parse_multi_file_patch() {
    let text = b"\
diff --git a/one.txt b/one.txt
index aaaaaaa..bbbbbbb 100644
--- a/one.txt
+++ b/one.txt
@@ -1,3 +1,3 @@
 alpha
-beta
+BETA
 gamma
diff --git a/two.txt b/two.txt
index ccccccc..ddddddd 100644
--- a/two.txt
+++ b/two.txt
@@ -1,2 +1,3 @@
 first
+inserted
 second
";
    let parsed = parse_unified_patch(text).expect("test operation should succeed");
    assert_eq!(parsed.len(), 2);
    let (first, second) = (&parsed[0], &parsed[1]);

    assert_eq!(first.old_path.as_deref(), Some(b"one.txt".as_slice()));
    assert_eq!(first.new_path.as_deref(), Some(b"one.txt".as_slice()));
    assert_eq!(first.old_mode, None);
    assert_eq!(first.hunks.len(), 1);

    let hunk = &first.hunks[0];
    let ranges = (hunk.old_start, hunk.old_len, hunk.new_start, hunk.new_len);
    assert_eq!(ranges, (1, 3, 1, 3));
    let expected_lines = vec![
        HunkLine::Context(b"alpha".to_vec()),
        HunkLine::Delete(b"beta".to_vec()),
        HunkLine::Insert(b"BETA".to_vec()),
        HunkLine::Context(b"gamma".to_vec()),
    ];
    assert_eq!(hunk.lines, expected_lines);

    assert_eq!(second.new_path.as_deref(), Some(b"two.txt".as_slice()));
    assert_eq!(second.hunks[0].new_len, 3);
}
7968
/// A hunk header range written without a length (`-1`) is parsed with a
/// length of one.
#[test]
fn parse_default_hunk_range_length() {
    let text = b"\
--- a/x
+++ b/x
@@ -1 +1,2 @@
 line
+added
";
    let parsed = parse_unified_patch(text).expect("test operation should succeed");
    let hunk = &parsed[0].hunks[0];
    let ranges = (hunk.old_start, hunk.old_len, hunk.new_start, hunk.new_len);
    assert_eq!(ranges, (1, 1, 1, 2));
}
7986
/// A hunk header that appears before any file header is a parse error.
#[test]
fn parse_hunk_header_before_file_errors() {
    let outcome = parse_unified_patch(b"@@ -1,1 +1,1 @@\n context\n");
    assert!(outcome.is_err());
}
7992
/// A hunk whose body has fewer lines than its header announces is rejected by
/// the parser.
#[test]
fn parse_mismatched_counts_errors() {
    let outcome = parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n only\n+new\n");
    assert!(outcome.is_err());
}
7999
/// A hunk whose context matches the base exactly applies and replaces the
/// middle line.
#[test]
fn apply_clean_hunk() {
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
    )
    .expect("test operation should succeed");
    let patched = applied(apply_file_patch(b"alpha\nbeta\ngamma\n", &parsed[0]));
    assert_eq!(patched, b"alpha\nBETA\ngamma\n");
}
8010
/// The hunk header points at line 2 but its context starts at line 4 of the
/// base; the patch is still expected to apply at the matching position.
#[test]
fn apply_with_line_offset() {
    let original = b"pre1\npre2\npre3\nalpha\nbeta\ngamma\ntail\n";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -2,4 +2,4 @@\n alpha\n-beta\n+BETA\n gamma\n tail\n",
    )
    .expect("test operation should succeed");
    let patched = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(patched, b"pre1\npre2\npre3\nalpha\nBETA\ngamma\ntail\n");
}
8027
/// The hunk header claims line 50 while the three-line base only matches at
/// line 1; the patch is still expected to apply there.
#[test]
fn apply_with_negative_line_offset() {
    let original = b"alpha\nbeta\ngamma\n";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -50,3 +50,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
    )
    .expect("test operation should succeed");
    let patched = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(patched, b"alpha\nBETA\ngamma\n");
}
8039
/// Two hunks in one file patch are both applied to the same base.
#[test]
fn apply_multiple_hunks() {
    let original = b"a\nb\nc\nd\ne\nf\ng\nh\n";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n\
@@ -1,3 +1,3 @@\n a\n-b\n+B\n c\n\
@@ -6,3 +6,3 @@\n f\n-g\n+G\n h\n",
    )
    .expect("test operation should succeed");
    let patched = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(patched, b"a\nB\nc\nd\ne\nf\nG\nh\n");
}
8052
/// The line the hunk wants to delete is absent from the base, so the patch is
/// rejected.
#[test]
fn reject_on_context_mismatch() {
    let original = b"alpha\nDIFFERENT\ngamma\n";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
    )
    .expect("test operation should succeed");
    let outcome = apply_file_patch(original, &parsed[0]);
    assert_eq!(outcome, ApplyOutcome::Rejected);
}
8062
/// The hunk appends after `anchor` with no trailing context, but the base has
/// further lines after `anchor`; the patch is expected to be rejected.
#[test]
fn reject_when_match_end_required_but_not_at_eof() {
    let original = b"one\ntwo\nanchor\nalready\nappended\n";
    let parsed =
        parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n")
            .expect("test operation should succeed");
    let outcome = apply_file_patch(original, &parsed[0]);
    assert_eq!(outcome, ApplyOutcome::Rejected);
}
8080
/// The same appending hunk applies when `anchor` really is the last line of
/// the base.
#[test]
fn append_at_eof_matches_when_context_reaches_end() {
    let original = b"one\ntwo\nanchor\n";
    let parsed =
        parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n")
            .expect("test operation should succeed");
    let patched = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(patched, b"one\ntwo\nanchor\nadded1\nadded2\n");
}
8093
/// The hunk starts at line 1 with `alpha` as its first context line, but the
/// base has an extra line before `alpha`; the patch is expected to be rejected.
#[test]
fn reject_when_match_beginning_required_but_not_at_start() {
    let original = b"junk\nalpha\nbeta\ngamma\n";
    let parsed =
        parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n alpha\n+INSERT\n beta\n")
            .expect("test operation should succeed");
    let outcome = apply_file_patch(original, &parsed[0]);
    assert_eq!(outcome, ApplyOutcome::Rejected);
}
8105
/// Only the trailing context line differs from the base (`DIVERGED` instead
/// of `gamma`); the patch is still rejected rather than applied fuzzily.
#[test]
fn no_default_fuzz_rejects_on_trailing_context_mismatch() {
    let original = b"alpha\nbeta\nDIVERGED\n";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
    )
    .expect("test operation should succeed");
    let outcome = apply_file_patch(original, &parsed[0]);
    assert_eq!(outcome, ApplyOutcome::Rejected);
}
8120
/// A `new file mode` patch parses as a new file (no old path, mode recorded)
/// and applying it yields the added lines regardless of the base passed in.
#[test]
fn parse_and_apply_new_file() {
    let parsed = parse_unified_patch(
        b"\
diff --git a/new.txt b/new.txt
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/new.txt
@@ -0,0 +1,2 @@
+hello
+world
",
    )
    .expect("test operation should succeed");
    assert!(patches_first_is_new(&parsed));

    let creation = &parsed[0];
    assert_eq!(creation.old_path, None);
    assert_eq!(creation.new_path.as_deref(), Some(b"new.txt".as_slice()));
    assert_eq!(creation.new_mode, Some(0o100644));

    let patched = applied(apply_file_patch(b"garbage that is ignored", creation));
    assert_eq!(patched, b"hello\nworld\n");
}
8144
8145 fn patches_first_is_new(patches: &[FilePatch]) -> bool {
8146 patches.first().map(|p| p.is_new).unwrap_or(false)
8147 }
8148
/// A `deleted file mode` patch parses as a deletion (no new path, old mode
/// recorded) and applying it to the matching base yields empty content.
#[test]
fn parse_and_apply_delete_file() {
    let parsed = parse_unified_patch(
        b"\
diff --git a/gone.txt b/gone.txt
deleted file mode 100644
index 1111111..0000000
--- a/gone.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-hello
-world
",
    )
    .expect("test operation should succeed");

    let removal = &parsed[0];
    assert!(removal.is_delete);
    assert_eq!(removal.old_path.as_deref(), Some(b"gone.txt".as_slice()));
    assert_eq!(removal.new_path, None);
    assert_eq!(removal.old_mode, Some(0o100644));

    let patched = applied(apply_file_patch(b"hello\nworld\n", removal));
    assert_eq!(patched, b"");
}
8171
/// A pure rename (no hunks) is parsed with `is_rename` set and both paths
/// taken from the patch headers.
#[test]
fn parse_rename_headers() {
    let parsed = parse_unified_patch(
        b"\
diff --git a/old/name.txt b/new/name.txt
similarity index 100%
rename from old/name.txt
rename to new/name.txt
",
    )
    .expect("test operation should succeed");

    let rename = &parsed[0];
    assert!(rename.is_rename);
    assert_eq!(
        rename.old_path.as_deref(),
        Some(b"old/name.txt".as_slice())
    );
    assert_eq!(
        rename.new_path.as_deref(),
        Some(b"new/name.txt".as_slice())
    );
    assert!(rename.hunks.is_empty());
}
8194
/// `old mode` / `new mode` headers populate both mode fields without marking
/// the patch as a creation or deletion.
#[test]
fn parse_mode_change_headers() {
    let parsed = parse_unified_patch(
        b"\
diff --git a/script.sh b/script.sh
old mode 100644
new mode 100755
",
    )
    .expect("test operation should succeed");

    let mode_change = &parsed[0];
    assert_eq!(mode_change.old_mode, Some(0o100644));
    assert_eq!(mode_change.new_mode, Some(0o100755));
    assert!(!mode_change.is_new);
    assert!(!mode_change.is_delete);
}
8210
/// The base ends without a newline and the hunk keeps that last line as
/// context; the patched output must still end without a newline.
#[test]
fn no_final_newline_base_preserved_when_untouched() {
    let original = b"alpha\nbeta\nnotail";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n-alpha\n+ALPHA\n beta\n notail\n\\ No newline at end of file\n",
    )
    .expect("test operation should succeed");
    let patched = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(patched, b"ALPHA\nbeta\nnotail");
}
8229
/// A `\ No newline at end of file` marker after the inserted line sets only
/// `new_no_newline`, and the patched output loses its trailing newline.
#[test]
fn no_final_newline_added_by_patch() {
    let original = b"alpha\nbeta\n";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n",
    )
    .expect("test operation should succeed");

    let hunk = &parsed[0].hunks[0];
    assert!(hunk.new_no_newline);
    assert!(!hunk.old_no_newline);

    let patched = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(patched, b"alpha\nbeta-notail");
}
8244
/// A no-newline marker after a context line sets both `old_no_newline` and
/// `new_no_newline`; the patch applies to a newline-less base and keeps it so.
#[test]
fn no_final_newline_in_base_matched_and_kept() {
    let original = b"alpha\nbeta";
    let parsed = parse_unified_patch(
        b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n-alpha\n+ALPHA\n beta\n\\ No newline at end of file\n",
    )
    .expect("test operation should succeed");

    let hunk = &parsed[0].hunks[0];
    assert!(hunk.old_no_newline);
    assert!(hunk.new_no_newline);

    let patched = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(patched, b"ALPHA\nbeta");
}
8259
8260 #[test]
8261 fn no_final_newline_mismatch_rejected() {
8262 let base = b"alpha\nbeta\n"; let patch = parse_unified_patch(
8266 b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n\\ No newline at end of file\n+beta2\n",
8267 )
8268 .expect("test operation should succeed");
8269 assert!(patch[0].hunks[0].old_no_newline);
8270 assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
8271 }
8272
8273 #[test]
8274 fn delete_with_no_final_newline() {
8275 let base = b"only line no newline";
8277 let patch = parse_unified_patch(
8278 b"--- a/x\n+++ /dev/null\n@@ -1,1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n",
8279 )
8280 .expect("test operation should succeed");
8281 assert!(patch[0].is_delete);
8282 let out = applied(apply_file_patch(base, &patch[0]));
8283 assert_eq!(out, b"");
8284 }
8285
8286 #[test]
8287 fn apply_pure_insertion_hunk() {
8288 let base = b"first\nsecond\n";
8289 let patch =
8290 parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n first\n+middle\n second\n")
8291 .expect("test operation should succeed");
8292 let out = applied(apply_file_patch(base, &patch[0]));
8293 assert_eq!(out, b"first\nmiddle\nsecond\n");
8294 }
8295
8296 #[test]
8297 fn apply_pure_deletion_hunk() {
8298 let base = b"first\nmiddle\nsecond\n";
8299 let patch =
8300 parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,2 @@\n first\n-middle\n second\n")
8301 .expect("test operation should succeed");
8302 let out = applied(apply_file_patch(base, &patch[0]));
8303 assert_eq!(out, b"first\nsecond\n");
8304 }
8305
8306 #[test]
8307 fn apply_then_reparse_round_trip() {
8308 let base = b"l1\nl2\nl3\nl4\nl5\n";
8312 let text = b"--- a/f\n+++ b/f\n@@ -2,3 +2,4 @@\n l2\n-l3\n+L3\n+L3b\n l4\n";
8313 let p1 = parse_unified_patch(text).expect("test operation should succeed");
8314 let p2 = parse_unified_patch(text).expect("test operation should succeed");
8315 assert_eq!(p1, p2);
8316 let out = applied(apply_file_patch(base, &p1[0]));
8317 assert_eq!(out, b"l1\nl2\nL3\nL3b\nl4\nl5\n");
8318 }
8319
8320 #[test]
8321 fn empty_context_line_without_trailing_space() {
8322 let base = b"a\n\nb\n";
8325 let patch = parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n a\n\n-b\n+B\n")
8326 .expect("test operation should succeed");
8327 assert_eq!(patch[0].hunks[0].lines[1], HunkLine::Context(Vec::new()));
8328 let out = applied(apply_file_patch(base, &patch[0]));
8329 assert_eq!(out, b"a\n\nB\n");
8330 }
8331
8332 #[test]
8333 fn split_blob_lines_handles_edge_cases() {
8334 assert!(split_blob_lines(b"").is_empty());
8335 let single = split_blob_lines(b"abc");
8336 assert_eq!(single.len(), 1);
8337 assert!(single[0].no_newline);
8338 let terminated = split_blob_lines(b"abc\n");
8339 assert_eq!(terminated.len(), 1);
8340 assert!(!terminated[0].no_newline);
8341 let blank_then_eof = split_blob_lines(b"x\n");
8342 assert_eq!(blank_then_eof.len(), 1);
8343 }
8344
8345 #[test]
8348 fn similarity_identical_and_empty_conventions() {
8349 assert_eq!(blob_similarity(b"hello\nworld\n", b"hello\nworld\n"), 100);
8351 assert_eq!(blob_similarity(b"", b""), 100);
8353 assert_eq!(blob_similarity(b"", b"hello\n"), 0);
8355 assert_eq!(blob_similarity(b"hello\n", b""), 0);
8356 }
8357
8358 #[test]
8359 fn similarity_one_changed_line_is_75_and_symmetric() {
8360 let a = b"one\ntwo\nthree\nfour\nfive\n";
8366 let b = b"one\ntwo\nTHREE\nfour\nfive\n";
8367 assert_eq!(blob_similarity(a, b), 75);
8368 assert_eq!(blob_similarity(b, a), 75);
8370 }
8371
8372 #[test]
8373 fn similarity_one_edited_line_of_three_is_66_not_67() {
8374 assert_eq!(blob_similarity(b"a\nb\nc\n", b"a\nB\nc\n"), 66);
8380 assert_eq!(blob_similarity(b"a\nB\nc\n", b"a\nb\nc\n"), 66);
8381 }
8382
8383 #[test]
8384 fn similarity_small_append_is_88() {
8385 let a = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n";
8389 let b = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n";
8390 assert_eq!(blob_similarity(a, b), 88);
8391 }
8392
8393 #[test]
8394 fn similarity_half_rewrite_is_50() {
8395 let a = b"l1\nl2\nl3\nl4\nl5\nl6\n";
8398 let b = b"l1\nl2\nl3\nX4\nX5\nX6\n";
8399 assert_eq!(blob_similarity(a, b), 50);
8400 }
8401
8402 fn write_blob(db: &mut FileObjectDatabase, bytes: &[u8]) -> ObjectId {
8406 db.write_object(EncodedObject::new(ObjectType::Blob, bytes.to_vec()))
8407 .expect("test operation should succeed")
8408 }
8409
8410 fn write_tree(db: &mut FileObjectDatabase, entries: &[(&[u8], u32, ObjectId)]) -> ObjectId {
8413 let mut tree_entries: Vec<TreeEntry> = entries
8414 .iter()
8415 .map(|(name, mode, oid)| TreeEntry {
8416 mode: *mode,
8417 name: BString::from(*name),
8418 oid: *oid,
8419 })
8420 .collect();
8421 tree_entries.sort_by(|a, b| a.name.cmp(&b.name));
8422 let tree = Tree {
8423 entries: tree_entries,
8424 };
8425 db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
8426 .expect("test operation should succeed")
8427 }
8428
8429 #[test]
8430 fn inexact_rename_detected_with_plausible_score() {
8431 let root = temp_root();
8434 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8435 .expect("test operation should succeed");
8436 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8437
8438 let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
8439 let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
8440 let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
8441 let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
8442
8443 let opts = RenameDetectionOptions {
8444 base: DiffNameStatusOptions {
8445 detect_renames: true,
8446 detect_copies: false,
8447 find_copies_harder: false,
8448 rename_empty: true,
8449 },
8450 detect_inexact: true,
8451 rename_threshold: DEFAULT_RENAME_THRESHOLD,
8452 copy_threshold: DEFAULT_RENAME_THRESHOLD,
8453 };
8454 let entries = diff_name_status_trees_with_rename_options(
8455 &db,
8456 ObjectFormat::Sha1,
8457 &left,
8458 &right,
8459 opts,
8460 )
8461 .expect("test operation should succeed");
8462
8463 assert_eq!(
8464 entries.len(),
8465 1,
8466 "expected a single rename entry: {entries:?}"
8467 );
8468 assert_eq!(entries[0].status, NameStatus::Renamed(75));
8469 assert_eq!(
8470 entries[0].old_path.as_ref().map(|p| p.as_bytes()),
8471 Some(b"a.txt".as_slice())
8472 );
8473 assert_eq!(entries[0].path, b"b.txt");
8474 assert_eq!(entries[0].line(), "R075\ta.txt\tb.txt");
8475 fs::remove_dir_all(root).expect("test operation should succeed");
8476 }
8477
8478 #[test]
8479 fn inexact_rename_below_threshold_not_detected() {
8480 let root = temp_root();
8483 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8484 .expect("test operation should succeed");
8485 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8486
8487 let old = write_blob(&mut db, b"l1\nl2\nl3\nl4\nl5\nl6\n");
8488 let new = write_blob(&mut db, b"l1\nl2\nl3\nX4\nX5\nX6\n");
8489 let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
8490 let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
8491
8492 let opts = RenameDetectionOptions {
8493 base: DiffNameStatusOptions {
8494 detect_renames: true,
8495 detect_copies: false,
8496 find_copies_harder: false,
8497 rename_empty: true,
8498 },
8499 detect_inexact: true,
8500 rename_threshold: 60,
8501 copy_threshold: 60,
8502 };
8503 let entries = diff_name_status_trees_with_rename_options(
8504 &db,
8505 ObjectFormat::Sha1,
8506 &left,
8507 &right,
8508 opts,
8509 )
8510 .expect("test operation should succeed");
8511
8512 let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
8513 assert!(
8514 statuses.contains(&NameStatus::Added) && statuses.contains(&NameStatus::Deleted),
8515 "expected separate add/delete below threshold, got {entries:?}"
8516 );
8517 assert!(
8518 !statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))),
8519 "no rename should be reported below threshold: {entries:?}"
8520 );
8521
8522 let opts_low = RenameDetectionOptions {
8525 rename_threshold: 50,
8526 ..opts
8527 };
8528 let entries_low = diff_name_status_trees_with_rename_options(
8529 &db,
8530 ObjectFormat::Sha1,
8531 &left,
8532 &right,
8533 opts_low,
8534 )
8535 .expect("test operation should succeed");
8536 assert_eq!(entries_low.len(), 1);
8537 assert_eq!(entries_low[0].status, NameStatus::Renamed(50));
8538 fs::remove_dir_all(root).expect("test operation should succeed");
8539 }
8540
8541 #[test]
8542 fn exact_rename_scores_100_and_takes_priority() {
8543 let root = temp_root();
8546 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8547 .expect("test operation should succeed");
8548 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8549
8550 let oid = write_blob(&mut db, b"identical\ncontent\nhere\n");
8551 let left = write_tree(&mut db, &[(b"old.txt", 0o100644, oid)]);
8552 let right = write_tree(&mut db, &[(b"new.txt", 0o100644, oid)]);
8553
8554 for inexact in [false, true] {
8555 let opts = RenameDetectionOptions {
8556 base: DiffNameStatusOptions {
8557 detect_renames: true,
8558 detect_copies: false,
8559 find_copies_harder: false,
8560 rename_empty: true,
8561 },
8562 detect_inexact: inexact,
8563 rename_threshold: DEFAULT_RENAME_THRESHOLD,
8564 copy_threshold: DEFAULT_RENAME_THRESHOLD,
8565 };
8566 let entries = diff_name_status_trees_with_rename_options(
8567 &db,
8568 ObjectFormat::Sha1,
8569 &left,
8570 &right,
8571 opts,
8572 )
8573 .expect("test operation should succeed");
8574 assert_eq!(entries.len(), 1, "inexact={inexact}: {entries:?}");
8575 assert_eq!(entries[0].status, NameStatus::Renamed(100));
8576 assert_eq!(
8577 entries[0].old_path.as_ref().map(|p| p.as_bytes()),
8578 Some(b"old.txt".as_slice())
8579 );
8580 assert_eq!(entries[0].path, b"new.txt");
8581 }
8582 fs::remove_dir_all(root).expect("test operation should succeed");
8583 }
8584
8585 #[test]
8586 fn inexact_copy_detected_with_score() {
8587 let root = temp_root();
8592 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8593 .expect("test operation should succeed");
8594 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8595
8596 let orig = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\neee\n");
8597 let copy = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\nEEE\n");
8598 let left = write_tree(&mut db, &[(b"orig.txt", 0o100644, orig.clone())]);
8599 let right = write_tree(
8600 &mut db,
8601 &[(b"orig.txt", 0o100644, orig), (b"copy.txt", 0o100644, copy)],
8602 );
8603
8604 let opts = RenameDetectionOptions {
8605 base: DiffNameStatusOptions {
8606 detect_renames: true,
8607 detect_copies: true,
8608 find_copies_harder: true,
8609 rename_empty: true,
8610 },
8611 detect_inexact: true,
8612 rename_threshold: DEFAULT_RENAME_THRESHOLD,
8613 copy_threshold: DEFAULT_RENAME_THRESHOLD,
8614 };
8615 let entries = diff_name_status_trees_with_rename_options(
8616 &db,
8617 ObjectFormat::Sha1,
8618 &left,
8619 &right,
8620 opts,
8621 )
8622 .expect("test operation should succeed");
8623
8624 let copy_entry = entries
8625 .iter()
8626 .find(|e| e.path == b"copy.txt")
8627 .unwrap_or_else(|| panic!("no copy.txt entry: {entries:?}"));
8628 assert_eq!(copy_entry.status, NameStatus::Copied(80));
8629 assert_eq!(
8630 copy_entry.old_path.as_ref().map(|p| p.as_bytes()),
8631 Some(b"orig.txt".as_slice())
8632 );
8633 assert!(
8635 entries.iter().all(|e| e.status != NameStatus::Deleted),
8636 "copy must not delete the source: {entries:?}"
8637 );
8638 fs::remove_dir_all(root).expect("test operation should succeed");
8639 }
8640
8641 #[test]
8642 fn inexact_rename_with_small_edit_scores_88() {
8643 let root = temp_root();
8646 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8647 .expect("test operation should succeed");
8648 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8649
8650 let old = write_blob(
8651 &mut db,
8652 b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n",
8653 );
8654 let new = write_blob(
8655 &mut db,
8656 b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n",
8657 );
8658 let left = write_tree(&mut db, &[(b"src.txt", 0o100644, old)]);
8659 let right = write_tree(&mut db, &[(b"dst.txt", 0o100644, new)]);
8660
8661 let opts = RenameDetectionOptions::inexact(DiffNameStatusOptions {
8662 detect_renames: true,
8663 detect_copies: false,
8664 find_copies_harder: false,
8665 rename_empty: true,
8666 });
8667 let entries = diff_name_status_trees_with_rename_options(
8668 &db,
8669 ObjectFormat::Sha1,
8670 &left,
8671 &right,
8672 opts,
8673 )
8674 .expect("test operation should succeed");
8675
8676 assert_eq!(entries.len(), 1, "{entries:?}");
8677 assert_eq!(entries[0].status, NameStatus::Renamed(88));
8678 assert_eq!(
8679 entries[0].old_path.as_ref().map(|p| p.as_bytes()),
8680 Some(b"src.txt".as_slice())
8681 );
8682 assert_eq!(entries[0].path, b"dst.txt");
8683 fs::remove_dir_all(root).expect("test operation should succeed");
8684 }
8685
8686 #[test]
8687 fn inexact_disabled_default_preserves_exact_only_behavior() {
8688 assert!(!RenameDetectionOptions::default().detect_inexact);
8692 assert_eq!(
8693 RenameDetectionOptions::default().rename_threshold,
8694 DEFAULT_RENAME_THRESHOLD
8695 );
8696
8697 let root = temp_root();
8698 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8699 .expect("test operation should succeed");
8700 let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8701
8702 let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
8703 let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
8704 let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
8705 let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
8706
8707 let entries = diff_name_status_trees_with_rename_options(
8708 &db,
8709 ObjectFormat::Sha1,
8710 &left,
8711 &right,
8712 RenameDetectionOptions::default(),
8713 )
8714 .expect("test operation should succeed");
8715 let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
8716 assert!(statuses.contains(&NameStatus::Added));
8717 assert!(statuses.contains(&NameStatus::Deleted));
8718 assert!(!statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))));
8719 fs::remove_dir_all(root).expect("test operation should succeed");
8720 }
8721
8722 fn apply_ops(old: &[DiffLine<'_>], new: &[DiffLine<'_>], ops: &[DiffOp]) -> Vec<u8> {
8730 let mut oi = 0usize;
8731 let mut ni = 0usize;
8732 let mut rebuilt: Vec<u8> = Vec::new();
8733 for op in ops {
8734 match *op {
8735 DiffOp::Equal(n) => {
8736 for _ in 0..n {
8737 assert_eq!(old[oi], new[ni], "Equal op covered unequal lines");
8739 rebuilt.extend_from_slice(old[oi].content);
8740 oi += 1;
8741 ni += 1;
8742 }
8743 }
8744 DiffOp::Delete(n) => oi += n,
8745 DiffOp::Insert(n) => {
8746 for _ in 0..n {
8747 rebuilt.extend_from_slice(new[ni].content);
8748 ni += 1;
8749 }
8750 }
8751 }
8752 }
8753 assert_eq!(oi, old.len(), "script did not consume all of old");
8755 assert_eq!(ni, new.len(), "script did not consume all of new");
8756 rebuilt
8757 }
8758
8759 fn assert_valid_script(old_bytes: &[u8], new_bytes: &[u8], ops: &[DiffOp]) {
8762 let old = split_lines(old_bytes);
8763 let new = split_lines(new_bytes);
8764 let rebuilt = apply_ops(&old, &new, ops);
8765 assert_eq!(rebuilt, new_bytes, "script did not rebuild new");
8766 for pair in ops.windows(2) {
8767 let same_kind = matches!(
8768 (pair[0], pair[1]),
8769 (DiffOp::Equal(_), DiffOp::Equal(_))
8770 | (DiffOp::Delete(_), DiffOp::Delete(_))
8771 | (DiffOp::Insert(_), DiffOp::Insert(_))
8772 );
8773 assert!(!same_kind, "ops not coalesced: {:?}", ops);
8774 }
8775 }
8776
8777 fn check_all_algorithms(old_bytes: &[u8], new_bytes: &[u8]) {
8780 let old = split_lines(old_bytes);
8781 let new = split_lines(new_bytes);
8782 for algo in [
8783 DiffAlgorithm::Myers,
8784 DiffAlgorithm::Minimal,
8785 DiffAlgorithm::Patience,
8786 DiffAlgorithm::Histogram,
8787 ] {
8788 let ops = diff_lines_with_algorithm(&old, &new, algo);
8789 assert_valid_script(old_bytes, new_bytes, &ops);
8790 }
8791 }
8792
8793 #[test]
8794 fn patience_and_histogram_match_myers_on_simple_cases() {
8795 let cases: &[(&[u8], &[u8], Vec<DiffOp>)] = &[
8798 (
8799 b"a\nb\nc\n",
8800 b"a\nx\nc\n",
8801 vec![
8802 DiffOp::Equal(1),
8803 DiffOp::Delete(1),
8804 DiffOp::Insert(1),
8805 DiffOp::Equal(1),
8806 ],
8807 ),
8808 (b"a\nb\nc\n", b"a\nb\nc\n", vec![DiffOp::Equal(3)]),
8809 (b"", b"a\nb\n", vec![DiffOp::Insert(2)]),
8810 (b"a\nb\n", b"", vec![DiffOp::Delete(2)]),
8811 (
8812 b"a\nb\nc\nd\n",
8813 b"a\nc\nd\n",
8814 vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)],
8815 ),
8816 ];
8817 for (old_bytes, new_bytes, expected) in cases {
8818 let old = split_lines(old_bytes);
8819 let new = split_lines(new_bytes);
8820 assert_eq!(&patience_diff_lines(&old, &new), expected);
8821 assert_eq!(&histogram_diff_lines(&old, &new), expected);
8822 assert_eq!(&myers_diff_lines(&old, &new), expected);
8823 }
8824 }
8825
8826 #[test]
8827 fn patience_handles_both_empty() {
8828 let empty = split_lines(b"");
8829 assert!(patience_diff_lines(&empty, &empty).is_empty());
8830 assert!(histogram_diff_lines(&empty, &empty).is_empty());
8831 }
8832
8833 #[test]
8834 fn patience_aligns_unique_anchors_across_moved_block() {
8835 check_all_algorithms(
8840 b"alpha\nbeta\ngamma\ndelta\n",
8841 b"gamma\ndelta\nalpha\nbeta\n",
8842 );
8843 }
8844
8845 #[test]
8846 fn histogram_differs_from_myers_keeping_block_contiguous() {
8847 let old = b"b\na\n";
8853 let new = b"a\nb\nb\na\nb\n";
8854 let old_l = split_lines(old);
8855 let new_l = split_lines(new);
8856
8857 let myers = myers_diff_lines(&old_l, &new_l);
8858 let histogram = histogram_diff_lines(&old_l, &new_l);
8859
8860 assert_valid_script(old, new, &myers);
8862 assert_valid_script(old, new, &histogram);
8863
8864 assert_eq!(
8867 myers,
8868 vec![
8869 DiffOp::Insert(1),
8870 DiffOp::Equal(1),
8871 DiffOp::Insert(1),
8872 DiffOp::Equal(1),
8873 DiffOp::Insert(1),
8874 ]
8875 );
8876 assert_eq!(
8877 histogram,
8878 vec![DiffOp::Insert(2), DiffOp::Equal(2), DiffOp::Insert(1)]
8879 );
8880 assert_ne!(myers, histogram);
8882 }
8883
8884 #[test]
8885 fn patience_differs_from_myers_on_repeated_lines() {
8886 let old = b"b\na\n";
8892 let new = b"a\na\nb\n";
8893 let old_l = split_lines(old);
8894 let new_l = split_lines(new);
8895
8896 let myers = myers_diff_lines(&old_l, &new_l);
8897 let patience = patience_diff_lines(&old_l, &new_l);
8898
8899 assert_valid_script(old, new, &myers);
8900 assert_valid_script(old, new, &patience);
8901
8902 assert_eq!(
8903 myers,
8904 vec![DiffOp::Delete(1), DiffOp::Equal(1), DiffOp::Insert(2)]
8905 );
8906 assert_eq!(
8907 patience,
8908 vec![DiffOp::Insert(2), DiffOp::Equal(1), DiffOp::Delete(1)]
8909 );
8910 assert_ne!(myers, patience);
8911 }
8912
8913 #[test]
8914 fn realistic_function_insertion_all_valid() {
8915 let old = b"int f() {\n return 1;\n}\n";
8920 let new = b"int g() {\n return 2;\n}\n\nint f() {\n return 1;\n}\n";
8921 check_all_algorithms(old, new);
8922 }
8923
8924 #[test]
8925 fn histogram_anchors_on_rare_line_when_no_unique_line_exists() {
8926 check_all_algorithms(b"x\nx\nmid\nx\nx\n", b"x\nmid\nx\nx\nx\n");
8931 check_all_algorithms(
8932 b"dup\ndup\nrare\ndup\ndup\n",
8933 b"dup\nrare\ndup\ndup\ndup\ndup\n",
8934 );
8935 }
8936
8937 #[test]
8938 fn all_algorithms_treat_missing_final_newline_as_change() {
8939 let old = split_lines(b"a\nb");
8941 let new = split_lines(b"a\nb\n");
8942 for algo in [
8943 DiffAlgorithm::Myers,
8944 DiffAlgorithm::Minimal,
8945 DiffAlgorithm::Patience,
8946 DiffAlgorithm::Histogram,
8947 ] {
8948 let ops = diff_lines_with_algorithm(&old, &new, algo);
8949 assert_eq!(
8950 ops,
8951 vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Insert(1)],
8952 "algorithm {:?} mishandled missing final newline",
8953 algo
8954 );
8955 }
8956 }
8957
8958 #[test]
8959 fn dispatcher_routes_each_variant() {
8960 let old = split_lines(b"a\nb\nc\n");
8961 let new = split_lines(b"a\nx\nc\n");
8962 assert_eq!(
8963 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Myers),
8964 myers_diff_lines(&old, &new)
8965 );
8966 assert_eq!(
8968 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Minimal),
8969 myers_diff_lines(&old, &new)
8970 );
8971 assert_eq!(
8972 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Patience),
8973 patience_diff_lines(&old, &new)
8974 );
8975 assert_eq!(
8976 diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Histogram),
8977 histogram_diff_lines(&old, &new)
8978 );
8979 }
8980
8981 #[test]
8982 fn patience_recurses_into_gaps_between_anchors() {
8983 let old = b"head\nmid1\nmid2\ntail\n";
8986 let new = b"head\nMID\nmid2\ntail\n";
8987 let old_l = split_lines(old);
8988 let new_l = split_lines(new);
8989 let ops = patience_diff_lines(&old_l, &new_l);
8990 assert_eq!(
8991 ops,
8992 vec![
8993 DiffOp::Equal(1),
8994 DiffOp::Delete(1),
8995 DiffOp::Insert(1),
8996 DiffOp::Equal(2),
8997 ]
8998 );
8999 assert_valid_script(old, new, &ops);
9000 }
9001
9002 #[test]
9003 fn patience_falls_back_to_myers_with_no_unique_lines() {
9004 let old = b"a\na\nb\nb\n";
9008 let new = b"a\na\na\nb\n";
9009 let old_l = split_lines(old);
9010 let new_l = split_lines(new);
9011 let ops = patience_diff_lines(&old_l, &new_l);
9012 assert_valid_script(old, new, &ops);
9017 }
9018
9019 #[test]
9020 fn algorithms_agree_with_myers_when_all_lines_distinct() {
9021 let cases: &[(&[u8], &[u8])] = &[
9024 (b"a\nb\nc\nd\ne\n", b"a\nc\nd\nf\ne\n"),
9025 (b"1\n2\n3\n4\n5\n6\n", b"1\n3\n2\n4\n6\n5\n"),
9026 (b"q\nw\ne\nr\nt\ny\n", b"q\nw\nx\nr\nt\nz\n"),
9027 ];
9028 for (old_bytes, new_bytes) in cases {
9029 let old = split_lines(old_bytes);
9030 let new = split_lines(new_bytes);
9031 let myers = myers_diff_lines(&old, &new);
9032 assert_eq!(
9033 patience_diff_lines(&old, &new),
9034 myers,
9035 "patience must equal Myers when all lines are distinct: {:?}",
9036 old_bytes
9037 );
9038 assert_eq!(
9039 histogram_diff_lines(&old, &new),
9040 myers,
9041 "histogram must equal Myers when all lines are distinct: {:?}",
9042 old_bytes
9043 );
9044 }
9045 }
9046
    /// Feeds 400 pseudo-random input pairs, built from a four-line alphabet,
    /// through `check_all_algorithms`.
    #[test]
    fn fuzz_all_algorithms_reconstruct_new() {
        // Fixed seed: the generator below is deterministic, so every run
        // exercises the same sequence of inputs.
        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
        // Multiply-add step in wrapping u64 arithmetic; the upper 31 bits of
        // the new state are returned as the next value.
        let mut next = || {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            (state >> 33) as u32
        };
        let alphabet = [b"a\n", b"b\n", b"c\n", b"d\n"];
        // Builds one input of 0..=8 lines drawn from `alphabet`. The order of
        // `rng()` calls determines the generated inputs.
        let build = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
            let len = (rng() % 9) as usize;
            let mut buf = Vec::new();
            for _ in 0..len {
                let pick = (rng() % alphabet.len() as u32) as usize;
                buf.extend_from_slice(alphabet[pick]);
            }
            // When the next draw is a multiple of 4, drop the last byte (a
            // `\n`, since every alphabet entry ends in one) so the input has
            // no final newline.
            if !buf.is_empty() && rng().is_multiple_of(4) {
                buf.pop();
            }
            buf
        };
        for _ in 0..400 {
            let old_bytes = build(&mut next);
            let new_bytes = build(&mut next);
            check_all_algorithms(&old_bytes, &new_bytes);
        }
    }
9079
    /// Enumerates every pair of inputs of 0..=5 lines over a three-line
    /// alphabet and checks that the Myers, patience and histogram scripts each
    /// replay (via `apply_ops`) to the new input.
    #[test]
    fn exhaustive_small_inputs_all_algorithms_reconstruct() {
        let syms = [b"a\n".to_vec(), b"b\n".to_vec(), b"c\n".to_vec()];
        // Decodes `code` as `n` base-3 digits, least significant first, and
        // emits the corresponding symbol for each digit.
        let make = |n: usize, mut code: usize| -> Vec<u8> {
            let mut v = Vec::new();
            for _ in 0..n {
                v.extend_from_slice(&syms[code % 3]);
                code /= 3;
            }
            v
        };
        for la in 0..=5usize {
            for lb in 0..=5usize {
                // 3^len codes cover every sequence of that length.
                for ca in 0..3usize.pow(la as u32) {
                    for cb in 0..3usize.pow(lb as u32) {
                        let ob = make(la, ca);
                        let nb = make(lb, cb);
                        let ol = split_lines(&ob);
                        let nl = split_lines(&nb);
                        assert_eq!(apply_ops(&ol, &nl, &myers_diff_lines(&ol, &nl)), nb);
                        assert_eq!(apply_ops(&ol, &nl, &patience_diff_lines(&ol, &nl)), nb);
                        assert_eq!(apply_ops(&ol, &nl, &histogram_diff_lines(&ol, &nl)), nb);
                    }
                }
            }
        }
    }
9111
    /// Fuzzes 200 pairs of inputs whose lines are all distinct within each
    /// side and requires patience and histogram to equal the Myers script.
    #[test]
    fn fuzz_distinct_lines_patience_histogram_equal_myers() {
        // Fixed seed keeps the generated inputs identical across runs.
        let mut state: u64 = 0x1234_5678_9ABC_DEF0;
        // Multiply-add step in wrapping u64 arithmetic; the upper 31 bits of
        // the new state are returned as the next value.
        let mut next = || {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            (state >> 33) as u32
        };
        for _ in 0..200 {
            // Keeps each of the lines "0\n".."9\n" when the draw is even, in
            // ascending order, so no line repeats within one input.
            let pick_subseq = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
                let mut buf = Vec::new();
                for t in 0..10u32 {
                    if rng().is_multiple_of(2) {
                        buf.extend_from_slice(format!("{t}\n").as_bytes());
                    }
                }
                buf
            };
            let old_bytes = pick_subseq(&mut next);
            let new_bytes = pick_subseq(&mut next);
            let old = split_lines(&old_bytes);
            let new = split_lines(&new_bytes);
            let myers = myers_diff_lines(&old, &new);
            assert_eq!(patience_diff_lines(&old, &new), myers);
            assert_eq!(histogram_diff_lines(&old, &new), myers);
        }
    }
9145
9146 fn status_lines(entries: &[NameStatusEntry]) -> Vec<String> {
9155 entries.iter().map(|entry| entry.line()).collect()
9156 }
9157
9158 fn assert_tree_diff_matches_full(
9161 db: &FileObjectDatabase,
9162 left: &ObjectId,
9163 right: &ObjectId,
9164 options: DiffNameStatusOptions,
9165 ) {
9166 let (full_left, full_right) = collect_full_tree_pair(db, ObjectFormat::Sha1, left, right)
9168 .expect("test operation should succeed");
9169 let reference = diff_name_status_maps(
9170 &full_left,
9171 &full_right,
9172 full_left.keys().chain(full_right.keys()),
9173 options,
9174 )
9175 .expect("test operation should succeed");
9176
9177 let (pruned_left, pruned_right) = changed_tree_entries(db, ObjectFormat::Sha1, left, right)
9179 .expect("test operation should succeed");
9180 let pruned = diff_name_status_maps(
9181 &pruned_left,
9182 &pruned_right,
9183 pruned_left.keys().chain(pruned_right.keys()),
9184 options,
9185 )
9186 .expect("test operation should succeed");
9187
9188 assert_eq!(
9189 status_lines(&reference),
9190 status_lines(&pruned),
9191 "pruned map diff diverged from full map diff for {options:?}"
9192 );
9193
9194 let public =
9197 diff_name_status_trees_with_options(db, ObjectFormat::Sha1, left, right, options)
9198 .expect("test operation should succeed");
9199 assert_eq!(
9200 status_lines(&reference),
9201 status_lines(&public),
9202 "public tree diff diverged from full map diff for {options:?}"
9203 );
9204
9205 for (path, tracked) in &pruned_left {
9209 assert_eq!(
9210 full_left.get(path),
9211 Some(tracked),
9212 "pruned left entry not present (or differs) in full left map: {:?}",
9213 String::from_utf8_lossy(path)
9214 );
9215 }
9216 for (path, tracked) in &pruned_right {
9217 assert_eq!(
9218 full_right.get(path),
9219 Some(tracked),
9220 "pruned right entry not present (or differs) in full right map: {:?}",
9221 String::from_utf8_lossy(path)
9222 );
9223 }
9224 for entry in &reference {
9227 let path = entry.path.as_bytes();
9228 match entry.status {
9229 NameStatus::Added => assert!(
9230 pruned_right.contains_key(path),
9231 "added path dropped by pruning: {:?}",
9232 String::from_utf8_lossy(path)
9233 ),
9234 NameStatus::Deleted => assert!(
9235 pruned_left.contains_key(path),
9236 "deleted path dropped by pruning: {:?}",
9237 String::from_utf8_lossy(path)
9238 ),
9239 NameStatus::Modified => {
9240 assert!(
9241 pruned_left.contains_key(path) && pruned_right.contains_key(path),
9242 "modified path dropped by pruning: {:?}",
9243 String::from_utf8_lossy(path)
9244 );
9245 }
9246 _ => {}
9247 }
9248 }
9249 }
9250
9251 fn assert_tree_diff_matches_full_all_modes(
9255 db: &FileObjectDatabase,
9256 left: &ObjectId,
9257 right: &ObjectId,
9258 ) {
9259 for detect_renames in [false, true] {
9260 for detect_copies in [false, true] {
9261 let options = DiffNameStatusOptions {
9262 detect_renames,
9263 detect_copies,
9264 find_copies_harder: false,
9265 rename_empty: true,
9266 };
9267 assert_tree_diff_matches_full(db, left, right, options);
9268 }
9269 }
9270 }
9271
9272 fn structural_db() -> (PathBuf, FileObjectDatabase) {
9274 let root = temp_root();
9275 let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
9276 .expect("test operation should succeed");
9277 let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
9278 (root, db)
9279 }
9280
9281 #[test]
9282 fn pruned_walk_skips_identical_subtree_and_matches_full() {
9283 let (root, mut db) = structural_db();
9287
9288 let s1 = write_blob(&mut db, b"shared one\n");
9290 let s2 = write_blob(&mut db, b"shared two\n");
9291 let s3 = write_blob(&mut db, b"deep nested\n");
9292 let shared_inner = write_tree(&mut db, &[(b"c.txt", 0o100644, s3.clone())]);
9293 let shared = write_tree(
9294 &mut db,
9295 &[
9296 (b"a.txt", 0o100644, s1.clone()),
9297 (b"b.txt", 0o100644, s2.clone()),
9298 (b"inner", 0o040000, shared_inner.clone()),
9299 ],
9300 );
9301
9302 let app_old = write_blob(&mut db, b"version 1\n");
9304 let app_new = write_blob(&mut db, b"version 2\n");
9305 let app_left = write_tree(&mut db, &[(b"main.rs", 0o100644, app_old)]);
9306 let app_right = write_tree(&mut db, &[(b"main.rs", 0o100644, app_new)]);
9307
9308 let left = write_tree(
9309 &mut db,
9310 &[
9311 (b"app", 0o040000, app_left),
9312 (b"shared", 0o040000, shared.clone()),
9313 ],
9314 );
9315 let right = write_tree(
9316 &mut db,
9317 &[(b"app", 0o040000, app_right), (b"shared", 0o040000, shared)],
9318 );
9319
9320 let (pruned_left, pruned_right) =
9322 changed_tree_entries(&db, ObjectFormat::Sha1, &left, &right)
9323 .expect("test operation should succeed");
9324 assert_eq!(
9325 pruned_left.keys().collect::<Vec<_>>(),
9326 vec![&b"app/main.rs".to_vec()],
9327 "pruning should leave only the changed path on the left"
9328 );
9329 assert_eq!(
9330 pruned_right.keys().collect::<Vec<_>>(),
9331 vec![&b"app/main.rs".to_vec()],
9332 "pruning should leave only the changed path on the right"
9333 );
9334 assert!(
9335 !pruned_left.contains_key(b"shared/a.txt".as_slice()),
9336 "identical shared subtree must not appear in pruned maps"
9337 );
9338
9339 assert_tree_diff_matches_full_all_modes(&db, &left, &right);
9340 fs::remove_dir_all(root).expect("test operation should succeed");
9341 }
9342
/// Exercises a mixed change set in a single diff: a modification inside a
/// nested directory, a top-level deletion and a top-level addition, next to an
/// `untouched` subtree that is the very same tree object on both sides.
///
/// The raw name-status lines are pinned first; both trees are then handed to
/// `assert_tree_diff_matches_full_all_modes`.
/// NOTE(review): that helper presumably compares the pruned walk against a
/// full-tree diff for every option combination — confirm against its definition.
#[test]
fn pruned_walk_matches_full_for_add_delete_modify_nested() {
    let (root, mut db) = structural_db();

    // One blob shared by the untouched subtree and by `stable.txt` on both sides.
    let keep = write_blob(&mut db, b"unchanged\n");
    let untouched_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, keep.clone())]);

    // `dir/` differs between the sides only in `changed.txt`.
    let nested_old = write_blob(&mut db, b"nested old\n");
    let nested_new = write_blob(&mut db, b"nested new\n");
    let dir_left = write_tree(
        &mut db,
        &[
            (b"changed.txt", 0o100644, nested_old),
            (b"stable.txt", 0o100644, keep.clone()),
        ],
    );
    let dir_right = write_tree(
        &mut db,
        &[
            (b"changed.txt", 0o100644, nested_new),
            // Final use of `keep`: move it rather than cloning once more.
            (b"stable.txt", 0o100644, keep),
        ],
    );

    // Blobs that exist on exactly one side of the diff.
    let only_left = write_blob(&mut db, b"will be deleted\n");
    let only_right = write_blob(&mut db, b"freshly added\n");

    let left = write_tree(
        &mut db,
        &[
            (b"dir", 0o040000, dir_left),
            (b"gone.txt", 0o100644, only_left),
            (b"untouched", 0o040000, untouched_dir.clone()),
        ],
    );
    let right = write_tree(
        &mut db,
        &[
            (b"dir", 0o040000, dir_right),
            (b"new.txt", 0o100644, only_right),
            (b"untouched", 0o040000, untouched_dir),
        ],
    );

    // Rename and copy detection are off, so only raw M/D/A statuses are expected.
    let entries = diff_name_status_trees_with_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        DiffNameStatusOptions {
            detect_renames: false,
            detect_copies: false,
            find_copies_harder: false,
            rename_empty: true,
        },
    )
    .expect("test operation should succeed");
    assert_eq!(
        status_lines(&entries),
        vec![
            "M\tdir/changed.txt".to_string(),
            "D\tgone.txt".to_string(),
            "A\tnew.txt".to_string(),
        ],
        "unexpected raw status for mixed nested diff"
    );

    assert_tree_diff_matches_full_all_modes(&db, &left, &right);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
9415
/// A single blob leaves `src/file.txt` and reappears as `dst/renamed.txt`,
/// while the `stable` subtree is the same tree object on both sides.
///
/// With rename detection enabled the diff must report one `R100` entry; the
/// trees are then passed to `assert_tree_diff_matches_full_all_modes`.
/// NOTE(review): that helper presumably cross-checks the pruned walk against a
/// full-tree diff — confirm against its definition.
#[test]
fn pruned_walk_matches_full_for_rename_across_dirs() {
    let (root, mut db) = structural_db();

    let moved = write_blob(&mut db, b"i get moved across directories\n");
    let companion = write_blob(&mut db, b"i stay put\n");
    // `companion` is referenced exactly once, so it is moved into the entry.
    let stable_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, companion)]);

    // The same blob id sits under a different directory and name on each side.
    let src_dir = write_tree(&mut db, &[(b"file.txt", 0o100644, moved.clone())]);
    // Final use of `moved`: hand over ownership instead of cloning again.
    let dst_dir = write_tree(&mut db, &[(b"renamed.txt", 0o100644, moved)]);

    let left = write_tree(
        &mut db,
        &[
            (b"src", 0o040000, src_dir),
            (b"stable", 0o040000, stable_dir.clone()),
        ],
    );
    let right = write_tree(
        &mut db,
        &[
            (b"dst", 0o040000, dst_dir),
            (b"stable", 0o040000, stable_dir),
        ],
    );

    let entries = diff_name_status_trees_with_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        DiffNameStatusOptions {
            detect_renames: true,
            detect_copies: false,
            find_copies_harder: false,
            rename_empty: true,
        },
    )
    .expect("test operation should succeed");
    assert_eq!(
        status_lines(&entries),
        vec!["R100\tsrc/file.txt\tdst/renamed.txt".to_string()],
        "rename across dirs should be detected on pruned set"
    );

    assert_tree_diff_matches_full_all_modes(&db, &left, &right);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
9467
/// Two modifications of different flavours in one diff: `image.bin` changes
/// content (both versions contain NUL bytes) and `run.sh` keeps its blob but
/// flips mode from `0o100644` to `0o100755`. Both must surface as `M`.
#[test]
fn pruned_walk_matches_full_for_binary_and_mode_change() {
    let (tmp_root, mut odb) = structural_db();

    let image_before = write_blob(&mut odb, &[0u8, 159, 146, 150, 0, 255, 1, 2, 3]);
    let image_after = write_blob(&mut odb, &[0u8, 159, 146, 150, 0, 254, 9, 8, 7]);
    let shell_script = write_blob(&mut odb, b"#!/bin/sh\necho hi\n");

    let before = write_tree(
        &mut odb,
        &[
            (b"image.bin", 0o100644, image_before),
            (b"run.sh", 0o100644, shell_script.clone()),
        ],
    );
    // Same script blob, executable bit set: a mode-only change.
    let after = write_tree(
        &mut odb,
        &[
            (b"image.bin", 0o100644, image_after),
            (b"run.sh", 0o100755, shell_script),
        ],
    );

    let plain_options = DiffNameStatusOptions {
        detect_renames: false,
        detect_copies: false,
        find_copies_harder: false,
        rename_empty: true,
    };
    let expected: Vec<String> = ["M\timage.bin", "M\trun.sh"]
        .iter()
        .map(|line| line.to_string())
        .collect();

    let diff = diff_name_status_trees_with_options(
        &odb,
        ObjectFormat::Sha1,
        &before,
        &after,
        plain_options,
    )
    .expect("test operation should succeed");
    assert_eq!(
        status_lines(&diff),
        expected,
        "binary edit and mode-only change should both be Modify"
    );

    assert_tree_diff_matches_full_all_modes(&odb, &before, &after);
    fs::remove_dir_all(tmp_root).expect("test operation should succeed");
}
9517
/// Swaps entry kinds in both directions: `thing` is a directory on the left
/// and a regular file on the right, while `other` goes from file to directory.
/// The expected output lists every affected path as an independent add or
/// delete.
#[test]
fn pruned_walk_matches_full_for_dir_replaced_by_file() {
    let (tmp_root, mut odb) = structural_db();

    // `thing`: directory with two files before, a plain file after.
    let thing_a = write_blob(&mut odb, b"inner a\n");
    let thing_b = write_blob(&mut odb, b"inner b\n");
    let thing_as_dir = write_tree(
        &mut odb,
        &[(b"a.txt", 0o100644, thing_a), (b"b.txt", 0o100644, thing_b)],
    );
    let thing_as_file = write_blob(&mut odb, b"now i am a file\n");

    // `other`: plain file before, directory with one file after.
    let other_as_file = write_blob(&mut odb, b"i was a file\n");
    let other_child = write_blob(&mut odb, b"now nested\n");
    let other_as_dir = write_tree(&mut odb, &[(b"x.txt", 0o100644, other_child)]);

    let before = write_tree(
        &mut odb,
        &[
            (b"other", 0o100644, other_as_file),
            (b"thing", 0o040000, thing_as_dir),
        ],
    );
    let after = write_tree(
        &mut odb,
        &[
            (b"other", 0o040000, other_as_dir),
            (b"thing", 0o100644, thing_as_file),
        ],
    );

    let plain_options = DiffNameStatusOptions {
        detect_renames: false,
        detect_copies: false,
        find_copies_harder: false,
        rename_empty: true,
    };
    let expected: Vec<String> = [
        "D\tother",
        "A\tother/x.txt",
        "A\tthing",
        "D\tthing/a.txt",
        "D\tthing/b.txt",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    let diff = diff_name_status_trees_with_options(
        &odb,
        ObjectFormat::Sha1,
        &before,
        &after,
        plain_options,
    )
    .expect("test operation should succeed");
    assert_eq!(
        status_lines(&diff),
        expected,
        "dir<->file swap should flatten to independent adds/deletes"
    );

    assert_tree_diff_matches_full_all_modes(&odb, &before, &after);
    fs::remove_dir_all(tmp_root).expect("test operation should succeed");
}
9582
/// Diffing a tree against itself: `changed_tree_entries` must return two empty
/// collections and the public name-status diff must be empty as well.
#[test]
fn pruned_walk_matches_full_for_identical_trees() {
    let (tmp_root, mut odb) = structural_db();

    // One blob reused at the top level and inside `sub/`.
    let same = write_blob(&mut odb, b"same\n");
    let nested = write_tree(&mut odb, &[(b"f.txt", 0o100644, same.clone())]);
    let root_tree = write_tree(
        &mut odb,
        &[(b"sub", 0o040000, nested), (b"top.txt", 0o100644, same)],
    );

    let changed = changed_tree_entries(&odb, ObjectFormat::Sha1, &root_tree, &root_tree)
        .expect("test operation should succeed");
    let nothing_changed = changed.0.is_empty() && changed.1.is_empty();
    assert!(
        nothing_changed,
        "identical trees must produce no changed entries"
    );

    let default_options = DiffNameStatusOptions::default();
    let diff = diff_name_status_trees_with_options(
        &odb,
        ObjectFormat::Sha1,
        &root_tree,
        &root_tree,
        default_options,
    )
    .expect("test operation should succeed");
    assert!(diff.is_empty(), "identical trees must produce no diff");

    assert_tree_diff_matches_full_all_modes(&odb, &root_tree, &root_tree);
    fs::remove_dir_all(tmp_root).expect("test operation should succeed");
}
9617
/// With `find_copies_harder` enabled, a copy whose source lives in a subtree
/// that is identical on both sides (`lib/template.txt`) must still be reported.
///
/// The public tree diff is compared against a reference computed by
/// `diff_name_status_maps` over the maps returned by `collect_full_tree_pair`,
/// and the `Copied` entry for `copy.txt` is additionally asserted directly.
/// NOTE(review): `collect_full_tree_pair` presumably flattens both trees
/// without pruning — confirm against its definition.
#[test]
fn find_copies_harder_uses_full_left_map_and_finds_unchanged_source() {
    let (root, mut db) = structural_db();

    let template = write_blob(&mut db, b"reusable boilerplate content\n");
    // `lib/` is the same tree object on both sides of the diff.
    let lib_dir = write_tree(&mut db, &[(b"template.txt", 0o100644, template.clone())]);

    // An unrelated modification that is present on both sides.
    let trigger_old = write_blob(&mut db, b"trigger old\n");
    let trigger_new = write_blob(&mut db, b"trigger new\n");

    let left = write_tree(
        &mut db,
        &[
            (b"lib", 0o040000, lib_dir.clone()),
            (b"trigger.txt", 0o100644, trigger_old),
        ],
    );
    let right = write_tree(
        &mut db,
        &[
            // Final use of `template`: move it rather than cloning once more.
            (b"copy.txt", 0o100644, template),
            (b"lib", 0o040000, lib_dir),
            (b"trigger.txt", 0o100644, trigger_new),
        ],
    );

    let options = DiffNameStatusOptions {
        detect_renames: true,
        detect_copies: true,
        find_copies_harder: true,
        rename_empty: true,
    };

    // Reference result computed from the maps of both complete trees.
    let (full_left, full_right) =
        collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
            .expect("test operation should succeed");
    let reference = diff_name_status_maps(
        &full_left,
        &full_right,
        full_left.keys().chain(full_right.keys()),
        options,
    )
    .expect("test operation should succeed");

    let public =
        diff_name_status_trees_with_options(&db, ObjectFormat::Sha1, &left, &right, options)
            .expect("test operation should succeed");
    assert_eq!(
        status_lines(&reference),
        status_lines(&public),
        "find-copies-harder public diff must match full-map reference"
    );
    // Independent of the reference: the copy itself must be present.
    assert!(
        public
            .iter()
            .any(|entry| matches!(entry.status, NameStatus::Copied(_))
                && entry.old_path.as_ref().map(|p| p.as_bytes())
                    == Some(b"lib/template.txt".as_slice())
                && entry.path == b"copy.txt"),
        "copy from unchanged source must be found with find_copies_harder: {public:?}"
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
9689
/// Inexact rename detection through the public tree diff: `a.txt` becomes
/// `b.txt` with one line changed, next to a `keep` subtree that is the same
/// tree object on both sides.
///
/// The public result must equal a reference computed by
/// `diff_name_status_maps_with_renames` over the maps returned by
/// `collect_full_tree_pair`, and must be exactly one `R075` entry.
#[test]
fn pruned_walk_matches_full_with_inexact_rename_options() {
    let (root, mut db) = structural_db();

    let untouched = write_blob(&mut db, b"untouched file\n");
    // `untouched` is referenced exactly once, so it is moved into the entry.
    let untouched_dir = write_tree(&mut db, &[(b"u.txt", 0o100644, untouched)]);

    // The two blobs differ only in the case of the third line.
    let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
    let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");

    let left = write_tree(
        &mut db,
        &[
            (b"a.txt", 0o100644, old),
            (b"keep", 0o040000, untouched_dir.clone()),
        ],
    );
    let right = write_tree(
        &mut db,
        &[
            (b"b.txt", 0o100644, new),
            (b"keep", 0o040000, untouched_dir),
        ],
    );

    let options = RenameDetectionOptions {
        base: DiffNameStatusOptions {
            detect_renames: true,
            detect_copies: false,
            find_copies_harder: false,
            rename_empty: true,
        },
        detect_inexact: true,
        rename_threshold: DEFAULT_RENAME_THRESHOLD,
        copy_threshold: DEFAULT_RENAME_THRESHOLD,
    };

    // Reference result computed from the maps of both complete trees; blob
    // contents are supplied through `read_blob_bytes`.
    let (full_left, full_right) =
        collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
            .expect("test operation should succeed");
    let reference = diff_name_status_maps_with_renames(
        &full_left,
        &full_right,
        full_left.keys().chain(full_right.keys()),
        options,
        |oid| read_blob_bytes(&db, oid),
    )
    .expect("test operation should succeed");

    let public = diff_name_status_trees_with_rename_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        options,
    )
    .expect("test operation should succeed");

    assert_eq!(
        status_lines(&reference),
        status_lines(&public),
        "inexact rename via pruned walk must match full-map reference"
    );
    assert_eq!(
        status_lines(&public),
        vec!["R075\ta.txt\tb.txt".to_string()],
        "expected a 75% inexact rename"
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
9764}