1use std::fs;
24#[cfg(unix)]
25use std::os::unix::fs::MetadataExt;
26use std::path::{Path, PathBuf};
27
28use crate::config::ConfigSet;
29use crate::diff_indent_heuristic;
30use crate::error::{Error, Result};
31use crate::index::{Index, IndexEntry};
32use crate::objects::{parse_commit, parse_tree, CommitData, ObjectId, ObjectKind, TreeEntry};
33use crate::odb::Odb;
34use crate::userdiff::FuncnameMatcher;
35
36fn imara_unified_hunk_slices(body: &str) -> Vec<&str> {
38 let mut starts: Vec<usize> = Vec::new();
39 if body.starts_with("@@") {
40 starts.push(0);
41 }
42 for (idx, _) in body.match_indices("\n@@ ") {
43 starts.push(idx + 1);
44 }
45 starts.push(body.len());
46 starts.windows(2).map(|w| &body[w[0]..w[1]]).collect()
47}
48
49fn histogram_unified_body_raw(
50 old_content: &str,
51 new_content: &str,
52 context_lines: usize,
53 inter_hunk_context: usize,
54) -> String {
55 use imara_diff::{Algorithm, Diff, Hunk, InternedInput};
56 use std::fmt::Write as _;
57
58 let input = InternedInput::new(old_content, new_content);
59 let mut diff = Diff::compute(Algorithm::Histogram, &input);
60 diff.postprocess_lines(&input);
61
62 let hunks: Vec<Hunk> = diff.hunks().collect();
68 if hunks.is_empty() {
69 return String::new();
70 }
71
72 let ctx = context_lines.min(u32::MAX as usize) as u32;
73 let max_gap = (2usize.saturating_mul(context_lines))
74 .saturating_add(inter_hunk_context)
75 .min(u32::MAX as usize) as u32;
76 let before_len = input.before.len() as u32;
77 let after_len = input.after.len() as u32;
78
79 let mut groups: Vec<&[Hunk]> = Vec::new();
81 let mut group_start = 0usize;
82 for i in 1..hunks.len() {
83 if hunks[i].before.start - hunks[i - 1].before.end > max_gap {
84 groups.push(&hunks[group_start..i]);
85 group_start = i;
86 }
87 }
88 groups.push(&hunks[group_start..]);
89
90 fn push_line(out: &mut String, prefix: char, text: &str) {
91 out.push(prefix);
92 out.push_str(text);
93 if !text.ends_with('\n') {
94 out.push('\n');
95 }
96 }
97
98 fn fmt_side(start: u32, count: u32) -> String {
101 let shown_start = if count == 0 { start } else { start + 1 };
102 if count == 1 {
103 format!("{shown_start}")
104 } else {
105 format!("{shown_start},{count}")
106 }
107 }
108
109 let mut out = String::new();
110 for group in groups {
111 let first = &group[0];
112 let last = &group[group.len() - 1];
113 let b_start = first.before.start.saturating_sub(ctx);
114 let a_start = first.after.start.saturating_sub(ctx);
115 let b_end = (last.before.end.saturating_add(ctx)).min(before_len);
116 let a_end = (last.after.end.saturating_add(ctx)).min(after_len);
117
118 let _ = writeln!(
119 out,
120 "@@ -{} +{} @@",
121 fmt_side(b_start, b_end - b_start),
122 fmt_side(a_start, a_end - a_start)
123 );
124
125 let mut pos = b_start;
126 for hunk in group {
127 for &token in &input.before[pos as usize..hunk.before.start as usize] {
128 push_line(&mut out, ' ', input.interner[token]);
129 }
130 for &token in &input.before[hunk.before.start as usize..hunk.before.end as usize] {
131 push_line(&mut out, '-', input.interner[token]);
132 }
133 for &token in &input.after[hunk.after.start as usize..hunk.after.end as usize] {
134 push_line(&mut out, '+', input.interner[token]);
135 }
136 pos = hunk.before.end;
137 }
138 for &token in &input.before[pos as usize..b_end as usize] {
139 push_line(&mut out, ' ', input.interner[token]);
140 }
141 }
142
143 out
144}
145
146#[allow(clippy::too_many_arguments)]
162pub(crate) fn histogram_unified_body_ignore_fc<F>(
163 old_content: &str,
164 new_content: &str,
165 context_lines: usize,
166 inter_hunk_context: usize,
167 function_context: bool,
168 funcname_matcher: Option<&FuncnameMatcher>,
169 is_ignorable_change: F,
170) -> String
171where
172 F: Fn(&[&str], &[&str]) -> bool,
173{
174 use imara_diff::{Algorithm, Diff, Hunk, InternedInput};
175 use std::fmt::Write as _;
176
177 let input = InternedInput::new(old_content, new_content);
178 let mut diff = Diff::compute(Algorithm::Histogram, &input);
179 diff.postprocess_lines(&input);
180
181 let raw_hunks: Vec<Hunk> = diff.hunks().collect();
182 if raw_hunks.is_empty() {
183 return String::new();
184 }
185
186 let before_len = input.before.len() as i64;
187 let after_len = input.after.len() as i64;
188 let ctxlen = context_lines as i64;
189 let interhunk = inter_hunk_context as i64;
190 let max_common = ctxlen.saturating_add(ctxlen).saturating_add(interhunk);
192 let max_ignorable = ctxlen;
193
194 struct Change {
196 i1: i64,
197 chg1: i64,
198 i2: i64,
199 chg2: i64,
200 ignore: bool,
201 }
202
203 let mut changes: Vec<Change> = Vec::with_capacity(raw_hunks.len());
204 for h in &raw_hunks {
205 let i1 = h.before.start as i64;
206 let chg1 = (h.before.end - h.before.start) as i64;
207 let i2 = h.after.start as i64;
208 let chg2 = (h.after.end - h.after.start) as i64;
209 let removed: Vec<&str> = input.before[h.before.start as usize..h.before.end as usize]
210 .iter()
211 .map(|&t| input.interner[t])
212 .collect();
213 let added: Vec<&str> = input.after[h.after.start as usize..h.after.end as usize]
214 .iter()
215 .map(|&t| input.interner[t])
216 .collect();
217 let ignore = is_ignorable_change(&removed, &added);
218 changes.push(Change {
219 i1,
220 chg1,
221 i2,
222 chg2,
223 ignore,
224 });
225 }
226
227 let before_line = |idx: i64| -> &str { input.interner[input.before[idx as usize]] };
229 let after_line = |idx: i64| -> &str { input.interner[input.after[idx as usize]] };
230
231 let get_hunk = |start: usize| -> Option<(usize, usize)> {
237 let mut scr = start;
243 let mut xchp = start;
244 while xchp < changes.len() && changes[xchp].ignore {
245 let next = xchp + 1;
246 if next >= changes.len()
247 || changes[next].i1 - (changes[xchp].i1 + changes[xchp].chg1) >= max_ignorable
248 {
249 scr = next;
250 }
251 xchp = next;
252 }
253 if scr >= changes.len() {
254 return None;
255 }
256
257 let mut lxch = scr;
258 let mut ignored: i64 = 0;
259 let mut xchp = scr;
260 let mut idx = scr + 1;
261 while idx < changes.len() {
262 let xch = &changes[idx];
263 let prev = &changes[xchp];
264 let distance = xch.i1 - (prev.i1 + prev.chg1);
265 if distance > max_common {
266 break;
267 }
268 if distance < max_ignorable && (!xch.ignore || lxch == xchp) {
269 lxch = idx;
270 ignored = 0;
271 } else if distance < max_ignorable && xch.ignore {
272 ignored += xch.chg2;
273 } else if lxch != xchp
274 && xch.i1 + ignored - (changes[lxch].i1 + changes[lxch].chg1) > max_common
275 {
276 break;
277 } else if !xch.ignore {
278 lxch = idx;
279 ignored = 0;
280 } else {
281 ignored += xch.chg2;
282 }
283 xchp = idx;
284 idx += 1;
285 }
286 Some((scr, lxch))
287 };
288
289 fn push_line(out: &mut String, prefix: char, text: &str) {
290 out.push(prefix);
291 out.push_str(text);
292 if !text.ends_with('\n') {
293 out.push('\n');
294 }
295 }
296
297 fn fmt_side(start: i64, count: i64) -> String {
299 if count == 1 {
300 format!("{start}")
301 } else {
302 format!("{start},{count}")
303 }
304 }
305
306 let mut out = String::new();
307 let mut cursor = 0usize;
308 while cursor < changes.len() {
309 let Some((xch_idx, xche_idx)) = get_hunk(cursor) else {
310 break;
311 };
312 let xch = &changes[xch_idx];
313 let xche = &changes[xche_idx];
314
315 let mut s1 = (xch.i1 - ctxlen).max(0);
317 let mut s2 = (xch.i2 - ctxlen).max(0);
318 let mut lctx = ctxlen;
320 lctx = lctx.min(before_len - (xche.i1 + xche.chg1));
321 lctx = lctx.min(after_len - (xche.i2 + xche.chg2));
322 let mut e1 = xche.i1 + xche.chg1 + lctx;
323 let mut e2 = xche.i2 + xche.chg2 + lctx;
324
325 if function_context {
330 let mut fs1 = xch.i1;
333 while fs1 >= 0 && !is_func_line(before_line(fs1), funcname_matcher) {
334 fs1 -= 1;
335 }
336 while fs1 > 0
337 && before_line(fs1 - 1).trim().is_empty() == false
338 && !is_func_line(before_line(fs1 - 1), funcname_matcher)
339 {
340 fs1 -= 1;
341 }
342 if fs1 < 0 {
343 fs1 = 0;
344 }
345 if fs1 < s1 {
346 s2 = (s2 - (s1 - fs1)).max(0);
347 s1 = fs1;
348 }
349 let end1 = xche.i1 + xche.chg1;
352 let mut fe1 = end1;
353 while fe1 < before_len && !is_func_line(before_line(fe1), funcname_matcher) {
354 fe1 += 1;
355 }
356 while fe1 > 0 && before_line(fe1 - 1).trim().is_empty() {
357 fe1 -= 1;
358 }
359 if fe1 > before_len {
360 fe1 = before_len;
361 }
362 if fe1 > e1 {
363 e2 = (e2 + (fe1 - e1)).min(after_len);
364 e1 = fe1;
365 }
366 }
367
368 let _ = writeln!(
369 out,
370 "@@ -{} +{} @@",
371 fmt_side(s1 + 1, e1 - s1),
372 fmt_side(s2 + 1, e2 - s2)
373 );
374
375 let mut s2c = s2;
377 while s2c < xch.i2 {
378 push_line(&mut out, ' ', after_line(s2c));
379 s2c += 1;
380 }
381
382 let mut s1c = xch.i1;
384 let mut s2c = xch.i2;
385 let mut k = xch_idx;
386 loop {
387 let c = &changes[k];
388 while s1c < c.i1 && s2c < c.i2 {
390 push_line(&mut out, ' ', after_line(s2c));
391 s1c += 1;
392 s2c += 1;
393 }
394 let mut r = c.i1;
396 while r < c.i1 + c.chg1 {
397 push_line(&mut out, '-', before_line(r));
398 r += 1;
399 }
400 let mut a = c.i2;
402 while a < c.i2 + c.chg2 {
403 push_line(&mut out, '+', after_line(a));
404 a += 1;
405 }
406 if k == xche_idx {
407 break;
408 }
409 s1c = c.i1 + c.chg1;
410 s2c = c.i2 + c.chg2;
411 k += 1;
412 }
413
414 let mut s2p = xche.i2 + xche.chg2;
416 while s2p < e2 {
417 push_line(&mut out, ' ', after_line(s2p));
418 s2p += 1;
419 }
420
421 cursor = xche_idx + 1;
422 }
423
424 out
425}
426
427#[must_use]
431pub fn unified_diff_histogram_hunks_only(
432 old_content: &str,
433 new_content: &str,
434 context_lines: usize,
435 inter_hunk_context: usize,
436) -> String {
437 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context)
438}
439
440#[must_use]
442pub fn unified_diff_histogram_with_prefix_and_funcname(
443 old_content: &str,
444 new_content: &str,
445 old_path: &str,
446 new_path: &str,
447 context_lines: usize,
448 inter_hunk_context: usize,
449 src_prefix: &str,
450 dst_prefix: &str,
451 funcname_matcher: Option<&FuncnameMatcher>,
452 quote_path_fully: bool,
453) -> String {
454 use crate::quote_path::format_diff_path_with_prefix;
455
456 let body =
457 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context);
458
459 let mut output = String::new();
460 if old_path == "/dev/null" {
461 output.push_str("--- /dev/null\n");
462 } else if src_prefix.is_empty() {
463 output.push_str(&format!("--- {old_path}\n"));
464 } else {
465 output.push_str("--- ");
466 output.push_str(&format_diff_path_with_prefix(
467 src_prefix,
468 old_path,
469 quote_path_fully,
470 ));
471 output.push('\n');
472 }
473 if new_path == "/dev/null" {
474 output.push_str("+++ /dev/null\n");
475 } else if dst_prefix.is_empty() {
476 output.push_str(&format!("+++ {new_path}\n"));
477 } else {
478 output.push_str("+++ ");
479 output.push_str(&format_diff_path_with_prefix(
480 dst_prefix,
481 new_path,
482 quote_path_fully,
483 ));
484 output.push('\n');
485 }
486
487 let old_lines: Vec<&str> = old_content.lines().collect();
488 for hunk_str in imara_unified_hunk_slices(&body) {
489 if hunk_str.is_empty() {
490 continue;
491 }
492 if let Some(first_newline) = hunk_str.find('\n') {
493 let header_line = &hunk_str[..first_newline];
494 let rest = &hunk_str[first_newline..];
495 if let Some(func_ctx) =
496 extract_function_context(header_line, &old_lines, funcname_matcher)
497 {
498 output.push_str(header_line);
499 output.push(' ');
500 output.push_str(&func_ctx);
501 output.push_str(rest);
502 } else {
503 output.push_str(hunk_str);
504 }
505 } else {
506 output.push_str(hunk_str);
507 }
508 }
509
510 output
511}
512
513#[allow(clippy::too_many_arguments)]
523pub fn unified_diff_histogram_ignore_with_prefix_and_funcname<F>(
524 old_content: &str,
525 new_content: &str,
526 old_path: &str,
527 new_path: &str,
528 context_lines: usize,
529 inter_hunk_context: usize,
530 src_prefix: &str,
531 dst_prefix: &str,
532 funcname_matcher: Option<&FuncnameMatcher>,
533 quote_path_fully: bool,
534 function_context: bool,
535 is_ignorable_change: F,
536) -> String
537where
538 F: Fn(&[&str], &[&str]) -> bool,
539{
540 use crate::quote_path::format_diff_path_with_prefix;
541
542 let body = histogram_unified_body_ignore_fc(
543 old_content,
544 new_content,
545 context_lines,
546 inter_hunk_context,
547 function_context,
548 funcname_matcher,
549 is_ignorable_change,
550 );
551 if body.is_empty() {
552 return String::new();
553 }
554
555 let mut output = String::new();
556 if old_path == "/dev/null" {
557 output.push_str("--- /dev/null\n");
558 } else if src_prefix.is_empty() {
559 output.push_str(&format!("--- {old_path}\n"));
560 } else {
561 output.push_str("--- ");
562 output.push_str(&format_diff_path_with_prefix(
563 src_prefix,
564 old_path,
565 quote_path_fully,
566 ));
567 output.push('\n');
568 }
569 if new_path == "/dev/null" {
570 output.push_str("+++ /dev/null\n");
571 } else if dst_prefix.is_empty() {
572 output.push_str(&format!("+++ {new_path}\n"));
573 } else {
574 output.push_str("+++ ");
575 output.push_str(&format_diff_path_with_prefix(
576 dst_prefix,
577 new_path,
578 quote_path_fully,
579 ));
580 output.push('\n');
581 }
582
583 let old_lines: Vec<&str> = old_content.lines().collect();
584 for hunk_str in imara_unified_hunk_slices(&body) {
585 if hunk_str.is_empty() {
586 continue;
587 }
588 if let Some(first_newline) = hunk_str.find('\n') {
589 let header_line = &hunk_str[..first_newline];
590 let rest = &hunk_str[first_newline..];
591 if let Some(func_ctx) =
592 extract_function_context(header_line, &old_lines, funcname_matcher)
593 {
594 output.push_str(header_line);
595 output.push(' ');
596 output.push_str(&func_ctx);
597 output.push_str(rest);
598 } else {
599 output.push_str(hunk_str);
600 }
601 } else {
602 output.push_str(hunk_str);
603 }
604 }
605
606 output
607}
608
609#[must_use]
611pub fn indent_heuristic_from_config(config: &ConfigSet) -> bool {
612 match config.get_bool("diff.indentHeuristic") {
613 Some(Ok(b)) => b,
614 Some(Err(_)) | None => true,
615 }
616}
617
618#[must_use]
620pub fn resolve_indent_heuristic(
621 config: &ConfigSet,
622 cli_indent_heuristic: bool,
623 cli_no_indent_heuristic: bool,
624) -> bool {
625 if cli_no_indent_heuristic {
626 false
627 } else if cli_indent_heuristic {
628 true
629 } else {
630 indent_heuristic_from_config(config)
631 }
632}
633
634#[must_use]
636pub fn parse_indent_heuristic_cli_flags(argv: &[String]) -> (bool, bool) {
637 let mut indent_heuristic = false;
638 let mut no_indent_heuristic = false;
639 for a in argv {
640 match a.as_str() {
641 "--indent-heuristic" => {
642 indent_heuristic = true;
643 no_indent_heuristic = false;
644 }
645 "--no-indent-heuristic" => {
646 no_indent_heuristic = true;
647 indent_heuristic = false;
648 }
649 _ => {}
650 }
651 }
652 (indent_heuristic, no_indent_heuristic)
653}
654
655mod git_xdiff {
665 const XDL_MAX_COST_MIN: i64 = 256;
666 const XDL_HEUR_MIN_COST: i64 = 256;
667 const XDL_SNAKE_CNT: i64 = 20;
668 const XDL_K_HEUR: i64 = 4;
669 const XDL_LINE_MAX: i64 = i64::MAX;
670 const XDL_KPDIS_RUN: i64 = 4;
671 const XDL_MAX_EQLIMIT: i64 = 1024;
672 const XDL_SIMSCAN_WINDOW: i64 = 100;
673
674 const DISCARD: u8 = 0;
676 const KEEP: u8 = 1;
677 const INVESTIGATE: u8 = 2;
678
679 fn bogosqrt(mut n: i64) -> i64 {
681 let mut i: i64 = 1;
682 while n > 0 {
683 i <<= 1;
684 n >>= 2;
685 }
686 i
687 }
688
689 struct XdAlgoEnv {
690 mxcost: i64,
691 snake_cnt: i64,
692 heur_min: i64,
693 }
694
695 struct XdpSplit {
696 i1: i64,
697 i2: i64,
698 min_lo: bool,
699 min_hi: bool,
700 }
701
702 struct XdFile<'a> {
705 ids: &'a [u32],
706 changed: Vec<bool>,
707 reference_index: Vec<usize>,
708 dstart: i64,
709 dend: i64,
710 nreff: i64,
711 }
712
713 #[inline]
715 fn get_id(xdf: &XdFile<'_>, idx: i64) -> u32 {
716 xdf.ids[xdf.reference_index[idx as usize]]
717 }
718
719 #[allow(clippy::too_many_arguments)]
722 fn xdl_split(
723 xdf1: &XdFile<'_>,
724 off1: i64,
725 lim1: i64,
726 xdf2: &XdFile<'_>,
727 off2: i64,
728 lim2: i64,
729 kvdf: &mut [i64],
730 kvdb: &mut [i64],
731 koff: i64,
732 need_min: bool,
733 spl: &mut XdpSplit,
734 xenv: &XdAlgoEnv,
735 ) -> i64 {
736 let dmin = off1 - lim2;
737 let dmax = lim1 - off2;
738 let fmid = off1 - off2;
739 let bmid = lim1 - lim2;
740 let odd = ((fmid - bmid) & 1) != 0;
741 let mut fmin = fmid;
742 let mut fmax = fmid;
743 let mut bmin = bmid;
744 let mut bmax = bmid;
745
746 let kf = |k: i64| -> usize { (k + koff) as usize };
747
748 kvdf[kf(fmid)] = off1;
749 kvdb[kf(bmid)] = lim1;
750
751 let mut ec: i64 = 1;
752 loop {
753 let mut got_snake = false;
754
755 if fmin > dmin {
756 fmin -= 1;
757 kvdf[kf(fmin - 1)] = -1;
758 } else {
759 fmin += 1;
760 }
761 if fmax < dmax {
762 fmax += 1;
763 kvdf[kf(fmax + 1)] = -1;
764 } else {
765 fmax -= 1;
766 }
767
768 let mut d = fmax;
769 while d >= fmin {
770 let mut i1 = if kvdf[kf(d - 1)] >= kvdf[kf(d + 1)] {
771 kvdf[kf(d - 1)] + 1
772 } else {
773 kvdf[kf(d + 1)]
774 };
775 let prev1 = i1;
776 let mut i2 = i1 - d;
777 while i1 < lim1 && i2 < lim2 && get_id(xdf1, i1) == get_id(xdf2, i2) {
778 i1 += 1;
779 i2 += 1;
780 }
781 if i1 - prev1 > xenv.snake_cnt {
782 got_snake = true;
783 }
784 kvdf[kf(d)] = i1;
785 if odd && bmin <= d && d <= bmax && kvdb[kf(d)] <= i1 {
786 spl.i1 = i1;
787 spl.i2 = i2;
788 spl.min_lo = true;
789 spl.min_hi = true;
790 return ec;
791 }
792 d -= 2;
793 }
794
795 if bmin > dmin {
796 bmin -= 1;
797 kvdb[kf(bmin - 1)] = XDL_LINE_MAX;
798 } else {
799 bmin += 1;
800 }
801 if bmax < dmax {
802 bmax += 1;
803 kvdb[kf(bmax + 1)] = XDL_LINE_MAX;
804 } else {
805 bmax -= 1;
806 }
807
808 let mut d = bmax;
809 while d >= bmin {
810 let mut i1 = if kvdb[kf(d - 1)] < kvdb[kf(d + 1)] {
811 kvdb[kf(d - 1)]
812 } else {
813 kvdb[kf(d + 1)] - 1
814 };
815 let prev1 = i1;
816 let mut i2 = i1 - d;
817 while i1 > off1 && i2 > off2 && get_id(xdf1, i1 - 1) == get_id(xdf2, i2 - 1) {
818 i1 -= 1;
819 i2 -= 1;
820 }
821 if prev1 - i1 > xenv.snake_cnt {
822 got_snake = true;
823 }
824 kvdb[kf(d)] = i1;
825 if !odd && fmin <= d && d <= fmax && i1 <= kvdf[kf(d)] {
826 spl.i1 = i1;
827 spl.i2 = i2;
828 spl.min_lo = true;
829 spl.min_hi = true;
830 return ec;
831 }
832 d -= 2;
833 }
834
835 if need_min {
836 ec += 1;
837 continue;
838 }
839
840 if got_snake && ec > xenv.heur_min {
841 let mut best = 0i64;
842 let mut d = fmax;
843 while d >= fmin {
844 let dd = if d > fmid { d - fmid } else { fmid - d };
845 let i1 = kvdf[kf(d)];
846 let i2 = i1 - d;
847 let v = (i1 - off1) + (i2 - off2) - dd;
848 if v > XDL_K_HEUR * ec
849 && v > best
850 && off1 + xenv.snake_cnt <= i1
851 && i1 < lim1
852 && off2 + xenv.snake_cnt <= i2
853 && i2 < lim2
854 {
855 let mut k = 1i64;
856 while get_id(xdf1, i1 - k) == get_id(xdf2, i2 - k) {
857 if k == xenv.snake_cnt {
858 best = v;
859 spl.i1 = i1;
860 spl.i2 = i2;
861 break;
862 }
863 k += 1;
864 }
865 }
866 d -= 2;
867 }
868 if best > 0 {
869 spl.min_lo = true;
870 spl.min_hi = false;
871 return ec;
872 }
873
874 let mut best = 0i64;
875 let mut d = bmax;
876 while d >= bmin {
877 let dd = if d > bmid { d - bmid } else { bmid - d };
878 let i1 = kvdb[kf(d)];
879 let i2 = i1 - d;
880 let v = (lim1 - i1) + (lim2 - i2) - dd;
881 if v > XDL_K_HEUR * ec
882 && v > best
883 && off1 < i1
884 && i1 <= lim1 - xenv.snake_cnt
885 && off2 < i2
886 && i2 <= lim2 - xenv.snake_cnt
887 {
888 let mut k = 0i64;
889 while get_id(xdf1, i1 + k) == get_id(xdf2, i2 + k) {
890 if k == xenv.snake_cnt - 1 {
891 best = v;
892 spl.i1 = i1;
893 spl.i2 = i2;
894 break;
895 }
896 k += 1;
897 }
898 }
899 d -= 2;
900 }
901 if best > 0 {
902 spl.min_lo = false;
903 spl.min_hi = true;
904 return ec;
905 }
906 }
907
908 if ec >= xenv.mxcost {
909 let mut fbest = -1i64;
910 let mut fbest1 = -1i64;
911 let mut d = fmax;
912 while d >= fmin {
913 let mut i1 = kvdf[kf(d)].min(lim1);
914 let mut i2 = i1 - d;
915 if lim2 < i2 {
916 i1 = lim2 + d;
917 i2 = lim2;
918 }
919 if fbest < i1 + i2 {
920 fbest = i1 + i2;
921 fbest1 = i1;
922 }
923 d -= 2;
924 }
925
926 let mut bbest = XDL_LINE_MAX;
927 let mut bbest1 = XDL_LINE_MAX;
928 let mut d = bmax;
929 while d >= bmin {
930 let mut i1 = off1.max(kvdb[kf(d)]);
931 let mut i2 = i1 - d;
932 if i2 < off2 {
933 i1 = off2 + d;
934 i2 = off2;
935 }
936 if i1 + i2 < bbest {
937 bbest = i1 + i2;
938 bbest1 = i1;
939 }
940 d -= 2;
941 }
942
943 if (lim1 + lim2) - bbest < fbest - (off1 + off2) {
944 spl.i1 = fbest1;
945 spl.i2 = fbest - fbest1;
946 spl.min_lo = true;
947 spl.min_hi = false;
948 } else {
949 spl.i1 = bbest1;
950 spl.i2 = bbest - bbest1;
951 spl.min_lo = false;
952 spl.min_hi = true;
953 }
954 return ec;
955 }
956
957 ec += 1;
958 }
959 }
960
961 #[allow(clippy::too_many_arguments)]
963 fn xdl_recs_cmp(
964 xdf1: &mut XdFile<'_>,
965 mut off1: i64,
966 mut lim1: i64,
967 xdf2: &mut XdFile<'_>,
968 mut off2: i64,
969 mut lim2: i64,
970 kvdf: &mut [i64],
971 kvdb: &mut [i64],
972 koff: i64,
973 need_min: bool,
974 xenv: &XdAlgoEnv,
975 ) {
976 while off1 < lim1 && off2 < lim2 && get_id(xdf1, off1) == get_id(xdf2, off2) {
977 off1 += 1;
978 off2 += 1;
979 }
980 while off1 < lim1 && off2 < lim2 && get_id(xdf1, lim1 - 1) == get_id(xdf2, lim2 - 1) {
981 lim1 -= 1;
982 lim2 -= 1;
983 }
984
985 if off1 == lim1 {
986 while off2 < lim2 {
987 let r = xdf2.reference_index[off2 as usize];
988 xdf2.changed[r] = true;
989 off2 += 1;
990 }
991 } else if off2 == lim2 {
992 while off1 < lim1 {
993 let r = xdf1.reference_index[off1 as usize];
994 xdf1.changed[r] = true;
995 off1 += 1;
996 }
997 } else {
998 let mut spl = XdpSplit {
999 i1: 0,
1000 i2: 0,
1001 min_lo: false,
1002 min_hi: false,
1003 };
1004 xdl_split(
1005 xdf1, off1, lim1, xdf2, off2, lim2, kvdf, kvdb, koff, need_min, &mut spl, xenv,
1006 );
1007 xdl_recs_cmp(
1008 xdf1, off1, spl.i1, xdf2, off2, spl.i2, kvdf, kvdb, koff, spl.min_lo, xenv,
1009 );
1010 xdl_recs_cmp(
1011 xdf1, spl.i1, lim1, xdf2, spl.i2, lim2, kvdf, kvdb, koff, spl.min_hi, xenv,
1012 );
1013 }
1014 }
1015
1016 fn clean_mmatch(action: &[u8], i: i64, mut s: i64, mut e: i64) -> bool {
1018 if i - s > XDL_SIMSCAN_WINDOW {
1019 s = i - XDL_SIMSCAN_WINDOW;
1020 }
1021 if e - i > XDL_SIMSCAN_WINDOW {
1022 e = i + XDL_SIMSCAN_WINDOW;
1023 }
1024
1025 let mut rdis0 = 0i64;
1026 let mut rpdis0 = 1i64;
1027 let mut r = 1i64;
1028 while i - r >= s {
1029 match action[(i - r) as usize] {
1030 DISCARD => rdis0 += 1,
1031 INVESTIGATE => rpdis0 += 1,
1032 _ => break, }
1034 r += 1;
1035 }
1036 if rdis0 == 0 {
1037 return false;
1038 }
1039 let mut rdis1 = 0i64;
1040 let mut rpdis1 = 1i64;
1041 let mut r = 1i64;
1042 while i + r <= e {
1043 match action[(i + r) as usize] {
1044 DISCARD => rdis1 += 1,
1045 INVESTIGATE => rpdis1 += 1,
1046 _ => break, }
1048 r += 1;
1049 }
1050 if rdis1 == 0 {
1051 return false;
1052 }
1053 rdis1 += rdis0;
1054 rpdis1 += rpdis0;
1055 rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1)
1056 }
1057
1058 pub fn changed_flags(ids1: &[u32], ids2: &[u32]) -> (Vec<bool>, Vec<bool>) {
1062 let nrec1 = ids1.len();
1063 let nrec2 = ids2.len();
1064
1065 use std::collections::HashMap;
1067 let mut count1: HashMap<u32, i64> = HashMap::new();
1068 let mut count2: HashMap<u32, i64> = HashMap::new();
1069 for &id in ids1 {
1070 *count1.entry(id).or_insert(0) += 1;
1071 }
1072 for &id in ids2 {
1073 *count2.entry(id).or_insert(0) += 1;
1074 }
1075
1076 let mut xdf1 = XdFile {
1077 ids: ids1,
1078 changed: vec![false; nrec1],
1079 reference_index: Vec::new(),
1080 dstart: 0,
1081 dend: nrec1 as i64 - 1,
1082 nreff: 0,
1083 };
1084 let mut xdf2 = XdFile {
1085 ids: ids2,
1086 changed: vec![false; nrec2],
1087 reference_index: Vec::new(),
1088 dstart: 0,
1089 dend: nrec2 as i64 - 1,
1090 nreff: 0,
1091 };
1092
1093 let lim = nrec1.min(nrec2) as i64;
1095 let mut i = 0i64;
1096 while i < lim && ids1[i as usize] == ids2[i as usize] {
1097 i += 1;
1098 }
1099 xdf1.dstart = i;
1100 xdf2.dstart = i;
1101 let mut j = 0i64;
1102 let rem = lim - i;
1103 while j < rem && ids1[nrec1 - 1 - j as usize] == ids2[nrec2 - 1 - j as usize] {
1104 j += 1;
1105 }
1106 xdf1.dend = nrec1 as i64 - j - 1;
1107 xdf2.dend = nrec2 as i64 - j - 1;
1108
1109 let mut action1 = vec![0u8; nrec1 + 1];
1111 let mut action2 = vec![0u8; nrec2 + 1];
1112
1113 let mut mlim = bogosqrt(nrec1 as i64);
1114 if mlim > XDL_MAX_EQLIMIT {
1115 mlim = XDL_MAX_EQLIMIT;
1116 }
1117 let mut idx = xdf1.dstart;
1118 while idx <= xdf1.dend {
1119 let id = ids1[idx as usize];
1120 let nm = *count2.get(&id).unwrap_or(&0);
1121 action1[idx as usize] = if nm == 0 {
1122 DISCARD
1123 } else if nm >= mlim {
1124 INVESTIGATE
1125 } else {
1126 KEEP
1127 };
1128 idx += 1;
1129 }
1130
1131 let mut mlim = bogosqrt(nrec2 as i64);
1132 if mlim > XDL_MAX_EQLIMIT {
1133 mlim = XDL_MAX_EQLIMIT;
1134 }
1135 let mut idx = xdf2.dstart;
1136 while idx <= xdf2.dend {
1137 let id = ids2[idx as usize];
1138 let nm = *count1.get(&id).unwrap_or(&0);
1139 action2[idx as usize] = if nm == 0 {
1140 DISCARD
1141 } else if nm >= mlim {
1142 INVESTIGATE
1143 } else {
1144 KEEP
1145 };
1146 idx += 1;
1147 }
1148
1149 let mut idx = xdf1.dstart;
1150 while idx <= xdf1.dend {
1151 let a = action1[idx as usize];
1152 if a == KEEP
1153 || (a == INVESTIGATE && !clean_mmatch(&action1, idx, xdf1.dstart, xdf1.dend))
1154 {
1155 xdf1.reference_index.push(idx as usize);
1156 } else {
1157 xdf1.changed[idx as usize] = true;
1158 }
1159 idx += 1;
1160 }
1161 xdf1.nreff = xdf1.reference_index.len() as i64;
1162
1163 let mut idx = xdf2.dstart;
1164 while idx <= xdf2.dend {
1165 let a = action2[idx as usize];
1166 if a == KEEP
1167 || (a == INVESTIGATE && !clean_mmatch(&action2, idx, xdf2.dstart, xdf2.dend))
1168 {
1169 xdf2.reference_index.push(idx as usize);
1170 } else {
1171 xdf2.changed[idx as usize] = true;
1172 }
1173 idx += 1;
1174 }
1175 xdf2.nreff = xdf2.reference_index.len() as i64;
1176
1177 let ndiags = xdf1.nreff + xdf2.nreff + 3;
1179 let kvd_len = (2 * ndiags + 2) as usize;
1180 let mut kvd = vec![0i64; kvd_len];
1181 let koff = xdf2.nreff + 1;
1183 let (kvdf_slice, kvdb_slice) = kvd.split_at_mut(ndiags as usize);
1184 let xenv = XdAlgoEnv {
1187 mxcost: bogosqrt(ndiags).max(XDL_MAX_COST_MIN),
1188 snake_cnt: XDL_SNAKE_CNT,
1189 heur_min: XDL_HEUR_MIN_COST,
1190 };
1191
1192 let nreff1 = xdf1.nreff;
1193 let nreff2 = xdf2.nreff;
1194 xdl_recs_cmp(
1195 &mut xdf1, 0, nreff1, &mut xdf2, 0, nreff2, kvdf_slice, kvdb_slice, koff, false, &xenv,
1196 );
1197
1198 (xdf1.changed, xdf2.changed)
1199 }
1200}
1201
1202fn changed_flags_to_ops(
1204 changed1: &[bool],
1205 changed2: &[bool],
1206 old_len: usize,
1207 new_len: usize,
1208) -> Vec<similar::DiffOp> {
1209 use similar::DiffOp;
1210 let mut ops: Vec<DiffOp> = Vec::new();
1211 let mut i = 0usize;
1212 let mut j = 0usize;
1213 while i < old_len || j < new_len {
1214 let del = i < old_len && changed1[i];
1215 let ins = j < new_len && changed2[j];
1216 if !del && !ins {
1217 let start_i = i;
1219 let start_j = j;
1220 while i < old_len && j < new_len && !changed1[i] && !changed2[j] {
1221 i += 1;
1222 j += 1;
1223 }
1224 ops.push(DiffOp::Equal {
1225 old_index: start_i,
1226 new_index: start_j,
1227 len: i - start_i,
1228 });
1229 } else {
1230 let start_i = i;
1232 let start_j = j;
1233 while i < old_len && changed1[i] {
1234 i += 1;
1235 }
1236 while j < new_len && changed2[j] {
1237 j += 1;
1238 }
1239 let dlen = i - start_i;
1240 let ilen = j - start_j;
1241 if dlen > 0 && ilen > 0 {
1242 ops.push(DiffOp::Replace {
1243 old_index: start_i,
1244 old_len: dlen,
1245 new_index: start_j,
1246 new_len: ilen,
1247 });
1248 } else if dlen > 0 {
1249 ops.push(DiffOp::Delete {
1250 old_index: start_i,
1251 old_len: dlen,
1252 new_index: start_j,
1253 });
1254 } else if ilen > 0 {
1255 ops.push(DiffOp::Insert {
1256 old_index: start_i,
1257 new_index: start_j,
1258 new_len: ilen,
1259 });
1260 }
1261 }
1262 }
1263 ops
1264}
1265
1266#[must_use]
1272pub fn word_diff_ops_imara(old_words: &[&str], new_words: &[&str]) -> Vec<similar::DiffOp> {
1273 use std::collections::HashMap;
1274
1275 let mut interner: HashMap<&str, u32> = HashMap::new();
1277 let mut ids1: Vec<u32> = Vec::with_capacity(old_words.len());
1278 for &w in old_words {
1279 let next = interner.len() as u32;
1280 let id = *interner.entry(w).or_insert(next);
1281 ids1.push(id);
1282 }
1283 let mut ids2: Vec<u32> = Vec::with_capacity(new_words.len());
1284 for &w in new_words {
1285 let next = interner.len() as u32;
1286 let id = *interner.entry(w).or_insert(next);
1287 ids2.push(id);
1288 }
1289
1290 let (changed1, changed2) = git_xdiff::changed_flags(&ids1, &ids2);
1291 let ops = changed_flags_to_ops(&changed1, &changed2, old_words.len(), new_words.len());
1292
1293 diff_indent_heuristic::apply_change_compact_to_ops(&ops, old_words, new_words, false)
1296}
1297
1298#[must_use]
1300pub fn diff_slice_ops_compacted(
1301 old_lines: &[&str],
1302 new_lines: &[&str],
1303 algorithm: similar::Algorithm,
1304 indent_heuristic: bool,
1305) -> Vec<similar::DiffOp> {
1306 diff_indent_heuristic::diff_slice_ops_compacted(
1307 old_lines,
1308 new_lines,
1309 algorithm,
1310 indent_heuristic,
1311 )
1312}
1313
1314#[must_use]
1316pub fn map_new_to_old_lines_compacted(
1317 old_joined: &str,
1318 new_joined: &str,
1319 algorithm: similar::Algorithm,
1320 indent_heuristic: bool,
1321 new_line_count: usize,
1322) -> Vec<Option<usize>> {
1323 let ops = diff_indent_heuristic::diff_lines_ops_compacted(
1324 old_joined,
1325 new_joined,
1326 algorithm,
1327 indent_heuristic,
1328 );
1329 diff_indent_heuristic::map_new_to_old_from_ops(&ops, new_line_count)
1330}
1331
1332#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1334pub enum DiffStatus {
1335 Added,
1337 Deleted,
1339 Modified,
1341 Renamed,
1343 Copied,
1345 TypeChanged,
1347 Unmerged,
1349}
1350
1351impl DiffStatus {
1352 #[must_use]
1354 pub fn letter(&self) -> char {
1355 match self {
1356 Self::Added => 'A',
1357 Self::Deleted => 'D',
1358 Self::Modified => 'M',
1359 Self::Renamed => 'R',
1360 Self::Copied => 'C',
1361 Self::TypeChanged => 'T',
1362 Self::Unmerged => 'U',
1363 }
1364 }
1365}
1366
1367#[derive(Debug, Clone, PartialEq, Eq)]
1369pub struct DiffEntry {
1370 pub status: DiffStatus,
1372 pub old_path: Option<String>,
1374 pub new_path: Option<String>,
1376 pub old_mode: String,
1378 pub new_mode: String,
1380 pub old_oid: ObjectId,
1382 pub new_oid: ObjectId,
1384 pub score: Option<u32>,
1386}
1387
1388impl DiffEntry {
1389 #[must_use]
1391 pub fn path(&self) -> &str {
1392 self.new_path
1393 .as_deref()
1394 .or(self.old_path.as_deref())
1395 .unwrap_or("")
1396 }
1397
1398 #[must_use]
1403 pub fn display_path(&self) -> String {
1404 match self.status {
1405 DiffStatus::Renamed | DiffStatus::Copied => {
1406 let old = self.old_path.as_deref().unwrap_or("");
1407 let new = self.new_path.as_deref().unwrap_or("");
1408 if old.is_empty() || new.is_empty() {
1409 self.path().to_owned()
1410 } else {
1411 format!("{old} -> {new}")
1412 }
1413 }
1414 _ => self.path().to_owned(),
1415 }
1416 }
1417}
1418
1419pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
1421
1422#[must_use]
1424pub fn zero_oid() -> ObjectId {
1425 ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
1426 panic!("internal error: failed to create zero OID");
1428 })
1429}
1430
1431#[must_use]
1433pub fn empty_blob_oid() -> ObjectId {
1434 ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391").unwrap_or_else(|_| {
1435 panic!("internal error: failed to create empty blob OID");
1437 })
1438}
1439
1440pub fn diff_trees(
1455 odb: &Odb,
1456 old_tree_oid: Option<&ObjectId>,
1457 new_tree_oid: Option<&ObjectId>,
1458 prefix: &str,
1459) -> Result<Vec<DiffEntry>> {
1460 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false)
1461}
1462
1463pub fn diff_trees_show_tree_entries(
1467 odb: &Odb,
1468 old_tree_oid: Option<&ObjectId>,
1469 new_tree_oid: Option<&ObjectId>,
1470 prefix: &str,
1471) -> Result<Vec<DiffEntry>> {
1472 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true)
1473}
1474
1475fn diff_trees_opts(
1476 odb: &Odb,
1477 old_tree_oid: Option<&ObjectId>,
1478 new_tree_oid: Option<&ObjectId>,
1479 prefix: &str,
1480 show_trees: bool,
1481) -> Result<Vec<DiffEntry>> {
1482 let old_entries = match old_tree_oid {
1483 Some(oid) => read_tree(odb, oid)?,
1484 None => Vec::new(),
1485 };
1486 let new_entries = match new_tree_oid {
1487 Some(oid) => read_tree(odb, oid)?,
1488 None => Vec::new(),
1489 };
1490
1491 let mut result = Vec::new();
1492 diff_tree_entries_opts(
1493 odb,
1494 &old_entries,
1495 &new_entries,
1496 prefix,
1497 show_trees,
1498 &mut result,
1499 )?;
1500 Ok(result)
1501}
1502
1503fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
1505 let obj = odb.read(oid)?;
1506 if obj.kind != ObjectKind::Tree {
1507 return Err(Error::CorruptObject(format!(
1508 "expected tree, got {}",
1509 obj.kind.as_str()
1510 )));
1511 }
1512 parse_tree(&obj.data)
1513}
1514
1515fn diff_tree_entries_opts(
1517 odb: &Odb,
1518 old: &[TreeEntry],
1519 new: &[TreeEntry],
1520 prefix: &str,
1521 show_trees: bool,
1522 result: &mut Vec<DiffEntry>,
1523) -> Result<()> {
1524 let mut oi = 0;
1525 let mut ni = 0;
1526
1527 while oi < old.len() || ni < new.len() {
1528 match (old.get(oi), new.get(ni)) {
1529 (Some(o), Some(n)) => {
1530 let cmp = crate::objects::tree_entry_cmp(
1531 &o.name,
1532 is_tree_mode(o.mode),
1533 &n.name,
1534 is_tree_mode(n.mode),
1535 );
1536 match cmp {
1537 std::cmp::Ordering::Less => {
1538 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1540 oi += 1;
1541 }
1542 std::cmp::Ordering::Greater => {
1543 emit_added_opts(odb, n, prefix, show_trees, result)?;
1545 ni += 1;
1546 }
1547 std::cmp::Ordering::Equal => {
1548 if o.oid != n.oid || o.mode != n.mode {
1550 let name_str = String::from_utf8_lossy(&o.name);
1551 let path = format_path(prefix, &name_str);
1552 if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
1553 if show_trees {
1555 result.push(DiffEntry {
1556 status: DiffStatus::Modified,
1557 old_path: Some(path.clone()),
1558 new_path: Some(path.clone()),
1559 old_mode: format_mode(o.mode),
1560 new_mode: format_mode(n.mode),
1561 old_oid: o.oid,
1562 new_oid: n.oid,
1563 score: None,
1564 });
1565 }
1566 let nested = diff_trees_opts(
1568 odb,
1569 Some(&o.oid),
1570 Some(&n.oid),
1571 &path,
1572 show_trees,
1573 )?;
1574 result.extend(nested);
1575 } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
1576 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1578 emit_added_opts(odb, n, prefix, show_trees, result)?;
1579 } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
1580 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1582 emit_added_opts(odb, n, prefix, show_trees, result)?;
1583 } else {
1584 let old_type = o.mode & 0o170000;
1588 let new_type = n.mode & 0o170000;
1589 result.push(DiffEntry {
1590 status: if old_type != new_type {
1591 DiffStatus::TypeChanged
1592 } else {
1593 DiffStatus::Modified
1594 },
1595 old_path: Some(path.clone()),
1596 new_path: Some(path),
1597 old_mode: format_mode(o.mode),
1598 new_mode: format_mode(n.mode),
1599 old_oid: o.oid,
1600 new_oid: n.oid,
1601 score: None,
1602 });
1603 }
1604 }
1605 oi += 1;
1606 ni += 1;
1607 }
1608 }
1609 }
1610 (Some(o), None) => {
1611 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1612 oi += 1;
1613 }
1614 (None, Some(n)) => {
1615 emit_added_opts(odb, n, prefix, show_trees, result)?;
1616 ni += 1;
1617 }
1618 (None, None) => break,
1619 }
1620 }
1621
1622 Ok(())
1623}
1624
1625fn emit_deleted_opts(
1626 odb: &Odb,
1627 entry: &TreeEntry,
1628 prefix: &str,
1629 show_trees: bool,
1630 result: &mut Vec<DiffEntry>,
1631) -> Result<()> {
1632 let name_str = String::from_utf8_lossy(&entry.name);
1633 let path = format_path(prefix, &name_str);
1634 if is_tree_mode(entry.mode) {
1635 if show_trees {
1636 result.push(DiffEntry {
1637 status: DiffStatus::Deleted,
1638 old_path: Some(path.clone()),
1639 new_path: None,
1640 old_mode: format_mode(entry.mode),
1641 new_mode: "000000".to_owned(),
1642 old_oid: entry.oid,
1643 new_oid: zero_oid(),
1644 score: None,
1645 });
1646 }
1647 let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
1649 result.extend(nested);
1650 } else {
1651 result.push(DiffEntry {
1652 status: DiffStatus::Deleted,
1653 old_path: Some(path.clone()),
1654 new_path: None,
1655 old_mode: format_mode(entry.mode),
1656 new_mode: "000000".to_owned(),
1657 old_oid: entry.oid,
1658 new_oid: zero_oid(),
1659 score: None,
1660 });
1661 }
1662 Ok(())
1663}
1664
1665fn emit_added_opts(
1666 odb: &Odb,
1667 entry: &TreeEntry,
1668 prefix: &str,
1669 show_trees: bool,
1670 result: &mut Vec<DiffEntry>,
1671) -> Result<()> {
1672 let name_str = String::from_utf8_lossy(&entry.name);
1673 let path = format_path(prefix, &name_str);
1674 if is_tree_mode(entry.mode) {
1675 if show_trees {
1676 result.push(DiffEntry {
1677 status: DiffStatus::Added,
1678 old_path: None,
1679 new_path: Some(path.clone()),
1680 old_mode: "000000".to_owned(),
1681 new_mode: format_mode(entry.mode),
1682 old_oid: zero_oid(),
1683 new_oid: entry.oid,
1684 score: None,
1685 });
1686 }
1687 let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
1689 result.extend(nested);
1690 } else {
1691 result.push(DiffEntry {
1692 status: DiffStatus::Added,
1693 old_path: None,
1694 new_path: Some(path),
1695 old_mode: "000000".to_owned(),
1696 new_mode: format_mode(entry.mode),
1697 old_oid: zero_oid(),
1698 new_oid: entry.oid,
1699 score: None,
1700 });
1701 }
1702 Ok(())
1703}
1704
1705pub fn diff_index_to_tree(
1725 odb: &Odb,
1726 index: &Index,
1727 tree_oid: Option<&ObjectId>,
1728 ignore_submodules: bool,
1729) -> Result<Vec<DiffEntry>> {
1730 let tree_entries = match tree_oid {
1732 Some(oid) => flatten_tree(odb, oid, "")?,
1733 None => Vec::new(),
1734 };
1735
1736 let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
1738 std::collections::BTreeMap::new();
1739 for entry in &tree_entries {
1740 tree_map.insert(&entry.path, entry);
1741 }
1742
1743 let mut result = Vec::new();
1744 let mut stage0_paths = std::collections::BTreeSet::new();
1745 let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
1746 std::collections::BTreeMap::new();
1747
1748 for ie in &index.entries {
1750 let path = String::from_utf8_lossy(&ie.path).to_string();
1751 if ie.stage() == 0 && ie.intent_to_add() {
1752 continue;
1755 }
1756 if ie.stage() != 0 {
1757 let rank = match ie.stage() {
1758 2 => 0u8,
1759 3 => 1u8,
1760 1 => 2u8,
1761 _ => 3u8,
1762 };
1763 match unmerged_modes.get(&path) {
1764 Some((existing_rank, _)) if *existing_rank <= rank => {}
1765 _ => {
1766 unmerged_modes.insert(path, (rank, ie.mode));
1767 }
1768 }
1769 continue;
1770 }
1771 if ignore_submodules && ie.mode == 0o160000 {
1772 let _ = tree_map.remove(path.as_str());
1773 stage0_paths.insert(path.clone());
1774 continue;
1775 }
1776 stage0_paths.insert(path.clone());
1777 match tree_map.remove(path.as_str()) {
1778 Some(te) => {
1779 if te.oid != ie.oid || te.mode != ie.mode {
1781 result.push(DiffEntry {
1782 status: DiffStatus::Modified,
1783 old_path: Some(path.clone()),
1784 new_path: Some(path),
1785 old_mode: format_mode(te.mode),
1786 new_mode: format_mode(ie.mode),
1787 old_oid: te.oid,
1788 new_oid: ie.oid,
1789 score: None,
1790 });
1791 }
1792 }
1793 None => {
1794 result.push(DiffEntry {
1796 status: DiffStatus::Added,
1797 old_path: None,
1798 new_path: Some(path),
1799 old_mode: "000000".to_owned(),
1800 new_mode: format_mode(ie.mode),
1801 old_oid: zero_oid(),
1802 new_oid: ie.oid,
1803 score: None,
1804 });
1805 }
1806 }
1807 }
1808
1809 for (path, (_, mode)) in &unmerged_modes {
1810 if stage0_paths.contains(path) {
1811 continue;
1812 }
1813 tree_map.remove(path.as_str());
1814 result.push(DiffEntry {
1815 status: DiffStatus::Unmerged,
1816 old_path: Some(path.clone()),
1817 new_path: Some(path.clone()),
1818 old_mode: "000000".to_owned(),
1819 new_mode: format_mode(*mode),
1820 old_oid: zero_oid(),
1821 new_oid: zero_oid(),
1822 score: None,
1823 });
1824 }
1825
1826 for (path, te) in tree_map {
1828 if ignore_submodules && te.mode == 0o160000 {
1829 continue;
1830 }
1831 result.push(DiffEntry {
1832 status: DiffStatus::Deleted,
1833 old_path: Some(path.to_owned()),
1834 new_path: None,
1835 old_mode: format_mode(te.mode),
1836 new_mode: "000000".to_owned(),
1837 old_oid: te.oid,
1838 new_oid: zero_oid(),
1839 score: None,
1840 });
1841 }
1842
1843 result.sort_by(|a, b| a.path().cmp(b.path()));
1844 Ok(result)
1845}
1846
1847pub fn diff_index_to_worktree(
1872 odb: &Odb,
1873 index: &Index,
1874 work_tree: &Path,
1875 ignore_submodule_untracked: bool,
1876 simplify_gitlinks: bool,
1877) -> Result<Vec<DiffEntry>> {
1878 diff_index_to_worktree_with_options(
1879 odb,
1880 index,
1881 work_tree,
1882 DiffIndexToWorktreeOptions {
1883 ignore_submodule_untracked,
1884 simplify_gitlinks,
1885 ..DiffIndexToWorktreeOptions::default()
1886 },
1887 )
1888}
1889
1890#[derive(Debug, Clone, Copy, Default)]
1892pub struct DiffIndexToWorktreeOptions {
1893 pub index_mtime: Option<(u32, u32)>,
1899 pub ignore_submodule_untracked: bool,
1901 pub simplify_gitlinks: bool,
1903 pub error_on_broken_gitlinks: bool,
1906}
1907
1908pub fn diff_index_to_worktree_with_options(
1925 odb: &Odb,
1926 index: &Index,
1927 work_tree: &Path,
1928 options: DiffIndexToWorktreeOptions,
1929) -> Result<Vec<DiffEntry>> {
1930 use crate::config::ConfigSet;
1931 use crate::crlf;
1932
1933 let ignore_submodule_untracked = options.ignore_submodule_untracked;
1934 let simplify_gitlinks = options.simplify_gitlinks;
1935
1936 let git_dir = work_tree.join(".git");
1937 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1938 let conv = crlf::ConversionConfig::from_config(&config);
1939 let attrs = crlf::load_gitattributes(work_tree);
1940
1941 let mut result = Vec::new();
1942 let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
1943 std::collections::BTreeMap::new();
1944
1945 for ie in &index.entries {
1946 if ie.stage() != 0 {
1947 let path = String::from_utf8_lossy(&ie.path).to_string();
1948 let rank = match ie.stage() {
1949 2 => 0u8,
1950 3 => 1u8,
1951 1 => 2u8,
1952 _ => 3u8,
1953 };
1954 match unmerged_base.get(&path) {
1955 Some((existing_rank, _)) if *existing_rank <= rank => {}
1956 _ => {
1957 unmerged_base.insert(path, (rank, ie));
1958 }
1959 }
1960 continue;
1961 }
1962 if ie.skip_worktree() || ie.assume_unchanged() {
1965 continue;
1966 }
1967 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
1970 let is_intent_to_add = ie.intent_to_add();
1971
1972 if ie.mode == 0o160000 {
1977 let sub_dir = work_tree.join(path_str_ref);
1978 let sub_head_oid = read_submodule_head_oid(&sub_dir);
1979 if !simplify_gitlinks && sub_head_oid.is_none() && !sub_dir.exists() {
1986 let path_owned = path_str_ref.to_owned();
1987 result.push(DiffEntry {
1988 status: DiffStatus::Deleted,
1989 old_path: Some(path_owned.clone()),
1990 new_path: Some(path_owned),
1991 old_mode: format_mode(ie.mode),
1992 new_mode: "000000".to_owned(),
1993 old_oid: ie.oid,
1994 new_oid: zero_oid(),
1995 score: None,
1996 });
1997 continue;
1998 }
1999 let ref_matches = if let Some(oid) = sub_head_oid {
2000 oid == ie.oid
2001 } else {
2002 let is_placeholder = submodule_worktree_is_unpopulated_placeholder(&sub_dir);
2003 if options.error_on_broken_gitlinks
2004 && !is_placeholder
2005 && submodule_embedded_git_dir(&sub_dir).is_some()
2006 {
2007 return Err(Error::ConfigError(format!(
2008 "could not read submodule HEAD for '{path_str_ref}'"
2009 )));
2010 }
2011 is_placeholder
2012 };
2013 if simplify_gitlinks {
2014 if !ref_matches {
2015 let path_owned = path_str_ref.to_owned();
2016 let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
2017 result.push(DiffEntry {
2018 status: DiffStatus::Modified,
2019 old_path: Some(path_owned.clone()),
2020 new_path: Some(path_owned),
2021 old_mode: format_mode(ie.mode),
2022 new_mode: format_mode(ie.mode),
2023 old_oid: ie.oid,
2024 new_oid,
2025 score: None,
2026 });
2027 }
2028 continue;
2029 }
2030 if sub_head_oid.is_some() && submodule_head_object_broken(&sub_dir) {
2035 return Err(Error::ConfigError(format!(
2036 "'git status --porcelain=2' failed in submodule {path_str_ref}"
2037 )));
2038 }
2039 let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
2040 if ignore_submodule_untracked {
2041 flags.untracked = false;
2042 }
2043 let inner_dirty = flags.modified || flags.untracked;
2044 if !ref_matches || inner_dirty {
2045 let path_owned = path_str_ref.to_owned();
2046 let new_oid = if !ref_matches {
2047 sub_head_oid.unwrap_or_else(zero_oid)
2048 } else {
2049 zero_oid()
2050 };
2051 result.push(DiffEntry {
2052 status: DiffStatus::Modified,
2053 old_path: Some(path_owned.clone()),
2054 new_path: Some(path_owned),
2055 old_mode: format_mode(ie.mode),
2056 new_mode: format_mode(ie.mode),
2057 old_oid: ie.oid,
2058 new_oid,
2059 score: None,
2060 });
2061 }
2062 continue;
2063 }
2064
2065 let file_path = work_tree.join(path_str_ref);
2066
2067 if is_intent_to_add {
2068 match fs::symlink_metadata(&file_path) {
2069 Ok(meta) => {
2070 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
2071 let worktree_oid = hash_worktree_file(
2072 odb,
2073 &file_path,
2074 &meta,
2075 &conv,
2076 &file_attrs,
2077 path_str_ref,
2078 None,
2079 )?;
2080 let worktree_mode = mode_from_metadata(&meta);
2081 result.push(DiffEntry {
2082 status: DiffStatus::Added,
2083 old_path: None,
2084 new_path: Some(path_str_ref.to_owned()),
2085 old_mode: "000000".to_owned(),
2086 new_mode: format_mode(worktree_mode),
2087 old_oid: zero_oid(),
2090 new_oid: worktree_oid,
2091 score: None,
2092 });
2093 }
2094 Err(e)
2095 if e.kind() == std::io::ErrorKind::NotFound
2096 || e.raw_os_error() == Some(20) =>
2097 {
2098 result.push(DiffEntry {
2099 status: DiffStatus::Deleted,
2100 old_path: Some(path_str_ref.to_owned()),
2101 new_path: None,
2102 old_mode: format_mode(ie.mode),
2103 new_mode: "000000".to_owned(),
2104 old_oid: ie.oid,
2105 new_oid: zero_oid(),
2106 score: None,
2107 });
2108 }
2109 Err(e) => return Err(Error::Io(e)),
2110 }
2111 continue;
2112 }
2113
2114 if has_symlink_in_path(work_tree, path_str_ref) {
2117 result.push(DiffEntry {
2118 status: DiffStatus::Deleted,
2119 old_path: Some(path_str_ref.to_owned()),
2120 new_path: None,
2121 old_mode: format_mode(ie.mode),
2122 new_mode: "000000".to_owned(),
2123 old_oid: ie.oid,
2124 new_oid: zero_oid(),
2125 score: None,
2126 });
2127 continue;
2128 }
2129
2130 match fs::symlink_metadata(&file_path) {
2131 Ok(meta) if meta.is_dir() => {
2132 if file_path.join(".git").exists() {
2137 let head = read_submodule_head_oid(&file_path).unwrap_or_else(zero_oid);
2138 let path_owned = path_str_ref.to_owned();
2139 result.push(DiffEntry {
2140 status: DiffStatus::TypeChanged,
2141 old_path: Some(path_owned.clone()),
2142 new_path: Some(path_owned),
2143 old_mode: format_mode(ie.mode),
2144 new_mode: format_mode(0o160000),
2145 old_oid: ie.oid,
2146 new_oid: head,
2147 score: None,
2148 });
2149 continue;
2150 }
2151 result.push(DiffEntry {
2152 status: DiffStatus::Deleted,
2153 old_path: Some(path_str_ref.to_owned()),
2154 new_path: None,
2155 old_mode: format_mode(ie.mode),
2156 new_mode: String::new(),
2157 old_oid: ie.oid,
2158 new_oid: zero_oid(),
2159 score: None,
2160 });
2161 }
2162 Ok(meta) => {
2163 let worktree_mode = mode_from_metadata(&meta);
2164 let stat_same = stat_matches(ie, &meta);
2165 if stat_same && worktree_mode != ie.mode {
2167 let path_owned = path_str_ref.to_owned();
2168 result.push(DiffEntry {
2169 status: DiffStatus::Modified,
2170 old_path: Some(path_owned.clone()),
2171 new_path: Some(path_owned),
2172 old_mode: format_mode(ie.mode),
2173 new_mode: format_mode(worktree_mode),
2174 old_oid: ie.oid,
2175 new_oid: ie.oid,
2176 score: None,
2177 });
2178 continue;
2179 }
2180
2181 if stat_same && worktree_mode == ie.mode && !entry_is_racy(ie, options.index_mtime) {
2184 continue;
2185 }
2186
2187 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
2189 let worktree_oid = hash_worktree_file(
2190 odb,
2191 &file_path,
2192 &meta,
2193 &conv,
2194 &file_attrs,
2195 path_str_ref,
2196 Some(ie),
2197 )?;
2198
2199 let mut eff_oid = worktree_oid;
2203 if eff_oid != ie.oid {
2204 if let Ok(raw) = fs::read(&file_path) {
2205 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2206 if raw_oid == ie.oid {
2207 eff_oid = ie.oid;
2208 }
2209 }
2210 }
2211
2212 if eff_oid != ie.oid || worktree_mode != ie.mode {
2213 let path_owned = path_str_ref.to_owned();
2214 result.push(DiffEntry {
2215 status: DiffStatus::Modified,
2216 old_path: Some(path_owned.clone()),
2217 new_path: Some(path_owned),
2218 old_mode: format_mode(ie.mode),
2219 new_mode: format_mode(worktree_mode),
2220 old_oid: ie.oid,
2221 new_oid: eff_oid,
2222 score: None,
2223 });
2224 }
2225 }
2226 Err(e) if e.kind() == std::io::ErrorKind::NotFound
2227 || e.raw_os_error() == Some(20) => {
2228 result.push(DiffEntry {
2230 status: DiffStatus::Deleted,
2231 old_path: Some(path_str_ref.to_owned()),
2232 new_path: None,
2233 old_mode: format_mode(ie.mode),
2234 new_mode: "000000".to_owned(),
2235 old_oid: ie.oid,
2236 new_oid: zero_oid(),
2237 score: None,
2238 });
2239 }
2240 Err(e) => return Err(Error::Io(e)),
2241 }
2242 }
2243
2244 for (path, (_, base_entry)) in unmerged_base {
2245 let file_path = work_tree.join(&path);
2246 let wt_meta = match fs::symlink_metadata(&file_path) {
2247 Ok(meta) => Some(meta),
2248 Err(e)
2249 if e.kind() == std::io::ErrorKind::NotFound
2250 || e.raw_os_error() == Some(20) =>
2251 {
2252 None
2253 }
2254 Err(e) => return Err(Error::Io(e)),
2255 };
2256
2257 let new_mode = wt_meta.as_ref().map_or_else(
2258 || "000000".to_owned(),
2259 |meta| format_mode(mode_from_metadata(meta)),
2260 );
2261 result.push(DiffEntry {
2262 status: DiffStatus::Unmerged,
2263 old_path: Some(path.clone()),
2264 new_path: Some(path.clone()),
2265 old_mode: "000000".to_owned(),
2266 new_mode,
2267 old_oid: zero_oid(),
2268 new_oid: zero_oid(),
2269 score: None,
2270 });
2271
2272 if let Some(meta) = wt_meta {
2273 let file_attrs = crlf::get_file_attrs(&attrs, &path, false, &config);
2274 let wt_oid = hash_worktree_file(
2275 odb,
2276 &file_path,
2277 &meta,
2278 &conv,
2279 &file_attrs,
2280 &path,
2281 Some(base_entry),
2282 )?;
2283 let wt_mode = mode_from_metadata(&meta);
2284 if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
2285 result.push(DiffEntry {
2286 status: DiffStatus::Modified,
2287 old_path: Some(path.clone()),
2288 new_path: Some(path),
2289 old_mode: format_mode(base_entry.mode),
2290 new_mode: format_mode(wt_mode),
2291 old_oid: base_entry.oid,
2292 new_oid: wt_oid,
2293 score: None,
2294 });
2295 }
2296 }
2297 }
2298
2299 Ok(result)
2300}
2301
2302fn entry_is_racy(ie: &IndexEntry, index_mtime: Option<(u32, u32)>) -> bool {
2303 let Some((index_mtime_sec, index_mtime_nsec)) = index_mtime else {
2304 return false;
2305 };
2306 if index_mtime_sec == 0 {
2307 return false;
2308 }
2309 index_mtime_sec < ie.mtime_sec
2310 || (index_mtime_sec == ie.mtime_sec && index_mtime_nsec <= ie.mtime_nsec)
2311}
2312
2313pub fn worktree_differs_from_index_entry(
2321 odb: &Odb,
2322 work_tree: &Path,
2323 ie: &IndexEntry,
2324 ignore_submodule_untracked: bool,
2325) -> Result<bool> {
2326 use crate::config::ConfigSet;
2327 use crate::crlf;
2328
2329 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
2330 let file_path = work_tree.join(path_str_ref);
2331
2332 if ie.mode == 0o160000 {
2333 let sub_head_oid = read_submodule_head(&file_path);
2334 let ref_matches = match sub_head_oid {
2335 Some(oid) => oid == ie.oid,
2336 None => submodule_worktree_is_unpopulated_placeholder(&file_path),
2337 };
2338 let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
2339 if ignore_submodule_untracked {
2340 flags.untracked = false;
2341 }
2342 return Ok(!ref_matches || flags.modified || flags.untracked);
2343 }
2344
2345 let meta = match fs::symlink_metadata(&file_path) {
2346 Ok(m) => m,
2347 Err(e)
2348 if e.kind() == std::io::ErrorKind::NotFound
2349 || e.raw_os_error() == Some(20) =>
2350 {
2351 return Ok(true);
2352 }
2353 Err(e) => return Err(Error::Io(e)),
2354 };
2355
2356 if meta.is_dir() {
2357 return Ok(true);
2358 }
2359
2360 let worktree_mode = mode_from_metadata(&meta);
2361 if worktree_mode != ie.mode {
2362 return Ok(true);
2363 }
2364
2365 let git_dir = work_tree.join(".git");
2366 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
2367 let conv = crlf::ConversionConfig::from_config(&config);
2368 let attrs = crlf::load_gitattributes(work_tree);
2369 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
2370 let worktree_oid = hash_worktree_file(
2371 odb,
2372 &file_path,
2373 &meta,
2374 &conv,
2375 &file_attrs,
2376 path_str_ref,
2377 Some(ie),
2378 )?;
2379
2380 let mut eff_oid = worktree_oid;
2381 if eff_oid != ie.oid {
2382 if let Ok(raw) = fs::read(&file_path) {
2383 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2384 if raw_oid == ie.oid {
2385 eff_oid = ie.oid;
2386 }
2387 }
2388 }
2389
2390 Ok(eff_oid != ie.oid)
2391}
2392
2393pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
2394 if meta.len() as u32 != ie.size {
2396 return false;
2397 }
2398 #[cfg(unix)]
2399 {
2400 use std::os::unix::fs::MetadataExt;
2401 if meta.mtime() as u32 != ie.mtime_sec {
2403 return false;
2404 }
2405 if meta.mtime_nsec() as u32 != ie.mtime_nsec {
2406 return false;
2407 }
2408 if meta.ctime() as u32 != ie.ctime_sec {
2410 return false;
2411 }
2412 if meta.ctime_nsec() as u32 != ie.ctime_nsec {
2413 return false;
2414 }
2415 if meta.ino() as u32 != ie.ino {
2417 return false;
2418 }
2419 if meta.dev() as u32 != ie.dev {
2420 return false;
2421 }
2422 }
2423 #[cfg(not(unix))]
2424 {
2425 use std::time::UNIX_EPOCH;
2426 if let Ok(mtime) = meta.modified() {
2427 if let Ok(dur) = mtime.duration_since(UNIX_EPOCH) {
2428 if dur.as_secs() as u32 != ie.mtime_sec {
2429 return false;
2430 }
2431 if dur.subsec_nanos() != ie.mtime_nsec {
2432 return false;
2433 }
2434 }
2435 }
2436 }
2437 true
2438}
2439
2440pub fn refresh_index_stat_content_verified(
2460 index: &mut Index,
2461 work_tree: &Path,
2462 index_mtime: Option<(u32, u32)>,
2463) -> bool {
2464 use crate::index::{MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK};
2465 let mut changed = false;
2466 for ie in &mut index.entries {
2467 if ie.stage() != 0 || ie.skip_worktree() || ie.assume_unchanged() || ie.intent_to_add() {
2468 continue;
2469 }
2470 if ie.mode != MODE_REGULAR && ie.mode != MODE_EXECUTABLE && ie.mode != MODE_SYMLINK {
2471 continue;
2472 }
2473 let Ok(path) = std::str::from_utf8(&ie.path) else {
2474 continue;
2475 };
2476 let abs = work_tree.join(path);
2477 let Ok(meta) = fs::symlink_metadata(&abs) else {
2478 continue;
2479 };
2480 if stat_matches(ie, &meta) {
2481 if entry_is_racy(ie, index_mtime)
2487 && !worktree_content_matches_index_oid(ie, &abs, &meta)
2488 {
2489 invalidate_index_stat_cache(ie);
2490 changed = true;
2491 }
2492 continue;
2493 }
2494 if !worktree_content_matches_index_oid(ie, &abs, &meta) {
2495 continue;
2496 }
2497 let refreshed = crate::index::entry_from_metadata(&meta, &ie.path, ie.oid, ie.mode);
2498 ie.ctime_sec = refreshed.ctime_sec;
2499 ie.ctime_nsec = refreshed.ctime_nsec;
2500 ie.mtime_sec = refreshed.mtime_sec;
2501 ie.mtime_nsec = refreshed.mtime_nsec;
2502 ie.dev = refreshed.dev;
2503 ie.ino = refreshed.ino;
2504 ie.uid = refreshed.uid;
2505 ie.gid = refreshed.gid;
2506 ie.size = refreshed.size;
2507 changed = true;
2508 }
2509 changed
2510}
2511
2512fn worktree_content_matches_index_oid(ie: &IndexEntry, abs: &Path, meta: &fs::Metadata) -> bool {
2514 use crate::index::{MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK};
2515 if ie.mode == MODE_SYMLINK {
2516 if !meta.file_type().is_symlink() {
2517 return false;
2518 }
2519 use std::os::unix::ffi::OsStrExt as _;
2520 fs::read_link(abs)
2521 .map(|t| Odb::hash_object_data(ObjectKind::Blob, t.as_os_str().as_bytes()) == ie.oid)
2522 .unwrap_or(false)
2523 } else if ie.mode == MODE_REGULAR || ie.mode == MODE_EXECUTABLE {
2524 if !meta.file_type().is_file() {
2525 return false;
2526 }
2527 fs::read(abs)
2528 .map(|bytes| Odb::hash_object_data(ObjectKind::Blob, &bytes) == ie.oid)
2529 .unwrap_or(false)
2530 } else {
2531 false
2532 }
2533}
2534
2535fn invalidate_index_stat_cache(ie: &mut IndexEntry) {
2537 ie.ctime_sec = 0;
2538 ie.ctime_nsec = 0;
2539 ie.mtime_sec = 0;
2540 ie.mtime_nsec = 0;
2541 ie.dev = 0;
2542 ie.ino = 0;
2543 ie.size = 0;
2544}
2545
2546fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
2549 let mut check = work_tree.to_path_buf();
2550 let components: Vec<&str> = rel_path.split('/').collect();
2551 for component in &components[..components.len().saturating_sub(1)] {
2553 check.push(component);
2554 match fs::symlink_metadata(&check) {
2555 Ok(meta) if meta.file_type().is_symlink() => return true,
2556 _ => {}
2557 }
2558 }
2559 false
2560}
2561
2562pub fn hash_worktree_file(
2563 odb: &Odb,
2564 path: &Path,
2565 meta: &fs::Metadata,
2566 conv: &crate::crlf::ConversionConfig,
2567 file_attrs: &crate::crlf::FileAttrs,
2568 rel_path: &str,
2569 index_entry: Option<&IndexEntry>,
2570) -> Result<ObjectId> {
2571 let prior_blob: Option<Vec<u8>> = index_entry
2572 .filter(|e| e.oid != zero_oid())
2573 .and_then(|e| odb.read(&e.oid).ok().map(|o| o.data));
2574 let data = if meta.file_type().is_symlink() {
2575 let target = fs::read_link(path)?;
2577 target.to_string_lossy().into_owned().into_bytes()
2578 } else if meta.is_dir() {
2579 Vec::new()
2582 } else {
2583 let raw = fs::read(path)?;
2584 let opts = crate::crlf::ConvertToGitOpts {
2587 index_blob: prior_blob.as_deref(),
2588 renormalize: false,
2589 check_safecrlf: false,
2590 };
2591 crate::crlf::convert_to_git_with_opts(&raw, rel_path, conv, file_attrs, opts).unwrap_or(raw)
2592 };
2593
2594 Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
2595}
2596
2597pub fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
2599 if meta.file_type().is_symlink() {
2600 0o120000
2601 } else {
2602 #[cfg(unix)]
2603 {
2604 if meta.mode() & 0o111 != 0 {
2605 return 0o100755;
2606 }
2607 }
2608 0o100644
2609 }
2610}
2611
2612pub fn diff_tree_to_worktree(
2629 odb: &Odb,
2630 tree_oid: Option<&ObjectId>,
2631 work_tree: &Path,
2632 index: &Index,
2633) -> Result<Vec<DiffEntry>> {
2634 use crate::config::ConfigSet;
2635 use crate::crlf;
2636
2637 let git_dir = work_tree.join(".git");
2638 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
2639 let conv = crlf::ConversionConfig::from_config(&config);
2640 let attrs = crlf::load_gitattributes(work_tree);
2641
2642 let tree_flat = match tree_oid {
2644 Some(oid) => flatten_tree(odb, oid, "")?,
2645 None => Vec::new(),
2646 };
2647 let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
2648 tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
2649
2650 let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
2652 std::collections::BTreeMap::new();
2653 let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
2654 let mut stage0_paths: std::collections::BTreeSet<Vec<u8>> = std::collections::BTreeSet::new();
2655 for ie in &index.entries {
2656 if ie.stage() != 0 {
2657 continue;
2658 }
2659 let path = String::from_utf8_lossy(&ie.path).to_string();
2660 index_entries.insert(&ie.path, ie);
2661 index_paths.insert(path);
2662 stage0_paths.insert(ie.path.clone());
2663 }
2664
2665 let mut unmerged_only_paths: std::collections::BTreeSet<String> =
2668 std::collections::BTreeSet::new();
2669 for ie in &index.entries {
2670 if !(1..=3).contains(&ie.stage()) {
2671 continue;
2672 }
2673 if stage0_paths.contains(&ie.path) {
2674 continue;
2675 }
2676 unmerged_only_paths.insert(String::from_utf8_lossy(&ie.path).into_owned());
2677 }
2678
2679 let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
2681 all_paths.extend(tree_map.keys().cloned());
2682 all_paths.extend(index_paths.iter().cloned());
2683 all_paths.extend(unmerged_only_paths.iter().cloned());
2684
2685 let mut result = Vec::new();
2686
2687 for path in &all_paths {
2688 if index_entries
2689 .get(path.as_bytes())
2690 .is_some_and(|ie| ie.skip_worktree())
2691 {
2692 continue;
2695 }
2696
2697 let tree_entry = tree_map.get(path.as_str());
2698
2699 let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
2701 || index_entries
2702 .get(path.as_bytes())
2703 .is_some_and(|ie| ie.mode == 0o160000);
2704 if is_gitlink {
2705 let sub_dir = work_tree.join(path);
2706 let index_gitlink_oid = index_entries
2707 .get(path.as_bytes())
2708 .filter(|ie| ie.mode == 0o160000)
2709 .map(|ie| ie.oid);
2710 match (tree_entry, index_gitlink_oid) {
2711 (Some(te), _) => {
2712 let sub_head = read_submodule_head_oid(&sub_dir);
2713 if sub_head.is_none() && !sub_dir.exists() {
2717 result.push(DiffEntry {
2718 status: DiffStatus::Deleted,
2719 old_path: Some(path.clone()),
2720 new_path: Some(path.clone()),
2721 old_mode: format_mode(te.mode),
2722 new_mode: "000000".to_string(),
2723 old_oid: te.oid,
2724 new_oid: zero_oid(),
2725 score: None,
2726 });
2727 continue;
2728 }
2729 let index_matches_tree = index_gitlink_oid.is_some_and(|oid| oid == te.oid);
2730 let head_differs = sub_head.as_ref() != Some(&te.oid);
2731 let dirty_while_aligned = index_matches_tree
2732 && !head_differs
2733 && submodule_has_dirty_worktree_for_super_diff(work_tree, path, &te.oid);
2734 if head_differs || dirty_while_aligned {
2735 let new_oid = if head_differs { zero_oid() } else { te.oid };
2739 result.push(DiffEntry {
2740 status: DiffStatus::Modified,
2741 old_path: Some(path.clone()),
2742 new_path: Some(path.clone()),
2743 old_mode: format_mode(te.mode),
2744 new_mode: format_mode(te.mode),
2745 old_oid: te.oid,
2746 new_oid,
2747 score: None,
2748 });
2749 }
2750 }
2751 (None, Some(idx_oid)) => {
2752 let new_oid = read_submodule_head_oid(&sub_dir).unwrap_or(idx_oid);
2757 result.push(DiffEntry {
2758 status: DiffStatus::Added,
2759 old_path: Some(path.clone()),
2760 new_path: Some(path.clone()),
2761 old_mode: "000000".to_string(),
2762 new_mode: format_mode(0o160000),
2763 old_oid: zero_oid(),
2764 new_oid,
2765 score: None,
2766 });
2767 }
2768 (None, None) => {}
2769 }
2770 continue;
2771 }
2772
2773 let file_path = work_tree.join(path);
2774
2775 let wt_meta = match fs::symlink_metadata(&file_path) {
2776 Ok(m) => Some(m),
2777 Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
2778 Err(e) => return Err(Error::Io(e)),
2779 };
2780
2781 if unmerged_only_paths.contains(path) {
2782 if let (Some(te), Some(meta)) = (tree_entry, wt_meta.as_ref()) {
2783 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
2784 let wt_oid =
2785 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, None)?;
2786 let wt_mode = mode_from_metadata(meta);
2787 if wt_oid != te.oid || wt_mode != te.mode {
2788 result.push(DiffEntry {
2789 status: DiffStatus::Modified,
2790 old_path: Some(path.clone()),
2791 new_path: Some(path.clone()),
2792 old_mode: format_mode(te.mode),
2793 new_mode: format_mode(wt_mode),
2794 old_oid: te.oid,
2795 new_oid: wt_oid,
2796 score: None,
2797 });
2798 }
2799 }
2800 continue;
2801 }
2802
2803 match (tree_entry, wt_meta) {
2804 (Some(te), Some(ref meta)) => {
2805 let wt_mode = mode_from_metadata(meta);
2806 let Some(ie) = index_entries.get(path.as_bytes()) else {
2807 continue;
2808 };
2809
2810 let index_matches_tree = ie.oid == te.oid && ie.mode == te.mode;
2811
2812 if index_matches_tree && wt_mode == te.mode && stat_matches(ie, meta) {
2814 continue;
2815 }
2816
2817 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
2818 let idx_ent = index_entries.get(path.as_bytes()).copied();
2819
2820 if ie.oid == te.oid && ie.mode != te.mode {
2822 result.push(DiffEntry {
2823 status: DiffStatus::Modified,
2824 old_path: Some(path.clone()),
2825 new_path: Some(path.clone()),
2826 old_mode: format_mode(te.mode),
2827 new_mode: format_mode(ie.mode),
2828 old_oid: te.oid,
2829 new_oid: te.oid,
2830 score: None,
2831 });
2832 continue;
2833 }
2834
2835 if index_matches_tree {
2838 let wt_oid = hash_worktree_file(
2839 odb,
2840 &file_path,
2841 meta,
2842 &conv,
2843 &file_attrs,
2844 path,
2845 idx_ent,
2846 )?;
2847 let mut eff_oid = wt_oid;
2848 if eff_oid != te.oid {
2849 if let Ok(raw) = fs::read(&file_path) {
2850 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2851 if raw_oid == te.oid {
2852 eff_oid = te.oid;
2853 }
2854 }
2855 }
2856 if eff_oid != te.oid {
2857 result.push(DiffEntry {
2858 status: DiffStatus::Modified,
2859 old_path: Some(path.clone()),
2860 new_path: Some(path.clone()),
2861 old_mode: format_mode(te.mode),
2862 new_mode: format_mode(wt_mode),
2863 old_oid: te.oid,
2864 new_oid: eff_oid,
2865 score: None,
2866 });
2867 } else if wt_mode != te.mode {
2868 result.push(DiffEntry {
2869 status: DiffStatus::Modified,
2870 old_path: Some(path.clone()),
2871 new_path: Some(path.clone()),
2872 old_mode: format_mode(te.mode),
2873 new_mode: format_mode(wt_mode),
2874 old_oid: te.oid,
2875 new_oid: te.oid,
2876 score: None,
2877 });
2878 }
2879 continue;
2880 }
2881
2882 let wt_oid =
2884 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, idx_ent)?;
2885 let mut eff_oid = wt_oid;
2886 if eff_oid != te.oid {
2887 if let Ok(raw) = fs::read(&file_path) {
2888 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2889 if raw_oid == te.oid {
2890 eff_oid = te.oid;
2891 }
2892 }
2893 }
2894 if eff_oid != te.oid || wt_mode != te.mode {
2895 result.push(DiffEntry {
2896 status: DiffStatus::Modified,
2897 old_path: Some(path.clone()),
2898 new_path: Some(path.clone()),
2899 old_mode: format_mode(te.mode),
2900 new_mode: format_mode(wt_mode),
2901 old_oid: te.oid,
2902 new_oid: eff_oid,
2903 score: None,
2904 });
2905 }
2906 }
2907 (Some(te), None) => {
2908 result.push(DiffEntry {
2910 status: DiffStatus::Deleted,
2911 old_path: Some(path.clone()),
2912 new_path: None,
2913 old_mode: format_mode(te.mode),
2914 new_mode: "000000".to_owned(),
2915 old_oid: te.oid,
2916 new_oid: zero_oid(),
2917 score: None,
2918 });
2919 }
2920 (None, Some(ref meta)) => {
2921 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
2923 let wt_oid = hash_worktree_file(
2924 odb,
2925 &file_path,
2926 meta,
2927 &conv,
2928 &file_attrs,
2929 path,
2930 index_entries.get(path.as_bytes()).copied(),
2931 )?;
2932 let wt_mode = mode_from_metadata(meta);
2933 result.push(DiffEntry {
2934 status: DiffStatus::Added,
2935 old_path: None,
2936 new_path: Some(path.clone()),
2937 old_mode: "000000".to_owned(),
2938 new_mode: format_mode(wt_mode),
2939 old_oid: zero_oid(),
2940 new_oid: wt_oid,
2941 score: None,
2942 });
2943 }
2944 (None, None) => {
2945 }
2947 }
2948 }
2949
2950 result.sort_by(|a, b| a.path().cmp(b.path()));
2951 Ok(result)
2952}
2953
2954fn read_added_entry_bytes(
2957 odb: &Odb,
2958 entry: &DiffEntry,
2959 work_root: Option<&Path>,
2960) -> Option<Vec<u8>> {
2961 if entry.new_oid != zero_oid() {
2962 return odb.read(&entry.new_oid).ok().map(|obj| obj.data);
2963 }
2964 let path = entry.new_path.as_deref()?;
2965 let root = work_root?;
2966 fs::read(root.join(path)).ok()
2967}
2968
2969fn modified_as_copy_from_sources(
2970 odb: &Odb,
2971 work_root: Option<&Path>,
2972 e: &DiffEntry,
2973 threshold: u32,
2974 sources: &[(String, ObjectId, bool)],
2975 source_contents: &[Option<Vec<u8>>],
2976 source_tree_entries: &[(String, String, ObjectId)],
2977) -> Option<DiffEntry> {
2978 fn regular_file_mode(mode: &str) -> bool {
2979 mode == "100644" || mode == "100755"
2980 }
2981
2982 if e.status != DiffStatus::Modified || !regular_file_mode(&e.new_mode) {
2983 return None;
2984 }
2985 let new_data = read_added_entry_bytes(odb, e, work_root)?;
2986 let new_oid_eff = if e.new_oid != zero_oid() {
2987 e.new_oid
2988 } else {
2989 Odb::hash_object_data(ObjectKind::Blob, &new_data)
2990 };
2991
2992 let mut best: Option<(usize, u32)> = None;
2993 for (si, (src_path, src_oid, is_deleted)) in sources.iter().enumerate() {
2994 if *is_deleted {
2995 continue;
2996 }
2997 if e.new_path.as_deref() == Some(src_path.as_str()) {
2998 continue;
2999 }
3000 let src_mode_str = source_tree_entries
3001 .iter()
3002 .find(|(p, _, _)| p == src_path)
3003 .map(|(_, m, _)| m.as_str())
3004 .unwrap_or("100644");
3005 if !regular_file_mode(src_mode_str) {
3006 continue;
3007 }
3008
3009 let score = if *src_oid == new_oid_eff {
3010 100
3011 } else {
3012 match (&source_contents[si], Some(new_data.as_slice())) {
3013 (Some(old_data), Some(nd)) => compute_similarity(old_data, nd),
3014 _ => 0,
3015 }
3016 };
3017 if score >= threshold {
3018 let replace = match best {
3019 None => true,
3020 Some((_, s)) => score > s,
3021 };
3022 if replace {
3023 best = Some((si, score));
3024 }
3025 }
3026 }
3027
3028 let (si, score) = best?;
3029 let (src_path, src_oid, _) = &sources[si];
3030 let src_mode = source_tree_entries
3031 .iter()
3032 .find(|(p, _, _)| p == src_path)
3033 .map(|(_, m, _)| m.clone())
3034 .unwrap_or_else(|| e.old_mode.clone());
3035
3036 Some(DiffEntry {
3037 status: DiffStatus::Copied,
3038 old_path: Some(src_path.clone()),
3039 new_path: e.new_path.clone(),
3040 old_mode: src_mode,
3041 new_mode: e.new_mode.clone(),
3042 old_oid: *src_oid,
3043 new_oid: e.new_oid,
3044 score: Some(score),
3045 })
3046}
3047
3048pub fn detect_renames(
3060 odb: &Odb,
3061 work_root: Option<&Path>,
3062 entries: Vec<DiffEntry>,
3063 threshold: u32,
3064) -> Vec<DiffEntry> {
3065 let mut deleted: Vec<DiffEntry> = Vec::new();
3067 let mut added: Vec<DiffEntry> = Vec::new();
3068 let mut others: Vec<DiffEntry> = Vec::new();
3069
3070 for entry in entries {
3071 match entry.status {
3072 DiffStatus::Deleted => deleted.push(entry),
3073 DiffStatus::Added => added.push(entry),
3074 _ => others.push(entry),
3075 }
3076 }
3077
3078 if deleted.is_empty() || added.is_empty() {
3079 let mut result = others;
3081 result.extend(deleted);
3082 result.extend(added);
3083 result.sort_by(|a, b| a.path().cmp(b.path()));
3084 return result;
3085 }
3086
3087 let deleted_contents: Vec<Option<Vec<u8>>> = deleted
3089 .iter()
3090 .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
3091 .collect();
3092
3093 let added_contents: Vec<Option<Vec<u8>>> = added
3095 .iter()
3096 .map(|a| read_added_entry_bytes(odb, a, work_root))
3097 .collect();
3098
3099 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
3102
3103 fn is_regularish_mode(mode: &str) -> bool {
3104 mode == "100644" || mode == "100755"
3105 }
3106
3107 fn same_path_same_blob(del: &DiffEntry, add: &DiffEntry) -> bool {
3108 del.old_path == add.new_path && del.old_oid == add.new_oid && del.old_mode == add.new_mode
3109 }
3110
3111 for (di, del) in deleted.iter().enumerate() {
3112 for (ai, add) in added.iter().enumerate() {
3113 if del.old_oid == add.new_oid {
3115 scores.push((100, di, ai));
3116 continue;
3117 }
3118
3119 if !is_regularish_mode(&del.old_mode) || !is_regularish_mode(&add.new_mode) {
3122 continue;
3123 }
3124
3125 let score = match (&deleted_contents[di], &added_contents[ai]) {
3126 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
3127 _ => 0,
3128 };
3129
3130 if score >= threshold {
3131 scores.push((score, di, ai));
3132 }
3133 }
3134 }
3135
3136 scores.sort_by(|a, b| {
3140 let a_noop = same_path_same_blob(&deleted[a.1], &added[a.2]);
3141 let b_noop = same_path_same_blob(&deleted[b.1], &added[b.2]);
3142 let a_same = same_basename(&deleted[a.1], &added[a.2]);
3143 let b_same = same_basename(&deleted[b.1], &added[b.2]);
3144 a_noop
3145 .cmp(&b_noop)
3146 .then_with(|| b_same.cmp(&a_same))
3147 .then_with(|| b.0.cmp(&a.0))
3148 });
3149
3150 let mut used_deleted = vec![false; deleted.len()];
3151 let mut used_added = vec![false; added.len()];
3152 let mut renames: Vec<DiffEntry> = Vec::new();
3153
3154 for (score, di, ai) in &scores {
3155 if used_deleted[*di] || used_added[*ai] {
3156 continue;
3157 }
3158 used_deleted[*di] = true;
3159 used_added[*ai] = true;
3160
3161 let del = &deleted[*di];
3162 let add = &added[*ai];
3163
3164 if same_path_same_blob(del, add) {
3169 continue;
3170 }
3171
3172 renames.push(DiffEntry {
3173 status: DiffStatus::Renamed,
3174 old_path: del.old_path.clone(),
3175 new_path: add.new_path.clone(),
3176 old_mode: del.old_mode.clone(),
3177 new_mode: add.new_mode.clone(),
3178 old_oid: del.old_oid,
3179 new_oid: add.new_oid,
3180 score: Some(*score),
3181 });
3182 }
3183
3184 let mut result = others;
3186 result.extend(renames);
3187 for (i, entry) in deleted.into_iter().enumerate() {
3188 if !used_deleted[i] {
3189 result.push(entry);
3190 }
3191 }
3192 for (i, entry) in added.into_iter().enumerate() {
3193 if !used_added[i] {
3194 result.push(entry);
3195 }
3196 }
3197
3198 result.sort_by(|a, b| a.path().cmp(b.path()));
3199 result
3200}
3201
3202pub fn detect_copies(
3213 odb: &Odb,
3214 work_root: Option<&Path>,
3215 entries: Vec<DiffEntry>,
3216 threshold: u32,
3217 find_copies_harder: bool,
3218 source_tree_entries: &[(String, String, ObjectId)],
3219) -> Vec<DiffEntry> {
3220 use std::collections::{HashMap, HashSet};
3221
3222 let mut deleted: Vec<DiffEntry> = Vec::new();
3224 let mut added: Vec<DiffEntry> = Vec::new();
3225 let mut others: Vec<DiffEntry> = Vec::new();
3226
3227 for entry in entries {
3228 match entry.status {
3229 DiffStatus::Deleted => deleted.push(entry),
3230 DiffStatus::Added => added.push(entry),
3231 _ => others.push(entry),
3232 }
3233 }
3234
3235 let mut sources: Vec<(String, ObjectId, bool)> = Vec::new(); let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
3239
3240 for entry in &deleted {
3241 if let Some(ref path) = entry.old_path {
3242 deleted_source_idx.insert(path.clone(), sources.len());
3243 sources.push((path.clone(), entry.old_oid, true));
3244 }
3245 }
3246
3247 for entry in &others {
3250 if matches!(entry.status, DiffStatus::Modified | DiffStatus::TypeChanged) {
3251 if let Some(ref old_path) = entry.old_path {
3252 if !sources.iter().any(|(p, _, _)| p == old_path) {
3253 sources.push((old_path.clone(), entry.old_oid, false));
3254 }
3255 }
3256 }
3257 }
3258
3259 if find_copies_harder {
3261 for (path, _mode, oid) in source_tree_entries {
3262 if !sources.iter().any(|(p, _, _)| p == path) {
3263 sources.push((path.clone(), *oid, false));
3264 }
3265 }
3266 }
3267
3268 if sources.is_empty() {
3269 let mut result = others;
3270 result.extend(deleted);
3271 result.extend(added);
3272 result.sort_by(|a, b| a.path().cmp(b.path()));
3273 return result;
3274 }
3275
3276 let source_contents: Vec<Option<Vec<u8>>> = sources
3278 .iter()
3279 .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
3280 .collect();
3281
3282 let mut result_entries: Vec<DiffEntry> = Vec::new();
3283 let mut renamed_deleted: HashSet<usize> = HashSet::new();
3284 let mut used_added2 = vec![false; added.len()];
3285
3286 if !added.is_empty() {
3287 let added_contents: Vec<Option<Vec<u8>>> = added
3289 .iter()
3290 .map(|a| read_added_entry_bytes(odb, a, work_root))
3291 .collect();
3292
3293 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
3295 for (si, (src_path, src_oid, _)) in sources.iter().enumerate() {
3296 for (ai, add) in added.iter().enumerate() {
3297 if add.new_path.as_deref() == Some(src_path.as_str()) {
3300 continue;
3301 }
3302 let add_oid = if add.new_oid != zero_oid() {
3303 add.new_oid
3304 } else if let Some(ref data) = added_contents[ai] {
3305 Odb::hash_object_data(ObjectKind::Blob, data)
3306 } else {
3307 zero_oid()
3308 };
3309 if *src_oid == add_oid {
3310 scores.push((100, si, ai));
3311 continue;
3312 }
3313 let score = match (&source_contents[si], &added_contents[ai]) {
3314 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
3315 _ => 0,
3316 };
3317 if score >= threshold {
3318 scores.push((score, si, ai));
3319 }
3320 }
3321 }
3322
3323 scores.sort_by(|a, b| b.0.cmp(&a.0));
3325
3326 let mut used_added = vec![false; added.len()];
3328 let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
3329 for &(score, si, ai) in &scores {
3330 if used_added[ai] {
3331 continue;
3332 }
3333 used_added[ai] = true;
3334 source_to_added.entry(si).or_default().push((ai, score));
3335 }
3336
3337 for (&si, assignments_for_src) in &source_to_added {
3339 let (_, _, is_deleted) = &sources[si];
3340 if *is_deleted && !assignments_for_src.is_empty() {
3341 let rename_ai = assignments_for_src
3344 .iter()
3345 .max_by_key(|(ai, _score)| added[*ai].path().to_string())
3346 .map(|(ai, _)| *ai);
3347
3348 for &(ai, score) in assignments_for_src {
3349 let (ref src_path, _, _) = sources[si];
3350 let add = &added[ai];
3351 let src_mode = source_tree_entries
3352 .iter()
3353 .find(|(p, _, _)| p == src_path)
3354 .map(|(_, m, _)| m.clone())
3355 .unwrap_or_else(|| add.old_mode.clone());
3356
3357 let is_rename = Some(ai) == rename_ai;
3358 result_entries.push(DiffEntry {
3359 status: if is_rename {
3360 DiffStatus::Renamed
3361 } else {
3362 DiffStatus::Copied
3363 },
3364 old_path: Some(src_path.clone()),
3365 new_path: add.new_path.clone(),
3366 old_mode: src_mode,
3367 new_mode: add.new_mode.clone(),
3368 old_oid: sources[si].1,
3369 new_oid: add.new_oid,
3370 score: Some(score),
3371 });
3372 used_added2[ai] = true;
3373 }
3374 renamed_deleted.insert(si);
3375 } else {
3376 for &(ai, score) in assignments_for_src {
3378 let (ref src_path, _, _) = sources[si];
3379 let add = &added[ai];
3380 let src_mode = source_tree_entries
3381 .iter()
3382 .find(|(p, _, _)| p == src_path)
3383 .map(|(_, m, _)| m.clone())
3384 .unwrap_or_else(|| add.old_mode.clone());
3385
3386 result_entries.push(DiffEntry {
3387 status: DiffStatus::Copied,
3388 old_path: Some(src_path.clone()),
3389 new_path: add.new_path.clone(),
3390 old_mode: src_mode,
3391 new_mode: add.new_mode.clone(),
3392 old_oid: sources[si].1,
3393 new_oid: add.new_oid,
3394 score: Some(score),
3395 });
3396 used_added2[ai] = true;
3397 }
3398 }
3399 }
3400 }
3401
3402 for entry in deleted.into_iter() {
3404 if let Some(ref path) = entry.old_path {
3405 if let Some(&si) = deleted_source_idx.get(path) {
3406 if renamed_deleted.contains(&si) {
3407 continue;
3409 }
3410 }
3411 }
3412 result_entries.push(entry);
3413 }
3414
3415 let mut result = others;
3416 result.extend(result_entries);
3417 for (i, entry) in added.into_iter().enumerate() {
3419 if !used_added2[i] {
3420 result.push(entry);
3421 }
3422 }
3423
3424 let mut final_result = Vec::with_capacity(result.len());
3425 for e in result {
3426 if let Some(c) = modified_as_copy_from_sources(
3427 odb,
3428 work_root,
3429 &e,
3430 threshold,
3431 &sources,
3432 &source_contents,
3433 source_tree_entries,
3434 ) {
3435 final_result.push(c);
3436 } else {
3437 final_result.push(e);
3438 }
3439 }
3440
3441 final_result.sort_by(|a, b| a.path().cmp(b.path()));
3442 final_result
3443}
3444
3445pub fn status_apply_rename_copy_detection(
3455 odb: &Odb,
3456 unstaged_raw: Vec<DiffEntry>,
3457 threshold: u32,
3458 copies: bool,
3459 head_tree: Option<&ObjectId>,
3460) -> Result<Vec<DiffEntry>> {
3461 if !copies {
3462 return Ok(detect_renames(odb, None, unstaged_raw, threshold));
3463 }
3464 let source_tree_entries: Vec<(String, String, ObjectId)> = match head_tree {
3465 Some(oid) => flatten_tree(odb, oid, "")?
3466 .into_iter()
3467 .map(|e| (e.path, format_mode(e.mode), e.oid))
3468 .collect(),
3469 None => Vec::new(),
3470 };
3471 Ok(detect_copies(
3472 odb,
3473 None,
3474 unstaged_raw,
3475 threshold,
3476 false,
3477 &source_tree_entries,
3478 ))
3479}
3480
3481pub fn format_rename_path(old: &str, new: &str) -> String {
3489 let ob = old.as_bytes();
3490 let nb = new.as_bytes();
3491
3492 let pfx = {
3494 let mut last_sep = 0usize;
3495 let min_len = ob.len().min(nb.len());
3496 for i in 0..min_len {
3497 if ob[i] != nb[i] {
3498 break;
3499 }
3500 if ob[i] == b'/' {
3501 last_sep = i + 1;
3502 }
3503 }
3504 last_sep
3505 };
3506
3507 let mut sfx = {
3509 let mut last_sep = 0usize;
3510 let min_len = ob.len().min(nb.len());
3511 for i in 0..min_len {
3512 let oi = ob.len() - 1 - i;
3513 let ni = nb.len() - 1 - i;
3514 if ob[oi] != nb[ni] {
3515 break;
3516 }
3517 if ob[oi] == b'/' {
3518 last_sep = i + 1;
3519 }
3520 }
3521 last_sep
3522 };
3523
3524 let mut sfx_at_old = ob.len() - sfx;
3526 let mut sfx_at_new = nb.len() - sfx;
3527
3528 while pfx > sfx_at_old && pfx > sfx_at_new && sfx > 0 {
3531 let suffix_bytes = &ob[sfx_at_old..];
3533 let mut new_sfx = 0;
3534 for (i, &b) in suffix_bytes.iter().enumerate().skip(1) {
3536 if b == b'/' {
3537 new_sfx = sfx - i;
3538 break;
3539 }
3540 }
3541 if new_sfx == 0 || new_sfx >= sfx {
3542 sfx_at_old = ob.len();
3543 sfx_at_new = nb.len();
3544 break;
3545 }
3546 sfx = new_sfx;
3547 sfx_at_old = ob.len() - sfx;
3548 sfx_at_new = nb.len() - sfx;
3549 }
3550
3551 let prefix = &old[..pfx];
3558 let suffix = &old[sfx_at_old..];
3559 let old_mid = if pfx <= sfx_at_old {
3560 &old[pfx..sfx_at_old]
3561 } else {
3562 ""
3563 };
3564 let new_mid = if pfx <= sfx_at_new {
3565 &new[pfx..sfx_at_new]
3566 } else {
3567 ""
3568 };
3569
3570 if prefix.is_empty() && suffix.is_empty() {
3571 return format!("{old} => {new}");
3572 }
3573
3574 format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
3575}
3576
3577fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
3579 let old = del.old_path.as_deref().unwrap_or("");
3580 let new = add.new_path.as_deref().unwrap_or("");
3581 let old_base = old.rsplit('/').next().unwrap_or(old);
3582 let new_base = new.rsplit('/').next().unwrap_or(new);
3583 old_base == new_base && !old_base.is_empty()
3584}
3585
3586fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
3591 let old_norm = crate::crlf::crlf_to_lf(old);
3594 let new_norm = crate::crlf::crlf_to_lf(new);
3595
3596 let src_size = old_norm.len();
3597 let dst_size = new_norm.len();
3598
3599 if src_size == 0 && dst_size == 0 {
3600 return 100;
3601 }
3602 let total = src_size + dst_size;
3603 if total == 0 {
3604 return 100;
3605 }
3606
3607 use similar::{ChangeTag, TextDiff};
3609 let old_str = String::from_utf8_lossy(&old_norm);
3610 let new_str = String::from_utf8_lossy(&new_norm);
3611 let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
3612
3613 let mut shared_bytes = 0usize;
3614 for change in diff.iter_all_changes() {
3615 if change.tag() == ChangeTag::Equal {
3616 shared_bytes += change.value().len();
3618 }
3619 }
3620
3621 let max_size = src_size.max(dst_size);
3624
3625 ((shared_bytes * 100) / max_size).min(100) as u32
3626}
3627
3628#[must_use]
3632pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
3633 compute_similarity(old, new)
3634}
3635
3636pub fn format_raw(entry: &DiffEntry) -> String {
3642 let path = match entry.status {
3643 DiffStatus::Renamed | DiffStatus::Copied => {
3644 format!(
3645 "{}\t{}",
3646 entry.old_path.as_deref().unwrap_or(""),
3647 entry.new_path.as_deref().unwrap_or("")
3648 )
3649 }
3650 _ => entry.path().to_owned(),
3651 };
3652
3653 let status_str = match (entry.status, entry.score) {
3654 (DiffStatus::Renamed, Some(s)) => format!("R{:03}", s),
3655 (DiffStatus::Copied, Some(s)) => format!("C{:03}", s),
3656 _ => entry.status.letter().to_string(),
3657 };
3658
3659 let (old_hex, new_hex) = raw_oid_hex_pair(&entry.old_oid, &entry.new_oid);
3660 format!(
3661 ":{} {} {} {} {}\t{}",
3662 entry.old_mode, entry.new_mode, old_hex, new_hex, status_str, path
3663 )
3664}
3665
3666fn raw_oid_hex_pair(old: &ObjectId, new: &ObjectId) -> (String, String) {
3674 let width = if !old.is_zero() {
3675 old.algo().hex_len()
3676 } else if !new.is_zero() {
3677 new.algo().hex_len()
3678 } else {
3679 old.algo().hex_len()
3680 };
3681 let render = |oid: &ObjectId| {
3682 if oid.is_zero() {
3683 "0".repeat(width)
3684 } else {
3685 oid.to_hex()
3686 }
3687 };
3688 (render(old), render(new))
3689}
3690
3691pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
3693 let ellipsis = if std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes") {
3694 "..."
3695 } else {
3696 ""
3697 };
3698 let old_hex = format!("{}", entry.old_oid);
3699 let new_hex = format!("{}", entry.new_oid);
3700 let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
3701 let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
3702
3703 let path = match entry.status {
3705 DiffStatus::Renamed | DiffStatus::Copied => format!(
3706 "{}\t{}",
3707 entry.old_path.as_deref().unwrap_or(""),
3708 entry.new_path.as_deref().unwrap_or("")
3709 ),
3710 _ => entry.path().to_owned(),
3711 };
3712 let status_str = match (entry.status, entry.score) {
3713 (DiffStatus::Renamed, Some(s)) => format!("R{s:03}"),
3714 (DiffStatus::Copied, Some(s)) => format!("C{s:03}"),
3715 _ => entry.status.letter().to_string(),
3716 };
3717
3718 format!(
3719 ":{} {} {}{} {}{} {}\t{}",
3720 entry.old_mode,
3721 entry.new_mode,
3722 old_abbrev,
3723 ellipsis,
3724 new_abbrev,
3725 ellipsis,
3726 status_str,
3727 path
3728 )
3729}
3730
3731pub fn unified_diff(
3746 old_content: &str,
3747 new_content: &str,
3748 old_path: &str,
3749 new_path: &str,
3750 context_lines: usize,
3751 indent_heuristic: bool,
3752 quote_path_fully: bool,
3753) -> String {
3754 unified_diff_with_prefix(
3755 old_content,
3756 new_content,
3757 old_path,
3758 new_path,
3759 context_lines,
3760 0,
3761 "a/",
3762 "b/",
3763 indent_heuristic,
3764 quote_path_fully,
3765 )
3766}
3767
3768#[allow(clippy::too_many_arguments)] pub fn unified_diff_with_prefix(
3774 old_content: &str,
3775 new_content: &str,
3776 old_path: &str,
3777 new_path: &str,
3778 context_lines: usize,
3779 inter_hunk_context: usize,
3780 src_prefix: &str,
3781 dst_prefix: &str,
3782 indent_heuristic: bool,
3783 quote_path_fully: bool,
3784) -> String {
3785 unified_diff_with_prefix_and_funcname(
3786 old_content,
3787 new_content,
3788 old_path,
3789 new_path,
3790 context_lines,
3791 inter_hunk_context,
3792 src_prefix,
3793 dst_prefix,
3794 None,
3795 indent_heuristic,
3796 quote_path_fully,
3797 )
3798}
3799
3800#[allow(clippy::too_many_arguments)]
3803pub fn unified_diff_with_prefix_and_funcname(
3804 old_content: &str,
3805 new_content: &str,
3806 old_path: &str,
3807 new_path: &str,
3808 context_lines: usize,
3809 inter_hunk_context: usize,
3810 src_prefix: &str,
3811 dst_prefix: &str,
3812 funcname_matcher: Option<&FuncnameMatcher>,
3813 indent_heuristic: bool,
3814 quote_path_fully: bool,
3815) -> String {
3816 unified_diff_with_prefix_and_funcname_and_algorithm(
3817 old_content,
3818 new_content,
3819 old_path,
3820 new_path,
3821 context_lines,
3822 inter_hunk_context,
3823 src_prefix,
3824 dst_prefix,
3825 funcname_matcher,
3826 similar::Algorithm::Myers,
3827 false,
3828 false,
3829 indent_heuristic,
3830 quote_path_fully,
3831 )
3832}
3833
3834#[allow(clippy::too_many_arguments)]
3840pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
3841 old_content: &str,
3842 new_content: &str,
3843 old_path: &str,
3844 new_path: &str,
3845 context_lines: usize,
3846 inter_hunk_context: usize,
3847 src_prefix: &str,
3848 dst_prefix: &str,
3849 funcname_matcher: Option<&FuncnameMatcher>,
3850 algorithm: similar::Algorithm,
3851 function_context: bool,
3852 use_git_histogram: bool,
3853 indent_heuristic: bool,
3854 quote_path_fully: bool,
3855) -> String {
3856 if function_context {
3860 return unified_diff_with_function_context(
3861 old_content,
3862 new_content,
3863 old_path,
3864 new_path,
3865 context_lines,
3866 inter_hunk_context,
3867 src_prefix,
3868 dst_prefix,
3869 funcname_matcher,
3870 algorithm,
3871 indent_heuristic,
3872 quote_path_fully,
3873 );
3874 }
3875
3876 if use_git_histogram {
3877 return unified_diff_histogram_with_prefix_and_funcname(
3878 old_content,
3879 new_content,
3880 old_path,
3881 new_path,
3882 context_lines,
3883 inter_hunk_context,
3884 src_prefix,
3885 dst_prefix,
3886 funcname_matcher,
3887 quote_path_fully,
3888 );
3889 }
3890
3891 use crate::quote_path::format_diff_path_with_prefix;
3892 use similar::{udiff::UnifiedDiffHunk, TextDiff};
3893
3894 let diff = TextDiff::configure()
3895 .algorithm(algorithm)
3896 .diff_lines(old_content, new_content);
3897 let compacted_ops = diff_indent_heuristic::diff_lines_ops_compacted(
3898 old_content,
3899 new_content,
3900 algorithm,
3901 indent_heuristic,
3902 );
3903
3904 let mut output = String::new();
3905 if old_path == "/dev/null" {
3906 output.push_str("--- /dev/null\n");
3907 } else if src_prefix.is_empty() {
3908 output.push_str(&format!("--- {old_path}\n"));
3911 } else {
3912 output.push_str("--- ");
3913 output.push_str(&format_diff_path_with_prefix(
3914 src_prefix,
3915 old_path,
3916 quote_path_fully,
3917 ));
3918 output.push('\n');
3919 }
3920 if new_path == "/dev/null" {
3921 output.push_str("+++ /dev/null\n");
3922 } else if dst_prefix.is_empty() {
3923 output.push_str(&format!("+++ {new_path}\n"));
3924 } else {
3925 output.push_str("+++ ");
3926 output.push_str(&format_diff_path_with_prefix(
3927 dst_prefix,
3928 new_path,
3929 quote_path_fully,
3930 ));
3931 output.push('\n');
3932 }
3933
3934 let old_lines: Vec<&str> = old_content.lines().collect();
3935
3936 let max_common_gap = context_lines
3942 .saturating_mul(2)
3943 .saturating_add(inter_hunk_context);
3944 let op_groups = group_diff_ops_gap(compacted_ops, context_lines, max_common_gap);
3945
3946 for ops in op_groups {
3947 if ops.is_empty() {
3948 continue;
3949 }
3950 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
3951 let hunk_str = format!("{hunk}");
3952 if let Some(first_newline) = hunk_str.find('\n') {
3956 let header_line = &hunk_str[..first_newline];
3957 let rest = &hunk_str[first_newline..];
3958
3959 if let Some(func_ctx) =
3961 extract_function_context(header_line, &old_lines, funcname_matcher)
3962 {
3963 output.push_str(header_line);
3964 output.push(' ');
3965 output.push_str(&func_ctx);
3966 output.push_str(rest);
3967 } else {
3968 output.push_str(&hunk_str);
3969 }
3970 } else {
3971 output.push_str(&hunk_str);
3972 }
3973 }
3974
3975 output
3976}
3977
3978fn group_diff_ops_gap(
3984 mut ops: Vec<similar::DiffOp>,
3985 context: usize,
3986 max_common_gap: usize,
3987) -> Vec<Vec<similar::DiffOp>> {
3988 use similar::DiffOp;
3989 if ops.is_empty() {
3990 return vec![];
3991 }
3992
3993 let mut pending_group = Vec::new();
3994 let mut rv = Vec::new();
3995
3996 if let Some(DiffOp::Equal {
3997 old_index,
3998 new_index,
3999 len,
4000 }) = ops.first_mut()
4001 {
4002 let offset = (*len).saturating_sub(context);
4003 *old_index += offset;
4004 *new_index += offset;
4005 *len -= offset;
4006 }
4007
4008 if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
4009 *len -= (*len).saturating_sub(context);
4010 }
4011
4012 for op in ops.into_iter() {
4013 if let DiffOp::Equal {
4014 old_index,
4015 new_index,
4016 len,
4017 } = op
4018 {
4019 if len > max_common_gap {
4022 pending_group.push(DiffOp::Equal {
4023 old_index,
4024 new_index,
4025 len: context,
4026 });
4027 rv.push(pending_group);
4028 let offset = len.saturating_sub(context);
4029 pending_group = vec![DiffOp::Equal {
4030 old_index: old_index + offset,
4031 new_index: new_index + offset,
4032 len: len - offset,
4033 }];
4034 continue;
4035 }
4036 }
4037 pending_group.push(op);
4038 }
4039
4040 match &pending_group[..] {
4041 &[] | &[similar::DiffOp::Equal { .. }] => {}
4042 _ => rv.push(pending_group),
4043 }
4044
4045 rv
4046}
4047
4048fn unified_diff_with_function_context(
4050 old_content: &str,
4051 new_content: &str,
4052 old_path: &str,
4053 new_path: &str,
4054 context_lines: usize,
4055 inter_hunk_context: usize,
4056 src_prefix: &str,
4057 dst_prefix: &str,
4058 funcname_matcher: Option<&FuncnameMatcher>,
4059 algorithm: similar::Algorithm,
4060 indent_heuristic: bool,
4061 quote_path_fully: bool,
4062) -> String {
4063 use crate::quote_path::format_diff_path_with_prefix;
4064 use similar::{udiff::UnifiedDiffHunk, TextDiff};
4065
4066 let diff = TextDiff::configure()
4067 .algorithm(algorithm)
4068 .diff_lines(old_content, new_content);
4069
4070 let old_lines: Vec<&str> = old_content.lines().collect();
4071 let new_lines: Vec<&str> = new_content.lines().collect();
4072 let n_old = old_lines.len();
4073 let n_new = new_lines.len();
4074
4075 let max_common_gap = context_lines
4082 .saturating_mul(2)
4083 .saturating_add(inter_hunk_context);
4084 let all_ops = diff.ops().to_vec();
4085 let op_groups = group_diff_ops_gap(all_ops.clone(), context_lines, max_common_gap);
4086
4087 let mut ranges: Vec<(usize, usize, usize, usize)> = Vec::new();
4088
4089 for ops in op_groups {
4090 if ops.is_empty() {
4091 continue;
4092 }
4093 let i1_anchor = func_context_old_anchor(&ops, n_old);
4094 let i1_end = hunk_old_change_end_exclusive(&ops);
4095 let skip_preimage_pull =
4096 append_with_whole_function_added(&ops, n_old, n_new, &new_lines, funcname_matcher);
4097 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
4098 let hunk_str = format!("{hunk}");
4099 let header_line = hunk_str
4100 .lines()
4101 .next()
4102 .unwrap_or("")
4103 .trim_end_matches(['\r', '\n']);
4104 let Some((base_s1, _base_e1, _base_s2, _base_e2)) =
4105 parse_unified_hunk_header_ranges(header_line)
4106 else {
4107 continue;
4108 };
4109
4110 let ctx = context_lines;
4111 let (s1, e1, s2, e2) = if skip_preimage_pull {
4112 let s = n_old.saturating_sub(ctx);
4113 let s2 = map_old_line_to_new(&all_ops, s, n_new).min(n_new);
4114 (s, n_old, s2, n_new)
4115 } else {
4116 let mut s1 = base_s1.saturating_sub(ctx);
4117 let mut s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
4118
4119 let base_pre_s1 = i1_anchor.saturating_sub(ctx);
4120 if base_pre_s1 < s1 {
4121 s1 = base_pre_s1;
4122 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
4123 }
4124
4125 let fs1 = expand_func_pre_start(s1, i1_anchor, n_old, &old_lines, funcname_matcher);
4126 if fs1 < s1 {
4127 s1 = fs1;
4128 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
4129 }
4130
4131 let mut e1 = (i1_end + ctx).min(n_old);
4137 let mut e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
4138 let fe1 = expand_func_post_end(e1, i1_end, n_old, &old_lines, funcname_matcher);
4139 if fe1 > e1 {
4140 e1 = fe1;
4141 e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
4142 }
4143 (s1, e1, s2, e2)
4144 };
4145
4146 ranges.push((s1, e1, s2, e2));
4147 }
4148
4149 ranges.sort_by_key(|r| (r.0, r.2));
4152 let mut merged: Vec<(usize, usize, usize, usize)> = Vec::with_capacity(ranges.len());
4153 for (s1, e1, s2, e2) in ranges {
4154 if let Some(last) = merged.last_mut() {
4155 if s1 < last.1 {
4156 last.1 = last.1.max(e1);
4157 last.3 = last.3.max(e2);
4158 continue;
4159 }
4160 }
4161 merged.push((s1, e1, s2, e2));
4162 }
4163 let ranges = merged;
4164
4165 let mut output = String::new();
4166 if old_path == "/dev/null" {
4167 output.push_str("--- /dev/null\n");
4168 } else if src_prefix.is_empty() {
4169 output.push_str(&format!("--- {old_path}\n"));
4170 } else {
4171 output.push_str("--- ");
4172 output.push_str(&format_diff_path_with_prefix(
4173 src_prefix,
4174 old_path,
4175 quote_path_fully,
4176 ));
4177 output.push('\n');
4178 }
4179 if new_path == "/dev/null" {
4180 output.push_str("+++ /dev/null\n");
4181 } else if dst_prefix.is_empty() {
4182 output.push_str(&format!("+++ {new_path}\n"));
4183 } else {
4184 output.push_str("+++ ");
4185 output.push_str(&format_diff_path_with_prefix(
4186 dst_prefix,
4187 new_path,
4188 quote_path_fully,
4189 ));
4190 output.push('\n');
4191 }
4192
4193 for (s1, e1, s2, e2) in ranges {
4194 if s1 >= e1 && s2 >= e2 {
4195 continue;
4196 }
4197 let old_seg =
4198 line_slice_for_diff_with_eof_nl(&old_lines, s1, e1, old_content.ends_with('\n'));
4199 let new_seg =
4200 line_slice_for_diff_with_eof_nl(&new_lines, s2, e2, new_content.ends_with('\n'));
4201 let inner_ctx = old_seg.lines().count().max(new_seg.lines().count()).max(1);
4202 let piece = unified_diff_with_prefix_and_funcname_and_algorithm(
4203 &old_seg,
4204 &new_seg,
4205 old_path,
4206 new_path,
4207 inner_ctx,
4208 0,
4209 src_prefix,
4210 dst_prefix,
4211 funcname_matcher,
4212 algorithm,
4213 false,
4214 false,
4215 indent_heuristic,
4216 quote_path_fully,
4217 );
4218 let shifted = shift_unified_hunk_headers_to_full_file(&piece, s1, s2);
4219 let with_func =
4220 enrich_unified_hunk_headers_funcname(&shifted, &old_lines, funcname_matcher);
4221 for line in with_func.lines() {
4222 if line.starts_with("--- ") || line.starts_with("+++ ") {
4223 continue;
4224 }
4225 output.push_str(line);
4226 output.push('\n');
4227 }
4228 }
4229
4230 output
4231}
4232
4233fn shift_unified_hunk_headers_to_full_file(
4238 patch: &str,
4239 delta_old: usize,
4240 delta_new: usize,
4241) -> String {
4242 if delta_old == 0 && delta_new == 0 {
4243 return patch.to_owned();
4244 }
4245 let mut out = String::with_capacity(patch.len());
4246 for line in patch.lines() {
4247 if let Some(shifted) = shift_one_unified_hunk_header(line, delta_old, delta_new) {
4248 out.push_str(&shifted);
4249 } else {
4250 out.push_str(line);
4251 }
4252 out.push('\n');
4253 }
4254 out
4255}
4256
4257fn shift_one_unified_hunk_header(line: &str, delta_old: usize, delta_new: usize) -> Option<String> {
4258 let rest = line.strip_prefix("@@ ")?;
4259 let (old_chunk, after_plus) = rest.split_once(" +")?;
4260 let old_spec = old_chunk.strip_prefix('-')?;
4261 let (new_spec, suffix) = after_plus.split_once(" @@")?;
4262 let shifted_old = shift_unified_range_spec(old_spec, delta_old)?;
4263 let shifted_new = shift_unified_range_spec(new_spec, delta_new)?;
4264 Some(format!("@@ -{shifted_old} +{shifted_new} @@{suffix}"))
4265}
4266
4267fn shift_unified_range_spec(spec: &str, delta: usize) -> Option<String> {
4268 let spec = spec.trim();
4269 if let Some((start_s, count_s)) = spec.split_once(',') {
4270 let start: usize = start_s.parse().ok()?;
4271 let count: usize = count_s.parse().ok()?;
4272 Some(format!("{},{}", start.saturating_add(delta), count))
4273 } else {
4274 let start: usize = spec.parse().ok()?;
4275 Some(format!("{}", start.saturating_add(delta)))
4276 }
4277}
4278
4279fn enrich_unified_hunk_headers_funcname(
4281 patch: &str,
4282 full_old_lines: &[&str],
4283 funcname_matcher: Option<&FuncnameMatcher>,
4284) -> String {
4285 let mut out = String::with_capacity(patch.len());
4286 for line in patch.lines() {
4287 if let Some(fixed) = enrich_one_hunk_header_funcname(line, full_old_lines, funcname_matcher)
4288 {
4289 out.push_str(&fixed);
4290 } else {
4291 out.push_str(line);
4292 }
4293 out.push('\n');
4294 }
4295 out
4296}
4297
4298fn enrich_one_hunk_header_funcname(
4299 line: &str,
4300 full_old_lines: &[&str],
4301 funcname_matcher: Option<&FuncnameMatcher>,
4302) -> Option<String> {
4303 let after_at = line.strip_prefix("@@ ")?;
4304 let idx = after_at.find(" @@")?;
4305 let mid = after_at[..idx].trim();
4306 let tail = after_at[idx + 3..].trim_start();
4307 let header_for_parse = format!("@@ {mid} @@");
4308 let func = extract_function_context(&header_for_parse, full_old_lines, funcname_matcher);
4309 Some(if let Some(f) = func {
4310 format!("@@ {mid} @@ {f}")
4311 } else if !tail.is_empty() {
4312 format!("@@ {mid} @@ {tail}")
4313 } else {
4314 format!("@@ {mid} @@")
4315 })
4316}
4317
4318fn line_slice_for_diff_with_eof_nl(
4319 lines: &[&str],
4320 start: usize,
4321 end: usize,
4322 full_file_ends_with_newline: bool,
4323) -> String {
4324 if start >= end {
4325 return String::new();
4326 }
4327 let mut s = lines[start..end].join("\n");
4328 let slice_is_suffix_of_file = end == lines.len();
4329 let need_trailing_nl = if slice_is_suffix_of_file {
4330 full_file_ends_with_newline
4331 } else {
4332 true
4333 };
4334 if need_trailing_nl && !s.ends_with('\n') {
4335 s.push('\n');
4336 }
4337 s
4338}
4339
4340fn map_old_line_to_new(ops: &[similar::DiffOp], old_line: usize, n_new: usize) -> usize {
4343 use similar::DiffOp;
4344 let mut n = 0usize;
4345 for op in ops {
4346 match *op {
4347 DiffOp::Equal {
4348 old_index,
4349 new_index,
4350 len,
4351 } => {
4352 if old_index + len <= old_line {
4353 n = new_index + len;
4354 continue;
4355 }
4356 if old_index < old_line {
4357 let take = old_line - old_index;
4358 return (new_index + take).min(n_new);
4359 }
4360 return new_index.min(n_new);
4361 }
4362 DiffOp::Delete {
4363 old_index,
4364 old_len,
4365 new_index,
4366 } => {
4367 if old_index + old_len <= old_line {
4368 n = new_index;
4369 continue;
4370 }
4371 if old_index < old_line {
4372 return new_index.min(n_new);
4373 }
4374 }
4375 DiffOp::Insert {
4376 old_index,
4377 new_index,
4378 new_len,
4379 } => {
4380 if old_index < old_line {
4381 n = new_index + new_len;
4382 continue;
4383 }
4384 if old_index == old_line {
4385 return (new_index + new_len).min(n_new);
4388 }
4389 return new_index.min(n_new);
4390 }
4391 DiffOp::Replace {
4392 old_index,
4393 old_len,
4394 new_index,
4395 new_len,
4396 } => {
4397 if old_index + old_len <= old_line {
4398 n = new_index + new_len;
4399 continue;
4400 }
4401 if old_index < old_line {
4402 let into_old = old_line - old_index;
4403 let mapped = new_index + into_old.min(new_len);
4404 return mapped.min(n_new);
4405 }
4406 return new_index.min(n_new);
4407 }
4408 }
4409 }
4410 n.min(n_new)
4411}
4412
4413fn parse_unified_hunk_header_ranges(header: &str) -> Option<(usize, usize, usize, usize)> {
4415 let rest = header.strip_prefix("@@ ")?;
4416 let (old_tok, rest2) = rest.split_once(" +")?;
4417 let old_tok = old_tok.strip_prefix('-')?;
4418 let new_tok = rest2.split_once(" @@").map(|(a, _)| a)?;
4419
4420 fn parse_side(spec: &str) -> Option<(usize, usize)> {
4421 let spec = spec.trim();
4422 let (start_one_based, count) = if let Some((a, b)) = spec.split_once(',') {
4423 (a.parse::<usize>().ok()?, b.parse::<usize>().ok()?)
4424 } else {
4425 let s = spec.parse::<usize>().ok()?;
4426 (s, 1usize)
4427 };
4428 let s0 = start_one_based.saturating_sub(1);
4429 let e0 = s0.saturating_add(count);
4430 Some((s0, e0))
4431 }
4432
4433 let (os, oe) = parse_side(old_tok)?;
4434 let (ns, ne) = parse_side(new_tok)?;
4435 Some((os, oe, ns, ne))
4436}
4437
4438fn append_with_whole_function_added(
4441 ops: &[similar::DiffOp],
4442 n_old: usize,
4443 n_new: usize,
4444 new_lines: &[&str],
4445 matcher: Option<&FuncnameMatcher>,
4446) -> bool {
4447 use similar::DiffOp;
4448 if n_old == 0 {
4449 return false;
4450 }
4451 let mut only_ins_or_eq = true;
4452 let mut min_new_ins = usize::MAX;
4453 for op in ops {
4454 match *op {
4455 DiffOp::Equal { .. } => {}
4456 DiffOp::Insert {
4457 new_index, new_len, ..
4458 } => {
4459 min_new_ins = min_new_ins.min(new_index);
4460 if new_len == 0 {
4461 only_ins_or_eq = false;
4462 }
4463 }
4464 DiffOp::Delete { .. } | DiffOp::Replace { .. } => {
4465 only_ins_or_eq = false;
4466 }
4467 }
4468 }
4469 let mut insert_at_eof = false;
4470 for op in ops {
4471 if let DiffOp::Insert { old_index, .. } = *op {
4472 if old_index == n_old {
4473 insert_at_eof = true;
4474 break;
4475 }
4476 }
4477 }
4478 let append_at_eof = min_new_ins == n_old || insert_at_eof;
4479 if !only_ins_or_eq || !append_at_eof || min_new_ins == usize::MAX {
4480 return false;
4481 }
4482 let mut j = min_new_ins;
4487 while j < n_new {
4488 let line = new_lines[j];
4489 if line.trim().is_empty() {
4490 j += 1;
4491 continue;
4492 }
4493 if let Some(m) = matcher {
4494 if m.match_line(line).is_some() {
4495 return true;
4496 }
4497 } else if inserted_block_starts_with_c_like_function_definition(line) {
4498 return true;
4499 }
4500 j += 1;
4501 }
4502 false
4503}
4504
4505fn inserted_block_starts_with_c_like_function_definition(line: &str) -> bool {
4506 let t = line.trim_start();
4507 let Some(open_paren) = t.find('(') else {
4508 return false;
4509 };
4510 let head = &t[..open_paren];
4511 let tokens: Vec<&str> = head.split_whitespace().collect();
4512 if tokens.len() < 2 {
4513 return false;
4515 }
4516 let nameish = tokens.last().copied().unwrap_or("");
4517 let name = nameish.trim_end_matches(['*', '&']);
4518 if name.is_empty() || !name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
4519 return false;
4520 }
4521 let type_or_modifier = |tok: &str| {
4522 matches!(
4523 tok,
4524 "static"
4525 | "extern"
4526 | "inline"
4527 | "void"
4528 | "int"
4529 | "char"
4530 | "short"
4531 | "long"
4532 | "float"
4533 | "double"
4534 | "unsigned"
4535 | "signed"
4536 | "struct"
4537 | "enum"
4538 | "union"
4539 | "const"
4540 | "volatile"
4541 | "typedef"
4542 )
4543 };
4544 tokens[..tokens.len() - 1]
4545 .iter()
4546 .any(|tok| type_or_modifier(tok))
4547}
4548
4549fn hunk_old_change_end_exclusive(ops: &[similar::DiffOp]) -> usize {
4550 use similar::DiffOp;
4551 let mut max_o = 0usize;
4552 for op in ops {
4553 match *op {
4554 DiffOp::Delete {
4555 old_index, old_len, ..
4556 } => {
4557 max_o = max_o.max(old_index + old_len);
4558 }
4559 DiffOp::Replace {
4560 old_index, old_len, ..
4561 } => {
4562 max_o = max_o.max(old_index + old_len);
4563 }
4564 DiffOp::Insert { old_index, .. } => {
4565 max_o = max_o.max(old_index);
4568 }
4569 DiffOp::Equal { .. } => {}
4570 }
4571 }
4572 max_o
4573}
4574
4575fn func_context_old_anchor(ops: &[similar::DiffOp], n_old: usize) -> usize {
4576 use similar::DiffOp;
4577 let mut has_delete_or_replace = false;
4578 let mut min_del = usize::MAX;
4579 let mut min_ins_old = usize::MAX;
4580
4581 for op in ops {
4582 match *op {
4583 DiffOp::Delete {
4584 old_index, old_len, ..
4585 } => {
4586 has_delete_or_replace = true;
4587 min_del = min_del.min(old_index);
4588 min_del = min_del.min(old_index + old_len.saturating_sub(1));
4589 }
4590 DiffOp::Replace {
4591 old_index, old_len, ..
4592 } => {
4593 has_delete_or_replace = true;
4594 min_del = min_del.min(old_index);
4595 min_del = min_del.min(old_index + old_len.saturating_sub(1));
4596 }
4597 DiffOp::Insert { old_index, .. } => {
4598 min_ins_old = min_ins_old.min(old_index);
4599 }
4600 DiffOp::Equal { .. } => {}
4601 }
4602 }
4603
4604 let mut i1 = if has_delete_or_replace {
4605 min_del
4606 } else if min_ins_old != usize::MAX {
4607 min_ins_old
4608 } else {
4609 0
4610 };
4611
4612 let pure_insert = ops
4613 .iter()
4614 .all(|op| matches!(op, DiffOp::Insert { .. } | DiffOp::Equal { .. }))
4615 && ops.iter().any(|op| matches!(op, DiffOp::Insert { .. }));
4616
4617 if pure_insert && i1 >= n_old && n_old > 0 {
4618 i1 = n_old - 1;
4619 }
4620
4621 i1.min(n_old.saturating_sub(1))
4622}
4623
4624fn expand_func_pre_start(
4625 s1: usize,
4626 i1: usize,
4627 n_old: usize,
4628 old_lines: &[&str],
4629 matcher: Option<&FuncnameMatcher>,
4630) -> usize {
4631 if n_old == 0 {
4632 return s1;
4633 }
4634 let i1 = i1.min(n_old.saturating_sub(1));
4635 let mut fs1 = get_func_line_backward(old_lines, i1, matcher).unwrap_or(i1);
4636 while fs1 > 0
4637 && !is_line_empty_for_func_context(old_lines[fs1 - 1])
4638 && !is_func_line(old_lines[fs1 - 1], matcher)
4639 {
4640 fs1 -= 1;
4641 }
4642 s1.min(fs1)
4643}
4644
4645fn expand_func_post_end(
4646 e1: usize,
4647 i1_end: usize,
4648 n_old: usize,
4649 old_lines: &[&str],
4650 matcher: Option<&FuncnameMatcher>,
4651) -> usize {
4652 let from = i1_end.min(n_old);
4653 let fe1 = get_func_line_forward(old_lines, from, matcher).unwrap_or(n_old);
4654 let mut fe1_adj = fe1;
4655 while fe1_adj > 0 && is_line_empty_for_func_context(old_lines[fe1_adj - 1]) {
4656 fe1_adj -= 1;
4657 }
4658 e1.max(fe1_adj).min(n_old)
4659}
4660
4661fn is_line_empty_for_func_context(line: &str) -> bool {
4662 line.chars().all(|c| c.is_whitespace())
4663}
4664
4665fn is_func_line(line: &str, matcher: Option<&FuncnameMatcher>) -> bool {
4666 if let Some(m) = matcher {
4667 return m.match_line(line).is_some();
4668 }
4669 let t = line.trim_end_matches(['\n', '\r']);
4670 if t.is_empty() {
4671 return false;
4672 }
4673 let b = t.as_bytes()[0];
4674 b.is_ascii_alphabetic() || b == b'_' || b == b'$'
4675}
4676
4677fn get_func_line_backward(
4678 old_lines: &[&str],
4679 start: usize,
4680 matcher: Option<&FuncnameMatcher>,
4681) -> Option<usize> {
4682 let mut l = start.min(old_lines.len().saturating_sub(1));
4683 if old_lines.is_empty() {
4684 return None;
4685 }
4686 loop {
4687 if is_func_line(old_lines[l], matcher) {
4688 return Some(l);
4689 }
4690 if l == 0 {
4691 break;
4692 }
4693 l -= 1;
4694 }
4695 None
4696}
4697
4698fn get_func_line_forward(
4699 old_lines: &[&str],
4700 start: usize,
4701 matcher: Option<&FuncnameMatcher>,
4702) -> Option<usize> {
4703 let mut l = start;
4704 while l < old_lines.len() {
4705 if is_func_line(old_lines[l], matcher) {
4706 return Some(l);
4707 }
4708 l += 1;
4709 }
4710 None
4711}
4712
4713pub fn anchored_unified_diff(
4723 old_content: &str,
4724 new_content: &str,
4725 old_path: &str,
4726 new_path: &str,
4727 context_lines: usize,
4728 anchors: &[String],
4729 algorithm: similar::Algorithm,
4730 use_git_histogram: bool,
4731 indent_heuristic: bool,
4732 quote_path_fully: bool,
4733) -> String {
4734 use crate::quote_path::format_diff_path_with_prefix;
4735 use similar::TextDiff;
4736
4737 let old_lines: Vec<&str> = old_content.lines().collect();
4738 let new_lines: Vec<&str> = new_content.lines().collect();
4739
4740 let mut anchor_pairs: Vec<(usize, usize)> = Vec::new(); for anchor in anchors {
4744 let anchor_str = anchor.as_str();
4745
4746 let old_positions: Vec<usize> = old_lines
4748 .iter()
4749 .enumerate()
4750 .filter(|(_, l)| l.trim_end() == anchor_str)
4751 .map(|(i, _)| i)
4752 .collect();
4753
4754 let new_positions: Vec<usize> = new_lines
4756 .iter()
4757 .enumerate()
4758 .filter(|(_, l)| l.trim_end() == anchor_str)
4759 .map(|(i, _)| i)
4760 .collect();
4761
4762 if old_positions.len() == 1 && new_positions.len() == 1 {
4764 anchor_pairs.push((old_positions[0], new_positions[0]));
4765 }
4766 }
4767
4768 if anchor_pairs.is_empty() {
4770 return unified_diff_with_prefix_and_funcname_and_algorithm(
4771 old_content,
4772 new_content,
4773 old_path,
4774 new_path,
4775 context_lines,
4776 0,
4777 "a/",
4778 "b/",
4779 None,
4780 algorithm,
4781 false,
4782 use_git_histogram,
4783 indent_heuristic,
4784 quote_path_fully,
4785 );
4786 }
4787
4788 anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
4790
4791 let mut filtered: Vec<(usize, usize)> = Vec::new();
4794 for &pair in &anchor_pairs {
4795 if filtered.is_empty() || filtered.last().is_some_and(|last| pair.1 > last.1) {
4796 filtered.push(pair);
4797 }
4798 }
4799 let anchor_pairs = filtered;
4800
4801 struct LineDiffOp {
4810 tag: char, line: String,
4812 }
4813
4814 let append_segment_diff =
4815 |ops: &mut Vec<LineDiffOp>, old_seg_input: &str, new_seg_input: &str| {
4816 use similar::ChangeTag;
4817 let old_ls: Vec<&str> = old_seg_input.lines().collect();
4818 let new_ls: Vec<&str> = new_seg_input.lines().collect();
4819 if old_ls.is_empty() && new_ls.is_empty() {
4820 return;
4821 }
4822 let seg_diff = TextDiff::configure()
4823 .algorithm(algorithm)
4824 .diff_slices(&old_ls, &new_ls);
4825 let raw = seg_diff.ops().to_vec();
4826 let compacted = diff_indent_heuristic::apply_change_compact_to_ops(
4827 &raw,
4828 &old_ls,
4829 &new_ls,
4830 indent_heuristic,
4831 );
4832 for op in &compacted {
4833 for ch in op.iter_changes(&old_ls, &new_ls) {
4834 let t = match ch.tag() {
4835 ChangeTag::Equal => ' ',
4836 ChangeTag::Delete => '-',
4837 ChangeTag::Insert => '+',
4838 };
4839 ops.push(LineDiffOp {
4840 tag: t,
4841 line: ch.value().to_string(),
4842 });
4843 }
4844 }
4845 };
4846
4847 let mut ops: Vec<LineDiffOp> = Vec::new();
4848 let mut old_pos = 0usize;
4849 let mut new_pos = 0usize;
4850
4851 for &(old_anchor, new_anchor) in &anchor_pairs {
4852 let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
4854 let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
4855
4856 let old_seg_text = old_segment.join("\n");
4857 let new_seg_text = new_segment.join("\n");
4858
4859 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
4860 let old_seg_input = if old_seg_text.is_empty() {
4861 String::new()
4862 } else {
4863 format!("{}\n", old_seg_text)
4864 };
4865 let new_seg_input = if new_seg_text.is_empty() {
4866 String::new()
4867 } else {
4868 format!("{}\n", new_seg_text)
4869 };
4870 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
4871 }
4872
4873 ops.push(LineDiffOp {
4875 tag: ' ',
4876 line: old_lines[old_anchor].to_string(),
4877 });
4878
4879 old_pos = old_anchor + 1;
4880 new_pos = new_anchor + 1;
4881 }
4882
4883 let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
4885 let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
4886 let old_seg_text = old_segment.join("\n");
4887 let new_seg_text = new_segment.join("\n");
4888
4889 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
4890 let old_seg_input = if old_seg_text.is_empty() {
4891 String::new()
4892 } else {
4893 format!("{}\n", old_seg_text)
4894 };
4895 let new_seg_input = if new_seg_text.is_empty() {
4896 String::new()
4897 } else {
4898 format!("{}\n", new_seg_text)
4899 };
4900 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
4901 }
4902
4903 let mut output = String::new();
4905 if old_path == "/dev/null" {
4906 output.push_str("--- /dev/null\n");
4907 } else {
4908 output.push_str("--- ");
4909 output.push_str(&format_diff_path_with_prefix(
4910 "a/",
4911 old_path,
4912 quote_path_fully,
4913 ));
4914 output.push('\n');
4915 }
4916 if new_path == "/dev/null" {
4917 output.push_str("+++ /dev/null\n");
4918 } else {
4919 output.push_str("+++ ");
4920 output.push_str(&format_diff_path_with_prefix(
4921 "b/",
4922 new_path,
4923 quote_path_fully,
4924 ));
4925 output.push('\n');
4926 }
4927
4928 let total_ops = ops.len();
4930 if total_ops == 0 {
4931 return output;
4932 }
4933
4934 let mut hunks: Vec<(usize, usize)> = Vec::new(); let mut i = 0;
4937 while i < total_ops {
4938 if ops[i].tag != ' ' {
4939 let start = i.saturating_sub(context_lines);
4940 let mut end = i;
4941 while end < total_ops {
4943 if ops[end].tag != ' ' {
4944 end += 1;
4945 continue;
4946 }
4947 let mut next_change = end;
4949 while next_change < total_ops && ops[next_change].tag == ' ' {
4950 next_change += 1;
4951 }
4952 if next_change < total_ops && next_change - end <= context_lines * 2 {
4953 end = next_change + 1;
4954 } else {
4955 end = (end + context_lines).min(total_ops);
4956 break;
4957 }
4958 }
4959 if let Some(last) = hunks.last_mut() {
4961 if start <= last.1 {
4962 last.1 = end;
4963 } else {
4964 hunks.push((start, end));
4965 }
4966 } else {
4967 hunks.push((start, end));
4968 }
4969 i = end;
4970 } else {
4971 i += 1;
4972 }
4973 }
4974
4975 for (start, end) in hunks {
4977 let mut old_start = 1usize;
4979 let mut new_start = 1usize;
4980 for op in &ops[..start] {
4982 match op.tag {
4983 ' ' => {
4984 old_start += 1;
4985 new_start += 1;
4986 }
4987 '-' => {
4988 old_start += 1;
4989 }
4990 '+' => {
4991 new_start += 1;
4992 }
4993 _ => {}
4994 }
4995 }
4996 let mut old_count = 0usize;
4997 let mut new_count = 0usize;
4998 for op in &ops[start..end] {
4999 match op.tag {
5000 ' ' => {
5001 old_count += 1;
5002 new_count += 1;
5003 }
5004 '-' => {
5005 old_count += 1;
5006 }
5007 '+' => {
5008 new_count += 1;
5009 }
5010 _ => {}
5011 }
5012 }
5013
5014 output.push_str(&format!(
5015 "@@ -{},{} +{},{} @@\n",
5016 old_start, old_count, new_start, new_count
5017 ));
5018 for op in &ops[start..end] {
5019 output.push(op.tag);
5020 output.push_str(&op.line);
5021 output.push('\n');
5022 }
5023 }
5024
5025 output
5026}
5027
5028fn extract_function_context(
5034 header: &str,
5035 old_lines: &[&str],
5036 funcname_matcher: Option<&FuncnameMatcher>,
5037) -> Option<String> {
5038 let at_pos = header.find("-")?;
5040 let rest = &header[at_pos + 1..];
5041 let comma_or_space = rest.find([',', ' '])?;
5042 let start_str = &rest[..comma_or_space];
5043 let start_line: usize = start_str.parse().ok()?;
5044
5045 let old_token_end = rest.find([' ', '\t']).unwrap_or(rest.len());
5049 let old_token = &rest[..old_token_end];
5050 let old_count: usize = if let Some(comma) = old_token.find(',') {
5051 old_token[comma + 1..].parse().unwrap_or(1)
5052 } else {
5053 1
5054 };
5055
5056 if start_line == 0 {
5057 return None;
5058 }
5059
5060 let search_end = if old_count == 0 {
5067 start_line.min(old_lines.len())
5068 } else {
5069 if start_line <= 1 {
5070 return None;
5071 }
5072 (start_line - 1).min(old_lines.len())
5073 };
5074 let truncate = |text: &str| {
5075 if text.len() > 80 {
5076 let mut end = 80;
5077 while end > 0 && !text.is_char_boundary(end) {
5078 end -= 1;
5079 }
5080 text[..end].to_owned()
5081 } else {
5082 text.to_owned()
5083 }
5084 };
5085
5086 for i in (0..search_end).rev() {
5087 let line = old_lines[i];
5088 if line.is_empty() {
5089 continue;
5090 }
5091 if let Some(matcher) = funcname_matcher {
5092 if let Some(matched) = matcher.match_line(line) {
5093 return Some(truncate(&matched));
5094 }
5095 continue;
5096 }
5097
5098 let first = line.as_bytes()[0];
5099 if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
5100 return Some(truncate(line.trim_end_matches(char::is_whitespace)));
5101 }
5102 }
5103 None
5104}
5105
5106pub fn format_stat_line(
5110 path: &str,
5111 insertions: usize,
5112 deletions: usize,
5113 max_path_len: usize,
5114) -> String {
5115 format_stat_line_width(path, insertions, deletions, max_path_len, 0)
5116}
5117
5118pub fn format_stat_line_width(
5119 path: &str,
5120 insertions: usize,
5121 deletions: usize,
5122 max_path_len: usize,
5123 count_width: usize,
5124) -> String {
5125 let total = insertions + deletions;
5126 let plus = "+".repeat(insertions.min(50));
5127 let minus = "-".repeat(deletions.min(50));
5128 let cw = if count_width > 0 {
5129 count_width
5130 } else {
5131 format!("{}", total).len()
5132 };
5133 let bar = format!("{}{}", plus, minus);
5134 if bar.is_empty() {
5135 format!(
5136 " {:<width$} | {:>cw$}",
5137 path,
5138 total,
5139 width = max_path_len,
5140 cw = cw
5141 )
5142 } else {
5143 format!(
5144 " {:<width$} | {:>cw$} {}",
5145 path,
5146 total,
5147 bar,
5148 width = max_path_len,
5149 cw = cw
5150 )
5151 }
5152}
5153
5154#[must_use]
5156pub fn normalize_ignore_space_change_line(line: &str) -> String {
5157 let mut result = String::with_capacity(line.len());
5158 let mut in_space = false;
5159 for c in line.chars() {
5160 if c.is_whitespace() {
5161 if !in_space {
5162 result.push(' ');
5163 in_space = true;
5164 }
5165 } else {
5166 result.push(c);
5167 in_space = false;
5168 }
5169 }
5170 while result.ends_with(' ') {
5171 result.pop();
5172 }
5173 result
5174}
5175
5176#[must_use]
5182pub fn normalize_ignore_space_change(content: &str) -> String {
5183 content
5184 .lines()
5185 .map(normalize_ignore_space_change_line)
5186 .collect::<Vec<_>>()
5187 .join("\n")
5188}
5189
5190pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
5194 count_changes_with_algorithm(old_content, new_content, similar::Algorithm::Myers, false)
5195}
5196
5197#[must_use]
5202pub fn count_changes_with_algorithm(
5203 old_content: &str,
5204 new_content: &str,
5205 algorithm: similar::Algorithm,
5206 use_git_histogram: bool,
5207) -> (usize, usize) {
5208 if use_git_histogram {
5209 use imara_diff::{Algorithm, Diff, InternedInput};
5210 let input = InternedInput::new(old_content, new_content);
5211 let mut d = Diff::compute(Algorithm::Histogram, &input);
5212 d.postprocess_lines(&input);
5213 return (d.count_additions() as usize, d.count_removals() as usize);
5214 }
5215
5216 use similar::{ChangeTag, TextDiff};
5217
5218 let diff = TextDiff::configure()
5219 .algorithm(algorithm)
5220 .diff_lines(old_content, new_content);
5221 let mut ins = 0;
5222 let mut del = 0;
5223
5224 for change in diff.iter_all_changes() {
5225 match change.tag() {
5226 ChangeTag::Insert => ins += 1,
5227 ChangeTag::Delete => del += 1,
5228 ChangeTag::Equal => {}
5229 }
5230 }
5231
5232 (ins, del)
5233}
5234
5235#[must_use]
5240pub fn count_git_lines(data: &[u8]) -> usize {
5241 if data.is_empty() {
5242 return 0;
5243 }
5244 let mut count = 0usize;
5245 let mut nl_just_seen = false;
5246 for &ch in data {
5247 if ch == b'\n' {
5248 count += 1;
5249 nl_just_seen = true;
5250 } else {
5251 nl_just_seen = false;
5252 }
5253 }
5254 if !nl_just_seen {
5255 count += 1;
5256 }
5257 count
5258}
5259
5260pub const GIT_DIFF_MAX_SCORE: u64 = 60_000;
5262const DIFF_MAX_SCORE: u64 = GIT_DIFF_MAX_SCORE;
5263const DIFF_MINIMUM_BREAK_SIZE: usize = 400;
5264const DIFF_DEFAULT_BREAK_SCORE: u64 = 30_000;
5265pub const GIT_DIFF_DEFAULT_BREAK_SCORE: u64 = DIFF_DEFAULT_BREAK_SCORE;
5267pub const GIT_DIFF_DEFAULT_MERGE_SCORE_AFTER_BREAK: u64 = 36_000;
5270const DIFF_HASHBASE: u32 = 107_927;
5271
5272#[derive(Clone, Copy, Default)]
5273struct SpanSlot {
5274 hashval: u32,
5275 cnt: u32,
5276}
5277
5278struct SpanHashTop {
5279 alloc_log2: u8,
5280 free_slots: i32,
5281 data: Vec<SpanSlot>,
5282}
5283
5284impl SpanHashTop {
5285 fn new(initial_log2: u8) -> Self {
5286 let cap = 1usize << initial_log2;
5287 Self {
5288 alloc_log2: initial_log2,
5289 free_slots: initial_free(initial_log2),
5290 data: vec![SpanSlot::default(); cap],
5291 }
5292 }
5293
5294 fn len(&self) -> usize {
5295 1usize << self.alloc_log2
5296 }
5297
5298 fn add_span(&mut self, hashval: u32, cnt: u32) {
5299 loop {
5300 let lim = self.len();
5301 let mut bucket = (hashval as usize) & (lim - 1);
5302 loop {
5303 let h = &mut self.data[bucket];
5304 if h.cnt == 0 {
5305 h.hashval = hashval;
5306 h.cnt = cnt;
5307 self.free_slots -= 1;
5308 if self.free_slots < 0 {
5309 self.rehash();
5310 break;
5311 }
5312 return;
5313 }
5314 if h.hashval == hashval {
5315 h.cnt = h.cnt.saturating_add(cnt);
5316 return;
5317 }
5318 bucket += 1;
5319 if bucket >= lim {
5320 bucket = 0;
5321 }
5322 }
5323 }
5324 }
5325
5326 fn rehash(&mut self) {
5327 let old = std::mem::take(&mut self.data);
5328 let old_log = self.alloc_log2;
5329 self.alloc_log2 = old_log.saturating_add(1);
5330 let new_len = 1usize << self.alloc_log2;
5331 self.free_slots = initial_free(self.alloc_log2);
5332 self.data = vec![SpanSlot::default(); new_len];
5333 let old_sz = 1usize << old_log;
5334 for o in old.iter().take(old_sz) {
5335 let o = *o;
5336 if o.cnt == 0 {
5337 continue;
5338 }
5339 self.add_span_after_rehash(o.hashval, o.cnt);
5340 }
5341 }
5342
5343 fn add_span_after_rehash(&mut self, hashval: u32, cnt: u32) {
5344 loop {
5345 let lim = self.len();
5346 let mut bucket = (hashval as usize) & (lim - 1);
5347 loop {
5348 let h = &mut self.data[bucket];
5349 if h.cnt == 0 {
5350 h.hashval = hashval;
5351 h.cnt = cnt;
5352 self.free_slots -= 1;
5353 if self.free_slots < 0 {
5354 self.rehash();
5355 break;
5356 }
5357 return;
5358 }
5359 if h.hashval == hashval {
5360 h.cnt = h.cnt.saturating_add(cnt);
5361 return;
5362 }
5363 bucket += 1;
5364 if bucket >= lim {
5365 bucket = 0;
5366 }
5367 }
5368 }
5369 }
5370
5371 fn sort_by_hashval(&mut self) {
5372 let sz = self.len();
5373 self.data[..sz].sort_by(|a, b| {
5374 if a.cnt == 0 {
5375 return std::cmp::Ordering::Greater;
5376 }
5377 if b.cnt == 0 {
5378 return std::cmp::Ordering::Less;
5379 }
5380 a.hashval.cmp(&b.hashval)
5381 });
5382 }
5383}
5384
5385fn initial_free(sz_log2: u8) -> i32 {
5386 let sz = sz_log2 as i32;
5387 ((1i32 << sz_log2) * (sz - 3) / sz).max(0)
5388}
5389
5390fn hash_blob_spans(buf: &[u8], is_text: bool) -> SpanHashTop {
5391 let mut hash = SpanHashTop::new(9);
5392 let mut n = 0u32;
5393 let mut accum1: u32 = 0;
5394 let mut accum2: u32 = 0;
5395 let mut i = 0usize;
5396 while i < buf.len() {
5397 let c = buf[i] as u32;
5398 let old_1 = accum1;
5399 i += 1;
5400
5401 if is_text && c == b'\r' as u32 && i < buf.len() && buf[i] == b'\n' {
5402 continue;
5403 }
5404
5405 accum1 = accum1.wrapping_shl(7) ^ accum2.wrapping_shr(25);
5406 accum2 = accum2.wrapping_shl(7) ^ old_1.wrapping_shr(25);
5407 accum1 = accum1.wrapping_add(c);
5408 n += 1;
5409 if n < 64 && c != b'\n' as u32 {
5410 continue;
5411 }
5412 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
5413 hash.add_span(hashval, n);
5414 n = 0;
5415 accum1 = 0;
5416 accum2 = 0;
5417 }
5418 if n > 0 {
5419 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
5420 hash.add_span(hashval, n);
5421 }
5422 hash.sort_by_hashval();
5423 hash
5424}
5425
5426#[must_use]
5431pub fn diffcore_count_changes(old: &[u8], new: &[u8]) -> (u64, u64) {
5432 let src_is_text = !crate::merge_file::is_binary(old);
5433 let dst_is_text = !crate::merge_file::is_binary(new);
5434 let src_count = hash_blob_spans(old, src_is_text);
5435 let dst_count = hash_blob_spans(new, dst_is_text);
5436 let mut sc: u64 = 0;
5437 let mut la: u64 = 0;
5438 let mut si = 0usize;
5439 let mut di = 0usize;
5440 let src_len = src_count.len();
5441 let dst_len = dst_count.len();
5442 loop {
5443 if si >= src_len || src_count.data[si].cnt == 0 {
5444 break;
5445 }
5446 let s_hash = src_count.data[si].hashval;
5447 let s_cnt = u64::from(src_count.data[si].cnt);
5448 while di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval < s_hash {
5449 la += u64::from(dst_count.data[di].cnt);
5450 di += 1;
5451 }
5452 let mut dst_cnt = 0u64;
5453 if di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval == s_hash {
5454 dst_cnt = u64::from(dst_count.data[di].cnt);
5455 di += 1;
5456 }
5457 if s_cnt < dst_cnt {
5458 la += dst_cnt - s_cnt;
5459 sc += s_cnt;
5460 } else {
5461 sc += dst_cnt;
5462 }
5463 si += 1;
5464 }
5465 while di < dst_len && dst_count.data[di].cnt != 0 {
5466 la += u64::from(dst_count.data[di].cnt);
5467 di += 1;
5468 }
5469 (sc, la)
5470}
5471
5472#[must_use]
5475pub fn should_break_rewrite_for_stat(old: &[u8], new: &[u8]) -> bool {
5476 should_break_rewrite_inner(old, new, DIFF_DEFAULT_BREAK_SCORE)
5477}
5478
5479#[must_use]
5483pub fn should_break_rewrite_pair(old: &[u8], new: &[u8], break_score: u64) -> bool {
5484 should_break_rewrite_inner(old, new, break_score)
5485}
5486
5487pub fn parse_diff_rename_score_token(arg: &str) -> Option<u64> {
5490 let mut num: u64 = 0;
5491 let mut scale: u64 = 1;
5492 let mut dot = false;
5493 let mut saw_digit = false;
5494 for ch in arg.chars() {
5495 if !dot && ch == '.' {
5496 scale = 1;
5497 dot = true;
5498 continue;
5499 }
5500 if ch == '%' {
5501 scale = if dot { scale.saturating_mul(100) } else { 100 };
5502 break;
5503 }
5504 if ch.is_ascii_digit() {
5505 saw_digit = true;
5506 if scale < 100_000 {
5507 scale = scale.saturating_mul(10);
5508 num = num.saturating_mul(10) + u64::from(ch as u8 - b'0');
5509 }
5510 } else {
5511 break;
5512 }
5513 }
5514 if !saw_digit {
5515 return None;
5516 }
5517 Some(if num >= scale {
5518 GIT_DIFF_MAX_SCORE
5519 } else {
5520 GIT_DIFF_MAX_SCORE * num / scale
5521 })
5522}
5523
5524#[must_use]
5527pub fn rewrite_merge_score(old: &[u8], new: &[u8]) -> Option<u64> {
5528 if old.is_empty() {
5529 return None;
5530 }
5531 let max_size = old.len().max(new.len());
5532 if max_size < DIFF_MINIMUM_BREAK_SIZE {
5533 return None;
5534 }
5535 let (src_copied, _) = diffcore_count_changes(old, new);
5536 let src_copied = src_copied.min(old.len() as u64);
5537 let src_removed = (old.len() as u64).saturating_sub(src_copied);
5538 Some(src_removed * DIFF_MAX_SCORE / old.len() as u64)
5539}
5540
5541#[must_use]
5543pub fn rewrite_dissimilarity_index_percent(old: &[u8], new: &[u8]) -> Option<u32> {
5544 let score = rewrite_merge_score(old, new)?;
5545 Some((score * 100 / DIFF_MAX_SCORE).min(100) as u32)
5546}
5547
5548fn should_break_rewrite_inner(src: &[u8], dst: &[u8], break_score: u64) -> bool {
5549 if src.is_empty() {
5550 return false;
5551 }
5552 let max_size = src.len().max(dst.len());
5553 if max_size < DIFF_MINIMUM_BREAK_SIZE {
5554 return false;
5555 }
5556 let (src_copied, literal_added) = diffcore_count_changes(src, dst);
5557 let src_copied = src_copied.min(src.len() as u64);
5558 let mut literal_added = literal_added;
5559 let dst_len = dst.len() as u64;
5560 if src_copied < dst_len && literal_added + src_copied > dst_len {
5561 literal_added = dst_len.saturating_sub(src_copied);
5562 }
5563 let src_removed = (src.len() as u64).saturating_sub(src_copied);
5564 let merge_score = src_removed * DIFF_MAX_SCORE / src.len() as u64;
5565 if merge_score > break_score {
5566 return true;
5567 }
5568 let delta_size = src_removed.saturating_add(literal_added);
5569 if delta_size * DIFF_MAX_SCORE / (max_size as u64) < break_score {
5570 return false;
5571 }
5572 let s = src.len() as u64;
5573 if (s * break_score < src_removed * DIFF_MAX_SCORE)
5574 && (literal_added * 20 < src_removed)
5575 && (literal_added * 20 < src_copied)
5576 {
5577 return false;
5578 }
5579 true
5580}
5581
5582struct FlatEntry {
5586 path: String,
5587 mode: u32,
5588 oid: ObjectId,
5589}
5590
5591fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
5592 let entries = read_tree(odb, tree_oid)?;
5593 let mut result = Vec::new();
5594
5595 for entry in entries {
5596 let name_str = String::from_utf8_lossy(&entry.name);
5597 let path = format_path(prefix, &name_str);
5598 if is_tree_mode(entry.mode) {
5599 let nested = flatten_tree(odb, &entry.oid, &path)?;
5600 result.extend(nested);
5601 } else {
5602 result.push(FlatEntry {
5603 path,
5604 mode: entry.mode,
5605 oid: entry.oid,
5606 });
5607 }
5608 }
5609
5610 Ok(result)
5611}
5612
5613pub fn head_path_states(
5615 odb: &Odb,
5616 head_tree: Option<&ObjectId>,
5617) -> Result<std::collections::BTreeMap<String, (u32, ObjectId)>> {
5618 let mut m = std::collections::BTreeMap::new();
5619 let Some(t) = head_tree else {
5620 return Ok(m);
5621 };
5622 for fe in flatten_tree(odb, t, "")? {
5623 m.insert(fe.path, (fe.mode, fe.oid));
5624 }
5625 Ok(m)
5626}
5627
5628fn is_tree_mode(mode: u32) -> bool {
5630 mode == 0o040000
5631}
5632
5633fn format_path(prefix: &str, name: &str) -> String {
5635 if prefix.is_empty() {
5636 name.to_owned()
5637 } else {
5638 format!("{prefix}/{name}")
5639 }
5640}
5641
5642pub fn format_mode(mode: u32) -> String {
5644 format!("{mode:06o}")
5645}
5646
5647#[must_use]
5651pub fn read_submodule_head_for_checkout(sub_dir: &Path) -> Option<ObjectId> {
5652 read_submodule_head(sub_dir)
5653}
5654
5655#[must_use]
5660pub fn submodule_commit_subject_line(c: &CommitData) -> String {
5661 let enc = c.encoding.as_deref().unwrap_or("UTF-8");
5662 let is_latin1 = enc.eq_ignore_ascii_case("ISO8859-1")
5663 || enc.eq_ignore_ascii_case("ISO-8859-1")
5664 || enc.eq_ignore_ascii_case("LATIN1")
5665 || enc.eq_ignore_ascii_case("ISO-8859-15");
5666 if let Some(raw) = c.raw_message.as_deref() {
5667 let line = raw.split(|b| *b == b'\n').next().unwrap_or(raw);
5668 if is_latin1 {
5669 return line
5670 .iter()
5671 .map(|&b| b as char)
5672 .collect::<String>()
5673 .trim()
5674 .to_owned();
5675 }
5676 return String::from_utf8_lossy(line).trim().to_string();
5677 }
5678 c.message.lines().next().unwrap_or("").trim().to_owned()
5679}
5680
5681fn submodule_worktree_is_unpopulated_placeholder(sub_dir: &Path) -> bool {
5684 match fs::read_dir(sub_dir) {
5685 Ok(mut it) => it.next().is_none(),
5686 Err(e) if e.kind() == std::io::ErrorKind::NotFound => true,
5687 Err(_) => false,
5688 }
5689}
5690
5691fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
5692 read_submodule_head_oid(sub_dir)
5693}
5694
5695#[must_use]
5697pub fn submodule_embedded_git_dir(sub_dir: &Path) -> Option<PathBuf> {
5698 let gitfile = sub_dir.join(".git");
5699 if gitfile.is_file() {
5700 let content = fs::read_to_string(&gitfile).ok()?;
5701 let gitdir = content
5702 .lines()
5703 .find_map(|l| l.strip_prefix("gitdir: "))?
5704 .trim();
5705 Some(if Path::new(gitdir).is_absolute() {
5706 PathBuf::from(gitdir)
5707 } else {
5708 sub_dir.join(gitdir)
5709 })
5710 } else if gitfile.is_dir() {
5711 Some(gitfile)
5712 } else {
5713 None
5714 }
5715}
5716
5717fn find_superproject_git(sub_dir: &Path) -> Option<(PathBuf, PathBuf)> {
5719 let mut cur = sub_dir.parent()?;
5720 loop {
5721 let git_path = cur.join(".git");
5722 if git_path.exists() {
5723 let gd = if git_path.is_file() {
5724 let content = fs::read_to_string(&git_path).ok()?;
5725 let line = content
5726 .lines()
5727 .find_map(|l| l.strip_prefix("gitdir: "))?
5728 .trim();
5729 if Path::new(line).is_absolute() {
5730 PathBuf::from(line)
5731 } else {
5732 cur.join(line)
5733 }
5734 } else {
5735 git_path
5736 };
5737 return Some((cur.to_path_buf(), gd));
5738 }
5739 cur = cur.parent()?;
5740 }
5741}
5742
5743pub fn read_submodule_head_oid(sub_dir: &Path) -> Option<ObjectId> {
5749 let mut git_dir = submodule_embedded_git_dir(sub_dir)?;
5752 if let Some((super_wt, super_git_dir)) = find_superproject_git(sub_dir) {
5753 let rel = sub_dir.strip_prefix(&super_wt).ok()?;
5754 let rel_str = rel.to_string_lossy().replace('\\', "/");
5755 let local_mod = super_git_dir
5756 .join("modules")
5757 .join(rel_str.trim_start_matches('/'));
5758 if local_mod.join("HEAD").exists() {
5759 let sg = super_git_dir.canonicalize().unwrap_or(super_git_dir);
5760 let cur = git_dir.canonicalize().unwrap_or_else(|_| git_dir.clone());
5761 if !cur.starts_with(&sg) {
5762 git_dir = local_mod;
5763 }
5764 }
5765 }
5766 let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
5767 let head_trimmed = head_content.trim();
5768 if head_trimmed.starts_with("ref: ") {
5769 match crate::refs::resolve_ref(&git_dir, "HEAD") {
5773 Ok(oid) => Some(oid),
5774 Err(_) => {
5775 let mut found = None;
5776 for branch in ["main", "master"] {
5777 let p = git_dir.join("refs/heads").join(branch);
5778 if let Ok(s) = fs::read_to_string(&p) {
5779 if let Ok(o) = ObjectId::from_hex(s.trim()) {
5780 found = Some(o);
5781 break;
5782 }
5783 }
5784 }
5785 found
5786 }
5787 }
5788 } else {
5789 ObjectId::from_hex(head_trimmed).ok()
5790 }
5791}
5792
5793#[must_use]
5805pub fn submodule_head_object_broken(sub_dir: &Path) -> bool {
5806 let Some(sub_git_dir) = submodule_embedded_git_dir(sub_dir) else {
5807 return false;
5808 };
5809 let Some(head_oid) = read_submodule_head_oid(sub_dir) else {
5810 return false;
5811 };
5812 let odb = Odb::new(&sub_git_dir.join("objects"));
5813 match odb.read(&head_oid) {
5814 Ok(obj) => parse_commit(&obj.data).is_err(),
5817 Err(_) => true,
5818 }
5819}
5820
5821fn submodule_has_dirty_worktree_for_super_diff(
5824 super_worktree: &Path,
5825 rel_path: &str,
5826 recorded_oid: &ObjectId,
5827) -> bool {
5828 let flags = submodule_porcelain_flags(super_worktree, rel_path, *recorded_oid);
5829 flags.modified || flags.untracked
5830}
5831
5832#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
5834pub struct SubmodulePorcelainFlags {
5835 pub new_commits: bool,
5837 pub modified: bool,
5839 pub untracked: bool,
5841}
5842
5843pub fn submodule_porcelain_flags(
5849 super_worktree: &Path,
5850 rel_path: &str,
5851 recorded_oid: ObjectId,
5852) -> SubmodulePorcelainFlags {
5853 let sub_dir = super_worktree.join(rel_path);
5854 let Some(sub_git_dir) = submodule_embedded_git_dir(&sub_dir) else {
5855 return SubmodulePorcelainFlags::default();
5856 };
5857 let Some(sub_head) = read_submodule_head_oid(&sub_dir) else {
5858 return SubmodulePorcelainFlags::default();
5859 };
5860
5861 let new_commits = sub_head != recorded_oid;
5862
5863 let index_path = sub_git_dir.join("index");
5864 let sub_index = match crate::index::Index::load(&index_path) {
5865 Ok(ix) => ix,
5866 Err(_) => {
5867 return SubmodulePorcelainFlags {
5868 new_commits,
5869 ..Default::default()
5870 }
5871 }
5872 };
5873
5874 let tracked: std::collections::BTreeSet<String> = sub_index
5875 .entries
5876 .iter()
5877 .filter(|e| e.stage() == 0)
5878 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
5879 .collect();
5880 let untracked = submodule_dir_has_untracked_inner(&sub_dir, &sub_dir, &tracked, &sub_index);
5881
5882 let objects_dir = sub_git_dir.join("objects");
5883 let odb = Odb::new(&objects_dir);
5884
5885 let sub_head_tree = (|| -> Option<ObjectId> {
5886 let h = fs::read_to_string(sub_git_dir.join("HEAD")).ok()?;
5887 let h_str = h.trim();
5888 let commit_oid = if let Some(r) = h_str.strip_prefix("ref: ") {
5889 let oid_hex = fs::read_to_string(sub_git_dir.join(r)).ok()?;
5890 ObjectId::from_hex(oid_hex.trim()).ok()?
5891 } else {
5892 ObjectId::from_hex(h_str).ok()?
5893 };
5894 let obj = odb.read(&commit_oid).ok()?;
5895 let commit = parse_commit(&obj.data).ok()?;
5896 Some(commit.tree)
5897 })();
5898
5899 let staged_dirty = sub_head_tree
5900 .as_ref()
5901 .map(|t| diff_index_to_tree(&odb, &sub_index, Some(t), false).map(|v| !v.is_empty()))
5902 .unwrap_or(Ok(false));
5903 let staged_dirty = staged_dirty.unwrap_or(false);
5904
5905 let unstaged_dirty = diff_index_to_worktree(&odb, &sub_index, &sub_dir, false, true)
5906 .map(|v| !v.is_empty())
5907 .unwrap_or(false);
5908
5909 let mut modified = staged_dirty || unstaged_dirty;
5910
5911 for e in &sub_index.entries {
5916 if e.stage() != 0 || e.mode != 0o160000 {
5917 continue;
5918 }
5919 let child = String::from_utf8_lossy(&e.path).into_owned();
5920 let full_rel = if rel_path.is_empty() {
5921 child
5922 } else {
5923 format!("{rel_path}/{child}")
5924 };
5925 let nested = submodule_porcelain_flags(super_worktree, &full_rel, e.oid);
5926 modified |= nested.modified;
5927 }
5928
5929 SubmodulePorcelainFlags {
5930 new_commits,
5931 modified,
5932 untracked,
5933 }
5934}
5935
5936fn submodule_dir_has_untracked_inner(
5937 dir: &Path,
5938 root: &Path,
5939 tracked: &std::collections::BTreeSet<String>,
5940 owning_index: &Index,
5941) -> bool {
5942 let entries = match fs::read_dir(dir) {
5943 Ok(e) => e,
5944 Err(_) => return false,
5945 };
5946 let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
5947 sorted.sort_by_key(|e| e.file_name());
5948
5949 for entry in sorted {
5950 let name = entry.file_name().to_string_lossy().to_string();
5951 if name == ".git" {
5952 continue;
5953 }
5954 let path = entry.path();
5955 let rel = path
5956 .strip_prefix(root)
5957 .map(|p| p.to_string_lossy().to_string())
5958 .unwrap_or_else(|_| name.clone());
5959
5960 let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
5961 if is_dir {
5962 let is_gitlink = owning_index
5963 .get(rel.as_bytes(), 0)
5964 .is_some_and(|e| e.mode == 0o160000);
5965 if is_gitlink {
5966 let Some(nested_git) = submodule_embedded_git_dir(&path) else {
5967 continue;
5968 };
5969 let nested_index_path = nested_git.join("index");
5970 let Ok(nested_ix) = crate::index::Index::load(&nested_index_path) else {
5971 continue;
5972 };
5973 let nested_tracked: std::collections::BTreeSet<String> = nested_ix
5974 .entries
5975 .iter()
5976 .filter(|e| e.stage() == 0)
5977 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
5978 .collect();
5979 if submodule_dir_has_untracked_inner(&path, &path, &nested_tracked, &nested_ix) {
5980 return true;
5981 }
5982 } else if submodule_dir_has_untracked_inner(&path, root, tracked, owning_index) {
5983 return true;
5984 }
5985 } else if !tracked.contains(&rel) {
5986 return true;
5987 }
5988 }
5989 false
5990}
5991
5992pub fn apply_orderfile_entries(
5995 entries: Vec<DiffEntry>,
5996 order_path: &str,
5997 cwd: &Path,
5998) -> Result<Vec<DiffEntry>> {
5999 apply_orderfile(entries, order_path, cwd)
6000}
6001
6002fn apply_orderfile(
6003 mut entries: Vec<DiffEntry>,
6004 order_path: &str,
6005 cwd: &Path,
6006) -> Result<Vec<DiffEntry>> {
6007 let patterns = read_orderfile_patterns(order_path, cwd)?;
6008 let sort_key = |entry: &DiffEntry| -> usize {
6009 let path = entry
6010 .new_path
6011 .as_ref()
6012 .or(entry.old_path.as_ref())
6013 .cloned()
6014 .unwrap_or_default();
6015 for (i, pat) in patterns.iter().enumerate() {
6016 if orderfile_pattern_matches(pat, &path) {
6017 return i;
6018 }
6019 }
6020 patterns.len()
6021 };
6022 entries.sort_by_key(|e| sort_key(e));
6023 Ok(entries)
6024}
6025
6026pub fn read_orderfile_patterns(order_path: &str, cwd: &Path) -> Result<Vec<String>> {
6028 let path = Path::new(order_path);
6029 let resolved = if path.is_absolute() {
6030 path.to_path_buf()
6031 } else {
6032 cwd.join(path)
6033 };
6034 let _meta = std::fs::metadata(&resolved)
6035 .map_err(|e| Error::Message(format!("could not read orderfile {order_path}: {e}")))?;
6036 let mut f = std::fs::File::open(&resolved)
6037 .map_err(|e| Error::Message(format!("could not read orderfile {order_path}: {e}")))?;
6038 let mut content = String::new();
6039 std::io::Read::read_to_string(&mut f, &mut content)
6040 .map_err(|e| Error::Message(format!("could not read orderfile {order_path}: {e}")))?;
6041 Ok(content
6042 .lines()
6043 .map(|l| l.trim().to_string())
6044 .filter(|l| !l.is_empty() && !l.starts_with('#'))
6045 .collect())
6046}
6047
6048pub fn apply_rotate_skip_entries(
6050 mut entries: Vec<DiffEntry>,
6051 rotate_to: Option<&str>,
6052 skip_to: Option<&str>,
6053) -> Result<Vec<DiffEntry>> {
6054 let Some(needle) = rotate_to.or(skip_to) else {
6055 return Ok(entries);
6056 };
6057 let needle = needle.trim();
6058 if needle.is_empty() {
6059 return Ok(entries);
6060 }
6061 let idx = entries
6062 .iter()
6063 .position(|e| e.path() == needle)
6064 .ok_or_else(|| Error::Message(format!("fatal: No such path '{needle}' in the diff")))?;
6065 if rotate_to.is_some() {
6066 entries.rotate_left(idx);
6067 }
6068 if let Some(skip) = skip_to.filter(|s| !s.trim().is_empty()) {
6069 let pos = entries
6070 .iter()
6071 .position(|e| e.path() == skip)
6072 .ok_or_else(|| Error::Message(format!("fatal: No such path '{skip}' in the diff")))?;
6073 entries.drain(..pos);
6074 }
6075 Ok(entries)
6076}
6077
6078pub fn apply_rotate_skip_log_entries(
6081 odb: &Odb,
6082 commit_tree: &ObjectId,
6083 entries: Vec<DiffEntry>,
6084 rotate_to: Option<&str>,
6085 skip_to: Option<&str>,
6086) -> Result<Vec<DiffEntry>> {
6087 fn trimmed(s: Option<&str>) -> Option<&str> {
6092 s.map(str::trim).filter(|t| !t.is_empty())
6093 }
6094 if trimmed(rotate_to).is_none() && trimmed(skip_to).is_none() {
6095 return Ok(entries);
6096 }
6097 let tree_paths = crate::merge_diff::all_blob_paths_in_tree_order(odb, commit_tree);
6098 apply_rotate_skip_ordered_paths(&tree_paths, entries, rotate_to, skip_to)
6099}
6100
6101fn apply_rotate_skip_ordered_paths(
6102 tree_paths: &[String],
6103 entries: Vec<DiffEntry>,
6104 rotate_to: Option<&str>,
6105 skip_to: Option<&str>,
6106) -> Result<Vec<DiffEntry>> {
6107 let rotate = rotate_to.and_then(|s| {
6108 let t = s.trim();
6109 (!t.is_empty()).then_some(t)
6110 });
6111 let skip = skip_to.and_then(|s| {
6112 let t = s.trim();
6113 (!t.is_empty()).then_some(t)
6114 });
6115 if rotate.is_none() && skip.is_none() {
6116 return Ok(entries);
6117 }
6118
6119 use std::collections::HashMap;
6120 let mut by_path: HashMap<String, DiffEntry> = HashMap::new();
6121 for e in entries {
6122 by_path.insert(e.path().to_string(), e);
6123 }
6124
6125 if rotate.is_none() {
6128 let Some(skip_path) = skip else {
6129 return Ok(by_path.into_values().collect());
6130 };
6131 let idx = tree_paths
6132 .iter()
6133 .position(|p| p == skip_path)
6134 .ok_or_else(|| {
6135 Error::Message(format!("fatal: No such path '{skip_path}' in the diff"))
6136 })?;
6137 let mut out = Vec::new();
6138 for p in tree_paths.iter().skip(idx) {
6139 if let Some(e) = by_path.remove(p) {
6140 out.push(e);
6141 }
6142 }
6143 return Ok(out);
6144 }
6145
6146 let Some(needle) = rotate else {
6147 return Ok(by_path.into_values().collect());
6148 };
6149 let idx = tree_paths
6150 .iter()
6151 .position(|p| p == needle)
6152 .ok_or_else(|| Error::Message(format!("fatal: No such path '{needle}' in the diff")))?;
6153 let mut order: Vec<String> = tree_paths.to_vec();
6154 order.rotate_left(idx);
6155 if let Some(skip_path) = skip {
6156 let pos = order.iter().position(|p| p == skip_path).ok_or_else(|| {
6157 Error::Message(format!("fatal: No such path '{skip_path}' in the diff"))
6158 })?;
6159 order.drain(..pos);
6160 }
6161 let mut out = Vec::new();
6162 for p in order {
6163 if let Some(e) = by_path.remove(&p) {
6164 out.push(e);
6165 }
6166 }
6167 Ok(out)
6168}
6169
6170pub fn orderfile_pattern_matches(pattern: &str, path: &str) -> bool {
6173 let name = path.rsplit('/').next().unwrap_or(path);
6174 orderfile_glob_match(pattern, name) || orderfile_glob_match(pattern, path)
6175}
6176
6177fn orderfile_glob_match(pattern: &str, text: &str) -> bool {
6179 let mut pi = 0;
6180 let mut ti = 0;
6181 let pb = pattern.as_bytes();
6182 let tb = text.as_bytes();
6183 let mut star_pi = usize::MAX;
6184 let mut star_ti = 0;
6185
6186 while ti < tb.len() {
6187 if pi < pb.len() && (pb[pi] == b'?' || pb[pi] == tb[ti]) {
6188 pi += 1;
6189 ti += 1;
6190 } else if pi < pb.len() && pb[pi] == b'*' {
6191 star_pi = pi;
6192 star_ti = ti;
6193 pi += 1;
6194 } else if star_pi != usize::MAX {
6195 pi = star_pi + 1;
6196 star_ti += 1;
6197 ti = star_ti;
6198 } else {
6199 return false;
6200 }
6201 }
6202 while pi < pb.len() && pb[pi] == b'*' {
6203 pi += 1;
6204 }
6205 pi == pb.len()
6206}