1use std::fs;
24#[cfg(unix)]
25use std::os::unix::fs::MetadataExt;
26use std::path::{Path, PathBuf};
27
28use crate::config::ConfigSet;
29use crate::diff_indent_heuristic;
30use crate::error::{Error, Result};
31use crate::index::{Index, IndexEntry};
32use crate::objects::{parse_commit, parse_tree, CommitData, ObjectId, ObjectKind, TreeEntry};
33use crate::odb::Odb;
34use crate::userdiff::FuncnameMatcher;
35
36fn imara_unified_hunk_slices(body: &str) -> Vec<&str> {
38 let mut starts: Vec<usize> = Vec::new();
39 if body.starts_with("@@") {
40 starts.push(0);
41 }
42 for (idx, _) in body.match_indices("\n@@ ") {
43 starts.push(idx + 1);
44 }
45 starts.push(body.len());
46 starts.windows(2).map(|w| &body[w[0]..w[1]]).collect()
47}
48
49fn histogram_unified_body_raw(
50 old_content: &str,
51 new_content: &str,
52 context_lines: usize,
53 inter_hunk_context: usize,
54) -> String {
55 use imara_diff::{Algorithm, Diff, Hunk, InternedInput};
56 use std::fmt::Write as _;
57
58 let input = InternedInput::new(old_content, new_content);
59 let mut diff = Diff::compute(Algorithm::Histogram, &input);
60 diff.postprocess_lines(&input);
61
62 let hunks: Vec<Hunk> = diff.hunks().collect();
68 if hunks.is_empty() {
69 return String::new();
70 }
71
72 let ctx = context_lines.min(u32::MAX as usize) as u32;
73 let max_gap = (2usize.saturating_mul(context_lines))
74 .saturating_add(inter_hunk_context)
75 .min(u32::MAX as usize) as u32;
76 let before_len = input.before.len() as u32;
77 let after_len = input.after.len() as u32;
78
79 let mut groups: Vec<&[Hunk]> = Vec::new();
81 let mut group_start = 0usize;
82 for i in 1..hunks.len() {
83 if hunks[i].before.start - hunks[i - 1].before.end > max_gap {
84 groups.push(&hunks[group_start..i]);
85 group_start = i;
86 }
87 }
88 groups.push(&hunks[group_start..]);
89
90 fn push_line(out: &mut String, prefix: char, text: &str) {
91 out.push(prefix);
92 out.push_str(text);
93 if !text.ends_with('\n') {
94 out.push('\n');
95 }
96 }
97
98 fn fmt_side(start: u32, count: u32) -> String {
101 let shown_start = if count == 0 { start } else { start + 1 };
102 if count == 1 {
103 format!("{shown_start}")
104 } else {
105 format!("{shown_start},{count}")
106 }
107 }
108
109 let mut out = String::new();
110 for group in groups {
111 let first = &group[0];
112 let last = &group[group.len() - 1];
113 let b_start = first.before.start.saturating_sub(ctx);
114 let a_start = first.after.start.saturating_sub(ctx);
115 let b_end = (last.before.end.saturating_add(ctx)).min(before_len);
116 let a_end = (last.after.end.saturating_add(ctx)).min(after_len);
117
118 let _ = writeln!(
119 out,
120 "@@ -{} +{} @@",
121 fmt_side(b_start, b_end - b_start),
122 fmt_side(a_start, a_end - a_start)
123 );
124
125 let mut pos = b_start;
126 for hunk in group {
127 for &token in &input.before[pos as usize..hunk.before.start as usize] {
128 push_line(&mut out, ' ', input.interner[token]);
129 }
130 for &token in &input.before[hunk.before.start as usize..hunk.before.end as usize] {
131 push_line(&mut out, '-', input.interner[token]);
132 }
133 for &token in &input.after[hunk.after.start as usize..hunk.after.end as usize] {
134 push_line(&mut out, '+', input.interner[token]);
135 }
136 pos = hunk.before.end;
137 }
138 for &token in &input.before[pos as usize..b_end as usize] {
139 push_line(&mut out, ' ', input.interner[token]);
140 }
141 }
142
143 out
144}
145
146#[allow(clippy::too_many_arguments)]
162pub(crate) fn histogram_unified_body_ignore_fc<F>(
163 old_content: &str,
164 new_content: &str,
165 context_lines: usize,
166 inter_hunk_context: usize,
167 function_context: bool,
168 funcname_matcher: Option<&FuncnameMatcher>,
169 is_ignorable_change: F,
170) -> String
171where
172 F: Fn(&[&str], &[&str]) -> bool,
173{
174 use imara_diff::{Algorithm, Diff, Hunk, InternedInput};
175 use std::fmt::Write as _;
176
177 let input = InternedInput::new(old_content, new_content);
178 let mut diff = Diff::compute(Algorithm::Histogram, &input);
179 diff.postprocess_lines(&input);
180
181 let raw_hunks: Vec<Hunk> = diff.hunks().collect();
182 if raw_hunks.is_empty() {
183 return String::new();
184 }
185
186 let before_len = input.before.len() as i64;
187 let after_len = input.after.len() as i64;
188 let ctxlen = context_lines as i64;
189 let interhunk = inter_hunk_context as i64;
190 let max_common = ctxlen.saturating_add(ctxlen).saturating_add(interhunk);
192 let max_ignorable = ctxlen;
193
194 struct Change {
196 i1: i64,
197 chg1: i64,
198 i2: i64,
199 chg2: i64,
200 ignore: bool,
201 }
202
203 let mut changes: Vec<Change> = Vec::with_capacity(raw_hunks.len());
204 for h in &raw_hunks {
205 let i1 = h.before.start as i64;
206 let chg1 = (h.before.end - h.before.start) as i64;
207 let i2 = h.after.start as i64;
208 let chg2 = (h.after.end - h.after.start) as i64;
209 let removed: Vec<&str> = input.before[h.before.start as usize..h.before.end as usize]
210 .iter()
211 .map(|&t| input.interner[t])
212 .collect();
213 let added: Vec<&str> = input.after[h.after.start as usize..h.after.end as usize]
214 .iter()
215 .map(|&t| input.interner[t])
216 .collect();
217 let ignore = is_ignorable_change(&removed, &added);
218 changes.push(Change {
219 i1,
220 chg1,
221 i2,
222 chg2,
223 ignore,
224 });
225 }
226
227 let before_line = |idx: i64| -> &str { input.interner[input.before[idx as usize]] };
229 let after_line = |idx: i64| -> &str { input.interner[input.after[idx as usize]] };
230
231 let get_hunk = |start: usize| -> Option<(usize, usize)> {
237 let mut scr = start;
243 let mut xchp = start;
244 while xchp < changes.len() && changes[xchp].ignore {
245 let next = xchp + 1;
246 if next >= changes.len()
247 || changes[next].i1 - (changes[xchp].i1 + changes[xchp].chg1) >= max_ignorable
248 {
249 scr = next;
250 }
251 xchp = next;
252 }
253 if scr >= changes.len() {
254 return None;
255 }
256
257 let mut lxch = scr;
258 let mut ignored: i64 = 0;
259 let mut xchp = scr;
260 let mut idx = scr + 1;
261 while idx < changes.len() {
262 let xch = &changes[idx];
263 let prev = &changes[xchp];
264 let distance = xch.i1 - (prev.i1 + prev.chg1);
265 if distance > max_common {
266 break;
267 }
268 if distance < max_ignorable && (!xch.ignore || lxch == xchp) {
269 lxch = idx;
270 ignored = 0;
271 } else if distance < max_ignorable && xch.ignore {
272 ignored += xch.chg2;
273 } else if lxch != xchp
274 && xch.i1 + ignored - (changes[lxch].i1 + changes[lxch].chg1) > max_common
275 {
276 break;
277 } else if !xch.ignore {
278 lxch = idx;
279 ignored = 0;
280 } else {
281 ignored += xch.chg2;
282 }
283 xchp = idx;
284 idx += 1;
285 }
286 Some((scr, lxch))
287 };
288
289 fn push_line(out: &mut String, prefix: char, text: &str) {
290 out.push(prefix);
291 out.push_str(text);
292 if !text.ends_with('\n') {
293 out.push('\n');
294 }
295 }
296
297 fn fmt_side(start: i64, count: i64) -> String {
299 if count == 1 {
300 format!("{start}")
301 } else {
302 format!("{start},{count}")
303 }
304 }
305
306 let mut out = String::new();
307 let mut cursor = 0usize;
308 while cursor < changes.len() {
309 let Some((xch_idx, xche_idx)) = get_hunk(cursor) else {
310 break;
311 };
312 let xch = &changes[xch_idx];
313 let xche = &changes[xche_idx];
314
315 let mut s1 = (xch.i1 - ctxlen).max(0);
317 let mut s2 = (xch.i2 - ctxlen).max(0);
318 let mut lctx = ctxlen;
320 lctx = lctx.min(before_len - (xche.i1 + xche.chg1));
321 lctx = lctx.min(after_len - (xche.i2 + xche.chg2));
322 let mut e1 = xche.i1 + xche.chg1 + lctx;
323 let mut e2 = xche.i2 + xche.chg2 + lctx;
324
325 if function_context {
330 let mut fs1 = xch.i1;
333 while fs1 >= 0 && !is_func_line(before_line(fs1), funcname_matcher) {
334 fs1 -= 1;
335 }
336 while fs1 > 0
337 && before_line(fs1 - 1).trim().is_empty() == false
338 && !is_func_line(before_line(fs1 - 1), funcname_matcher)
339 {
340 fs1 -= 1;
341 }
342 if fs1 < 0 {
343 fs1 = 0;
344 }
345 if fs1 < s1 {
346 s2 = (s2 - (s1 - fs1)).max(0);
347 s1 = fs1;
348 }
349 let end1 = xche.i1 + xche.chg1;
352 let mut fe1 = end1;
353 while fe1 < before_len && !is_func_line(before_line(fe1), funcname_matcher) {
354 fe1 += 1;
355 }
356 while fe1 > 0 && before_line(fe1 - 1).trim().is_empty() {
357 fe1 -= 1;
358 }
359 if fe1 > before_len {
360 fe1 = before_len;
361 }
362 if fe1 > e1 {
363 e2 = (e2 + (fe1 - e1)).min(after_len);
364 e1 = fe1;
365 }
366 }
367
368 let _ = writeln!(
369 out,
370 "@@ -{} +{} @@",
371 fmt_side(s1 + 1, e1 - s1),
372 fmt_side(s2 + 1, e2 - s2)
373 );
374
375 let mut s2c = s2;
377 while s2c < xch.i2 {
378 push_line(&mut out, ' ', after_line(s2c));
379 s2c += 1;
380 }
381
382 let mut s1c = xch.i1;
384 let mut s2c = xch.i2;
385 let mut k = xch_idx;
386 loop {
387 let c = &changes[k];
388 while s1c < c.i1 && s2c < c.i2 {
390 push_line(&mut out, ' ', after_line(s2c));
391 s1c += 1;
392 s2c += 1;
393 }
394 let mut r = c.i1;
396 while r < c.i1 + c.chg1 {
397 push_line(&mut out, '-', before_line(r));
398 r += 1;
399 }
400 let mut a = c.i2;
402 while a < c.i2 + c.chg2 {
403 push_line(&mut out, '+', after_line(a));
404 a += 1;
405 }
406 if k == xche_idx {
407 break;
408 }
409 s1c = c.i1 + c.chg1;
410 s2c = c.i2 + c.chg2;
411 k += 1;
412 }
413
414 let mut s2p = xche.i2 + xche.chg2;
416 while s2p < e2 {
417 push_line(&mut out, ' ', after_line(s2p));
418 s2p += 1;
419 }
420
421 cursor = xche_idx + 1;
422 }
423
424 out
425}
426
427#[must_use]
431pub fn unified_diff_histogram_hunks_only(
432 old_content: &str,
433 new_content: &str,
434 context_lines: usize,
435 inter_hunk_context: usize,
436) -> String {
437 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context)
438}
439
440#[must_use]
442pub fn unified_diff_histogram_with_prefix_and_funcname(
443 old_content: &str,
444 new_content: &str,
445 old_path: &str,
446 new_path: &str,
447 context_lines: usize,
448 inter_hunk_context: usize,
449 src_prefix: &str,
450 dst_prefix: &str,
451 funcname_matcher: Option<&FuncnameMatcher>,
452 quote_path_fully: bool,
453) -> String {
454 use crate::quote_path::format_diff_path_with_prefix;
455
456 let body =
457 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context);
458
459 let mut output = String::new();
460 if old_path == "/dev/null" {
461 output.push_str("--- /dev/null\n");
462 } else if src_prefix.is_empty() {
463 output.push_str(&format!("--- {old_path}\n"));
464 } else {
465 output.push_str("--- ");
466 output.push_str(&format_diff_path_with_prefix(
467 src_prefix,
468 old_path,
469 quote_path_fully,
470 ));
471 output.push('\n');
472 }
473 if new_path == "/dev/null" {
474 output.push_str("+++ /dev/null\n");
475 } else if dst_prefix.is_empty() {
476 output.push_str(&format!("+++ {new_path}\n"));
477 } else {
478 output.push_str("+++ ");
479 output.push_str(&format_diff_path_with_prefix(
480 dst_prefix,
481 new_path,
482 quote_path_fully,
483 ));
484 output.push('\n');
485 }
486
487 let old_lines: Vec<&str> = old_content.lines().collect();
488 for hunk_str in imara_unified_hunk_slices(&body) {
489 if hunk_str.is_empty() {
490 continue;
491 }
492 if let Some(first_newline) = hunk_str.find('\n') {
493 let header_line = &hunk_str[..first_newline];
494 let rest = &hunk_str[first_newline..];
495 if let Some(func_ctx) =
496 extract_function_context(header_line, &old_lines, funcname_matcher)
497 {
498 output.push_str(header_line);
499 output.push(' ');
500 output.push_str(&func_ctx);
501 output.push_str(rest);
502 } else {
503 output.push_str(hunk_str);
504 }
505 } else {
506 output.push_str(hunk_str);
507 }
508 }
509
510 output
511}
512
513#[allow(clippy::too_many_arguments)]
523pub fn unified_diff_histogram_ignore_with_prefix_and_funcname<F>(
524 old_content: &str,
525 new_content: &str,
526 old_path: &str,
527 new_path: &str,
528 context_lines: usize,
529 inter_hunk_context: usize,
530 src_prefix: &str,
531 dst_prefix: &str,
532 funcname_matcher: Option<&FuncnameMatcher>,
533 quote_path_fully: bool,
534 function_context: bool,
535 is_ignorable_change: F,
536) -> String
537where
538 F: Fn(&[&str], &[&str]) -> bool,
539{
540 use crate::quote_path::format_diff_path_with_prefix;
541
542 let body = histogram_unified_body_ignore_fc(
543 old_content,
544 new_content,
545 context_lines,
546 inter_hunk_context,
547 function_context,
548 funcname_matcher,
549 is_ignorable_change,
550 );
551 if body.is_empty() {
552 return String::new();
553 }
554
555 let mut output = String::new();
556 if old_path == "/dev/null" {
557 output.push_str("--- /dev/null\n");
558 } else if src_prefix.is_empty() {
559 output.push_str(&format!("--- {old_path}\n"));
560 } else {
561 output.push_str("--- ");
562 output.push_str(&format_diff_path_with_prefix(
563 src_prefix,
564 old_path,
565 quote_path_fully,
566 ));
567 output.push('\n');
568 }
569 if new_path == "/dev/null" {
570 output.push_str("+++ /dev/null\n");
571 } else if dst_prefix.is_empty() {
572 output.push_str(&format!("+++ {new_path}\n"));
573 } else {
574 output.push_str("+++ ");
575 output.push_str(&format_diff_path_with_prefix(
576 dst_prefix,
577 new_path,
578 quote_path_fully,
579 ));
580 output.push('\n');
581 }
582
583 let old_lines: Vec<&str> = old_content.lines().collect();
584 for hunk_str in imara_unified_hunk_slices(&body) {
585 if hunk_str.is_empty() {
586 continue;
587 }
588 if let Some(first_newline) = hunk_str.find('\n') {
589 let header_line = &hunk_str[..first_newline];
590 let rest = &hunk_str[first_newline..];
591 if let Some(func_ctx) =
592 extract_function_context(header_line, &old_lines, funcname_matcher)
593 {
594 output.push_str(header_line);
595 output.push(' ');
596 output.push_str(&func_ctx);
597 output.push_str(rest);
598 } else {
599 output.push_str(hunk_str);
600 }
601 } else {
602 output.push_str(hunk_str);
603 }
604 }
605
606 output
607}
608
609#[must_use]
611pub fn indent_heuristic_from_config(config: &ConfigSet) -> bool {
612 match config.get_bool("diff.indentHeuristic") {
613 Some(Ok(b)) => b,
614 Some(Err(_)) | None => true,
615 }
616}
617
618#[must_use]
620pub fn resolve_indent_heuristic(
621 config: &ConfigSet,
622 cli_indent_heuristic: bool,
623 cli_no_indent_heuristic: bool,
624) -> bool {
625 if cli_no_indent_heuristic {
626 false
627 } else if cli_indent_heuristic {
628 true
629 } else {
630 indent_heuristic_from_config(config)
631 }
632}
633
634#[must_use]
636pub fn parse_indent_heuristic_cli_flags(argv: &[String]) -> (bool, bool) {
637 let mut indent_heuristic = false;
638 let mut no_indent_heuristic = false;
639 for a in argv {
640 match a.as_str() {
641 "--indent-heuristic" => {
642 indent_heuristic = true;
643 no_indent_heuristic = false;
644 }
645 "--no-indent-heuristic" => {
646 no_indent_heuristic = true;
647 indent_heuristic = false;
648 }
649 _ => {}
650 }
651 }
652 (indent_heuristic, no_indent_heuristic)
653}
654
655mod git_xdiff {
665 const XDL_MAX_COST_MIN: i64 = 256;
666 const XDL_HEUR_MIN_COST: i64 = 256;
667 const XDL_SNAKE_CNT: i64 = 20;
668 const XDL_K_HEUR: i64 = 4;
669 const XDL_LINE_MAX: i64 = i64::MAX;
670 const XDL_KPDIS_RUN: i64 = 4;
671 const XDL_MAX_EQLIMIT: i64 = 1024;
672 const XDL_SIMSCAN_WINDOW: i64 = 100;
673
674 const DISCARD: u8 = 0;
676 const KEEP: u8 = 1;
677 const INVESTIGATE: u8 = 2;
678
679 fn bogosqrt(mut n: i64) -> i64 {
681 let mut i: i64 = 1;
682 while n > 0 {
683 i <<= 1;
684 n >>= 2;
685 }
686 i
687 }
688
689 struct XdAlgoEnv {
690 mxcost: i64,
691 snake_cnt: i64,
692 heur_min: i64,
693 }
694
695 struct XdpSplit {
696 i1: i64,
697 i2: i64,
698 min_lo: bool,
699 min_hi: bool,
700 }
701
702 struct XdFile<'a> {
705 ids: &'a [u32],
706 changed: Vec<bool>,
707 reference_index: Vec<usize>,
708 dstart: i64,
709 dend: i64,
710 nreff: i64,
711 }
712
713 #[inline]
715 fn get_id(xdf: &XdFile<'_>, idx: i64) -> u32 {
716 xdf.ids[xdf.reference_index[idx as usize]]
717 }
718
719 #[allow(clippy::too_many_arguments)]
722 fn xdl_split(
723 xdf1: &XdFile<'_>,
724 off1: i64,
725 lim1: i64,
726 xdf2: &XdFile<'_>,
727 off2: i64,
728 lim2: i64,
729 kvdf: &mut [i64],
730 kvdb: &mut [i64],
731 koff: i64,
732 need_min: bool,
733 spl: &mut XdpSplit,
734 xenv: &XdAlgoEnv,
735 ) -> i64 {
736 let dmin = off1 - lim2;
737 let dmax = lim1 - off2;
738 let fmid = off1 - off2;
739 let bmid = lim1 - lim2;
740 let odd = ((fmid - bmid) & 1) != 0;
741 let mut fmin = fmid;
742 let mut fmax = fmid;
743 let mut bmin = bmid;
744 let mut bmax = bmid;
745
746 let kf = |k: i64| -> usize { (k + koff) as usize };
747
748 kvdf[kf(fmid)] = off1;
749 kvdb[kf(bmid)] = lim1;
750
751 let mut ec: i64 = 1;
752 loop {
753 let mut got_snake = false;
754
755 if fmin > dmin {
756 fmin -= 1;
757 kvdf[kf(fmin - 1)] = -1;
758 } else {
759 fmin += 1;
760 }
761 if fmax < dmax {
762 fmax += 1;
763 kvdf[kf(fmax + 1)] = -1;
764 } else {
765 fmax -= 1;
766 }
767
768 let mut d = fmax;
769 while d >= fmin {
770 let mut i1 = if kvdf[kf(d - 1)] >= kvdf[kf(d + 1)] {
771 kvdf[kf(d - 1)] + 1
772 } else {
773 kvdf[kf(d + 1)]
774 };
775 let prev1 = i1;
776 let mut i2 = i1 - d;
777 while i1 < lim1 && i2 < lim2 && get_id(xdf1, i1) == get_id(xdf2, i2) {
778 i1 += 1;
779 i2 += 1;
780 }
781 if i1 - prev1 > xenv.snake_cnt {
782 got_snake = true;
783 }
784 kvdf[kf(d)] = i1;
785 if odd && bmin <= d && d <= bmax && kvdb[kf(d)] <= i1 {
786 spl.i1 = i1;
787 spl.i2 = i2;
788 spl.min_lo = true;
789 spl.min_hi = true;
790 return ec;
791 }
792 d -= 2;
793 }
794
795 if bmin > dmin {
796 bmin -= 1;
797 kvdb[kf(bmin - 1)] = XDL_LINE_MAX;
798 } else {
799 bmin += 1;
800 }
801 if bmax < dmax {
802 bmax += 1;
803 kvdb[kf(bmax + 1)] = XDL_LINE_MAX;
804 } else {
805 bmax -= 1;
806 }
807
808 let mut d = bmax;
809 while d >= bmin {
810 let mut i1 = if kvdb[kf(d - 1)] < kvdb[kf(d + 1)] {
811 kvdb[kf(d - 1)]
812 } else {
813 kvdb[kf(d + 1)] - 1
814 };
815 let prev1 = i1;
816 let mut i2 = i1 - d;
817 while i1 > off1 && i2 > off2 && get_id(xdf1, i1 - 1) == get_id(xdf2, i2 - 1) {
818 i1 -= 1;
819 i2 -= 1;
820 }
821 if prev1 - i1 > xenv.snake_cnt {
822 got_snake = true;
823 }
824 kvdb[kf(d)] = i1;
825 if !odd && fmin <= d && d <= fmax && i1 <= kvdf[kf(d)] {
826 spl.i1 = i1;
827 spl.i2 = i2;
828 spl.min_lo = true;
829 spl.min_hi = true;
830 return ec;
831 }
832 d -= 2;
833 }
834
835 if need_min {
836 ec += 1;
837 continue;
838 }
839
840 if got_snake && ec > xenv.heur_min {
841 let mut best = 0i64;
842 let mut d = fmax;
843 while d >= fmin {
844 let dd = if d > fmid { d - fmid } else { fmid - d };
845 let i1 = kvdf[kf(d)];
846 let i2 = i1 - d;
847 let v = (i1 - off1) + (i2 - off2) - dd;
848 if v > XDL_K_HEUR * ec
849 && v > best
850 && off1 + xenv.snake_cnt <= i1
851 && i1 < lim1
852 && off2 + xenv.snake_cnt <= i2
853 && i2 < lim2
854 {
855 let mut k = 1i64;
856 while get_id(xdf1, i1 - k) == get_id(xdf2, i2 - k) {
857 if k == xenv.snake_cnt {
858 best = v;
859 spl.i1 = i1;
860 spl.i2 = i2;
861 break;
862 }
863 k += 1;
864 }
865 }
866 d -= 2;
867 }
868 if best > 0 {
869 spl.min_lo = true;
870 spl.min_hi = false;
871 return ec;
872 }
873
874 let mut best = 0i64;
875 let mut d = bmax;
876 while d >= bmin {
877 let dd = if d > bmid { d - bmid } else { bmid - d };
878 let i1 = kvdb[kf(d)];
879 let i2 = i1 - d;
880 let v = (lim1 - i1) + (lim2 - i2) - dd;
881 if v > XDL_K_HEUR * ec
882 && v > best
883 && off1 < i1
884 && i1 <= lim1 - xenv.snake_cnt
885 && off2 < i2
886 && i2 <= lim2 - xenv.snake_cnt
887 {
888 let mut k = 0i64;
889 while get_id(xdf1, i1 + k) == get_id(xdf2, i2 + k) {
890 if k == xenv.snake_cnt - 1 {
891 best = v;
892 spl.i1 = i1;
893 spl.i2 = i2;
894 break;
895 }
896 k += 1;
897 }
898 }
899 d -= 2;
900 }
901 if best > 0 {
902 spl.min_lo = false;
903 spl.min_hi = true;
904 return ec;
905 }
906 }
907
908 if ec >= xenv.mxcost {
909 let mut fbest = -1i64;
910 let mut fbest1 = -1i64;
911 let mut d = fmax;
912 while d >= fmin {
913 let mut i1 = kvdf[kf(d)].min(lim1);
914 let mut i2 = i1 - d;
915 if lim2 < i2 {
916 i1 = lim2 + d;
917 i2 = lim2;
918 }
919 if fbest < i1 + i2 {
920 fbest = i1 + i2;
921 fbest1 = i1;
922 }
923 d -= 2;
924 }
925
926 let mut bbest = XDL_LINE_MAX;
927 let mut bbest1 = XDL_LINE_MAX;
928 let mut d = bmax;
929 while d >= bmin {
930 let mut i1 = off1.max(kvdb[kf(d)]);
931 let mut i2 = i1 - d;
932 if i2 < off2 {
933 i1 = off2 + d;
934 i2 = off2;
935 }
936 if i1 + i2 < bbest {
937 bbest = i1 + i2;
938 bbest1 = i1;
939 }
940 d -= 2;
941 }
942
943 if (lim1 + lim2) - bbest < fbest - (off1 + off2) {
944 spl.i1 = fbest1;
945 spl.i2 = fbest - fbest1;
946 spl.min_lo = true;
947 spl.min_hi = false;
948 } else {
949 spl.i1 = bbest1;
950 spl.i2 = bbest - bbest1;
951 spl.min_lo = false;
952 spl.min_hi = true;
953 }
954 return ec;
955 }
956
957 ec += 1;
958 }
959 }
960
961 #[allow(clippy::too_many_arguments)]
963 fn xdl_recs_cmp(
964 xdf1: &mut XdFile<'_>,
965 mut off1: i64,
966 mut lim1: i64,
967 xdf2: &mut XdFile<'_>,
968 mut off2: i64,
969 mut lim2: i64,
970 kvdf: &mut [i64],
971 kvdb: &mut [i64],
972 koff: i64,
973 need_min: bool,
974 xenv: &XdAlgoEnv,
975 ) {
976 while off1 < lim1 && off2 < lim2 && get_id(xdf1, off1) == get_id(xdf2, off2) {
977 off1 += 1;
978 off2 += 1;
979 }
980 while off1 < lim1 && off2 < lim2 && get_id(xdf1, lim1 - 1) == get_id(xdf2, lim2 - 1) {
981 lim1 -= 1;
982 lim2 -= 1;
983 }
984
985 if off1 == lim1 {
986 while off2 < lim2 {
987 let r = xdf2.reference_index[off2 as usize];
988 xdf2.changed[r] = true;
989 off2 += 1;
990 }
991 } else if off2 == lim2 {
992 while off1 < lim1 {
993 let r = xdf1.reference_index[off1 as usize];
994 xdf1.changed[r] = true;
995 off1 += 1;
996 }
997 } else {
998 let mut spl = XdpSplit {
999 i1: 0,
1000 i2: 0,
1001 min_lo: false,
1002 min_hi: false,
1003 };
1004 xdl_split(
1005 xdf1, off1, lim1, xdf2, off2, lim2, kvdf, kvdb, koff, need_min, &mut spl, xenv,
1006 );
1007 xdl_recs_cmp(
1008 xdf1, off1, spl.i1, xdf2, off2, spl.i2, kvdf, kvdb, koff, spl.min_lo, xenv,
1009 );
1010 xdl_recs_cmp(
1011 xdf1, spl.i1, lim1, xdf2, spl.i2, lim2, kvdf, kvdb, koff, spl.min_hi, xenv,
1012 );
1013 }
1014 }
1015
1016 fn clean_mmatch(action: &[u8], i: i64, mut s: i64, mut e: i64) -> bool {
1018 if i - s > XDL_SIMSCAN_WINDOW {
1019 s = i - XDL_SIMSCAN_WINDOW;
1020 }
1021 if e - i > XDL_SIMSCAN_WINDOW {
1022 e = i + XDL_SIMSCAN_WINDOW;
1023 }
1024
1025 let mut rdis0 = 0i64;
1026 let mut rpdis0 = 1i64;
1027 let mut r = 1i64;
1028 while i - r >= s {
1029 match action[(i - r) as usize] {
1030 DISCARD => rdis0 += 1,
1031 INVESTIGATE => rpdis0 += 1,
1032 _ => break, }
1034 r += 1;
1035 }
1036 if rdis0 == 0 {
1037 return false;
1038 }
1039 let mut rdis1 = 0i64;
1040 let mut rpdis1 = 1i64;
1041 let mut r = 1i64;
1042 while i + r <= e {
1043 match action[(i + r) as usize] {
1044 DISCARD => rdis1 += 1,
1045 INVESTIGATE => rpdis1 += 1,
1046 _ => break, }
1048 r += 1;
1049 }
1050 if rdis1 == 0 {
1051 return false;
1052 }
1053 rdis1 += rdis0;
1054 rpdis1 += rpdis0;
1055 rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1)
1056 }
1057
1058 pub fn changed_flags(ids1: &[u32], ids2: &[u32]) -> (Vec<bool>, Vec<bool>) {
1062 let nrec1 = ids1.len();
1063 let nrec2 = ids2.len();
1064
1065 use std::collections::HashMap;
1067 let mut count1: HashMap<u32, i64> = HashMap::new();
1068 let mut count2: HashMap<u32, i64> = HashMap::new();
1069 for &id in ids1 {
1070 *count1.entry(id).or_insert(0) += 1;
1071 }
1072 for &id in ids2 {
1073 *count2.entry(id).or_insert(0) += 1;
1074 }
1075
1076 let mut xdf1 = XdFile {
1077 ids: ids1,
1078 changed: vec![false; nrec1],
1079 reference_index: Vec::new(),
1080 dstart: 0,
1081 dend: nrec1 as i64 - 1,
1082 nreff: 0,
1083 };
1084 let mut xdf2 = XdFile {
1085 ids: ids2,
1086 changed: vec![false; nrec2],
1087 reference_index: Vec::new(),
1088 dstart: 0,
1089 dend: nrec2 as i64 - 1,
1090 nreff: 0,
1091 };
1092
1093 let lim = nrec1.min(nrec2) as i64;
1095 let mut i = 0i64;
1096 while i < lim && ids1[i as usize] == ids2[i as usize] {
1097 i += 1;
1098 }
1099 xdf1.dstart = i;
1100 xdf2.dstart = i;
1101 let mut j = 0i64;
1102 let rem = lim - i;
1103 while j < rem && ids1[nrec1 - 1 - j as usize] == ids2[nrec2 - 1 - j as usize] {
1104 j += 1;
1105 }
1106 xdf1.dend = nrec1 as i64 - j - 1;
1107 xdf2.dend = nrec2 as i64 - j - 1;
1108
1109 let mut action1 = vec![0u8; nrec1 + 1];
1111 let mut action2 = vec![0u8; nrec2 + 1];
1112
1113 let mut mlim = bogosqrt(nrec1 as i64);
1114 if mlim > XDL_MAX_EQLIMIT {
1115 mlim = XDL_MAX_EQLIMIT;
1116 }
1117 let mut idx = xdf1.dstart;
1118 while idx <= xdf1.dend {
1119 let id = ids1[idx as usize];
1120 let nm = *count2.get(&id).unwrap_or(&0);
1121 action1[idx as usize] = if nm == 0 {
1122 DISCARD
1123 } else if nm >= mlim {
1124 INVESTIGATE
1125 } else {
1126 KEEP
1127 };
1128 idx += 1;
1129 }
1130
1131 let mut mlim = bogosqrt(nrec2 as i64);
1132 if mlim > XDL_MAX_EQLIMIT {
1133 mlim = XDL_MAX_EQLIMIT;
1134 }
1135 let mut idx = xdf2.dstart;
1136 while idx <= xdf2.dend {
1137 let id = ids2[idx as usize];
1138 let nm = *count1.get(&id).unwrap_or(&0);
1139 action2[idx as usize] = if nm == 0 {
1140 DISCARD
1141 } else if nm >= mlim {
1142 INVESTIGATE
1143 } else {
1144 KEEP
1145 };
1146 idx += 1;
1147 }
1148
1149 let mut idx = xdf1.dstart;
1150 while idx <= xdf1.dend {
1151 let a = action1[idx as usize];
1152 if a == KEEP
1153 || (a == INVESTIGATE && !clean_mmatch(&action1, idx, xdf1.dstart, xdf1.dend))
1154 {
1155 xdf1.reference_index.push(idx as usize);
1156 } else {
1157 xdf1.changed[idx as usize] = true;
1158 }
1159 idx += 1;
1160 }
1161 xdf1.nreff = xdf1.reference_index.len() as i64;
1162
1163 let mut idx = xdf2.dstart;
1164 while idx <= xdf2.dend {
1165 let a = action2[idx as usize];
1166 if a == KEEP
1167 || (a == INVESTIGATE && !clean_mmatch(&action2, idx, xdf2.dstart, xdf2.dend))
1168 {
1169 xdf2.reference_index.push(idx as usize);
1170 } else {
1171 xdf2.changed[idx as usize] = true;
1172 }
1173 idx += 1;
1174 }
1175 xdf2.nreff = xdf2.reference_index.len() as i64;
1176
1177 let ndiags = xdf1.nreff + xdf2.nreff + 3;
1179 let kvd_len = (2 * ndiags + 2) as usize;
1180 let mut kvd = vec![0i64; kvd_len];
1181 let koff = xdf2.nreff + 1;
1183 let (kvdf_slice, kvdb_slice) = kvd.split_at_mut(ndiags as usize);
1184 let xenv = XdAlgoEnv {
1187 mxcost: bogosqrt(ndiags).max(XDL_MAX_COST_MIN),
1188 snake_cnt: XDL_SNAKE_CNT,
1189 heur_min: XDL_HEUR_MIN_COST,
1190 };
1191
1192 let nreff1 = xdf1.nreff;
1193 let nreff2 = xdf2.nreff;
1194 xdl_recs_cmp(
1195 &mut xdf1, 0, nreff1, &mut xdf2, 0, nreff2, kvdf_slice, kvdb_slice, koff, false, &xenv,
1196 );
1197
1198 (xdf1.changed, xdf2.changed)
1199 }
1200}
1201
1202fn changed_flags_to_ops(
1204 changed1: &[bool],
1205 changed2: &[bool],
1206 old_len: usize,
1207 new_len: usize,
1208) -> Vec<similar::DiffOp> {
1209 use similar::DiffOp;
1210 let mut ops: Vec<DiffOp> = Vec::new();
1211 let mut i = 0usize;
1212 let mut j = 0usize;
1213 while i < old_len || j < new_len {
1214 let del = i < old_len && changed1[i];
1215 let ins = j < new_len && changed2[j];
1216 if !del && !ins {
1217 let start_i = i;
1219 let start_j = j;
1220 while i < old_len && j < new_len && !changed1[i] && !changed2[j] {
1221 i += 1;
1222 j += 1;
1223 }
1224 ops.push(DiffOp::Equal {
1225 old_index: start_i,
1226 new_index: start_j,
1227 len: i - start_i,
1228 });
1229 } else {
1230 let start_i = i;
1232 let start_j = j;
1233 while i < old_len && changed1[i] {
1234 i += 1;
1235 }
1236 while j < new_len && changed2[j] {
1237 j += 1;
1238 }
1239 let dlen = i - start_i;
1240 let ilen = j - start_j;
1241 if dlen > 0 && ilen > 0 {
1242 ops.push(DiffOp::Replace {
1243 old_index: start_i,
1244 old_len: dlen,
1245 new_index: start_j,
1246 new_len: ilen,
1247 });
1248 } else if dlen > 0 {
1249 ops.push(DiffOp::Delete {
1250 old_index: start_i,
1251 old_len: dlen,
1252 new_index: start_j,
1253 });
1254 } else if ilen > 0 {
1255 ops.push(DiffOp::Insert {
1256 old_index: start_i,
1257 new_index: start_j,
1258 new_len: ilen,
1259 });
1260 }
1261 }
1262 }
1263 ops
1264}
1265
1266#[must_use]
1272pub fn word_diff_ops_imara(old_words: &[&str], new_words: &[&str]) -> Vec<similar::DiffOp> {
1273 use std::collections::HashMap;
1274
1275 let mut interner: HashMap<&str, u32> = HashMap::new();
1277 let mut ids1: Vec<u32> = Vec::with_capacity(old_words.len());
1278 for &w in old_words {
1279 let next = interner.len() as u32;
1280 let id = *interner.entry(w).or_insert(next);
1281 ids1.push(id);
1282 }
1283 let mut ids2: Vec<u32> = Vec::with_capacity(new_words.len());
1284 for &w in new_words {
1285 let next = interner.len() as u32;
1286 let id = *interner.entry(w).or_insert(next);
1287 ids2.push(id);
1288 }
1289
1290 let (changed1, changed2) = git_xdiff::changed_flags(&ids1, &ids2);
1291 let ops = changed_flags_to_ops(&changed1, &changed2, old_words.len(), new_words.len());
1292
1293 diff_indent_heuristic::apply_change_compact_to_ops(&ops, old_words, new_words, false)
1296}
1297
1298#[must_use]
1300pub fn diff_slice_ops_compacted(
1301 old_lines: &[&str],
1302 new_lines: &[&str],
1303 algorithm: similar::Algorithm,
1304 indent_heuristic: bool,
1305) -> Vec<similar::DiffOp> {
1306 diff_indent_heuristic::diff_slice_ops_compacted(
1307 old_lines,
1308 new_lines,
1309 algorithm,
1310 indent_heuristic,
1311 )
1312}
1313
1314#[must_use]
1316pub fn map_new_to_old_lines_compacted(
1317 old_joined: &str,
1318 new_joined: &str,
1319 algorithm: similar::Algorithm,
1320 indent_heuristic: bool,
1321 new_line_count: usize,
1322) -> Vec<Option<usize>> {
1323 let ops = diff_indent_heuristic::diff_lines_ops_compacted(
1324 old_joined,
1325 new_joined,
1326 algorithm,
1327 indent_heuristic,
1328 );
1329 diff_indent_heuristic::map_new_to_old_from_ops(&ops, new_line_count)
1330}
1331
1332#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1334pub enum DiffStatus {
1335 Added,
1337 Deleted,
1339 Modified,
1341 Renamed,
1343 Copied,
1345 TypeChanged,
1347 Unmerged,
1349}
1350
1351impl DiffStatus {
1352 #[must_use]
1354 pub fn letter(&self) -> char {
1355 match self {
1356 Self::Added => 'A',
1357 Self::Deleted => 'D',
1358 Self::Modified => 'M',
1359 Self::Renamed => 'R',
1360 Self::Copied => 'C',
1361 Self::TypeChanged => 'T',
1362 Self::Unmerged => 'U',
1363 }
1364 }
1365}
1366
1367#[derive(Debug, Clone, PartialEq, Eq)]
1369pub struct DiffEntry {
1370 pub status: DiffStatus,
1372 pub old_path: Option<String>,
1374 pub new_path: Option<String>,
1376 pub old_mode: String,
1378 pub new_mode: String,
1380 pub old_oid: ObjectId,
1382 pub new_oid: ObjectId,
1384 pub score: Option<u32>,
1386}
1387
1388impl DiffEntry {
1389 #[must_use]
1391 pub fn path(&self) -> &str {
1392 self.new_path
1393 .as_deref()
1394 .or(self.old_path.as_deref())
1395 .unwrap_or("")
1396 }
1397
1398 #[must_use]
1403 pub fn display_path(&self) -> String {
1404 match self.status {
1405 DiffStatus::Renamed | DiffStatus::Copied => {
1406 let old = self.old_path.as_deref().unwrap_or("");
1407 let new = self.new_path.as_deref().unwrap_or("");
1408 if old.is_empty() || new.is_empty() {
1409 self.path().to_owned()
1410 } else {
1411 format!("{old} -> {new}")
1412 }
1413 }
1414 _ => self.path().to_owned(),
1415 }
1416 }
1417}
1418
1419pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
1421
1422#[must_use]
1424pub fn zero_oid() -> ObjectId {
1425 ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
1426 panic!("internal error: failed to create zero OID");
1428 })
1429}
1430
1431#[must_use]
1433pub fn empty_blob_oid() -> ObjectId {
1434 ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391").unwrap_or_else(|_| {
1435 panic!("internal error: failed to create empty blob OID");
1437 })
1438}
1439
1440pub fn diff_trees(
1455 odb: &Odb,
1456 old_tree_oid: Option<&ObjectId>,
1457 new_tree_oid: Option<&ObjectId>,
1458 prefix: &str,
1459) -> Result<Vec<DiffEntry>> {
1460 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false)
1461}
1462
1463pub fn diff_trees_show_tree_entries(
1467 odb: &Odb,
1468 old_tree_oid: Option<&ObjectId>,
1469 new_tree_oid: Option<&ObjectId>,
1470 prefix: &str,
1471) -> Result<Vec<DiffEntry>> {
1472 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true)
1473}
1474
1475fn diff_trees_opts(
1476 odb: &Odb,
1477 old_tree_oid: Option<&ObjectId>,
1478 new_tree_oid: Option<&ObjectId>,
1479 prefix: &str,
1480 show_trees: bool,
1481) -> Result<Vec<DiffEntry>> {
1482 let old_entries = match old_tree_oid {
1483 Some(oid) => read_tree(odb, oid)?,
1484 None => Vec::new(),
1485 };
1486 let new_entries = match new_tree_oid {
1487 Some(oid) => read_tree(odb, oid)?,
1488 None => Vec::new(),
1489 };
1490
1491 let mut result = Vec::new();
1492 diff_tree_entries_opts(
1493 odb,
1494 &old_entries,
1495 &new_entries,
1496 prefix,
1497 show_trees,
1498 &mut result,
1499 )?;
1500 Ok(result)
1501}
1502
1503fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
1505 let obj = odb.read(oid)?;
1506 if obj.kind != ObjectKind::Tree {
1507 return Err(Error::CorruptObject(format!(
1508 "expected tree, got {}",
1509 obj.kind.as_str()
1510 )));
1511 }
1512 parse_tree(&obj.data)
1513}
1514
1515fn diff_tree_entries_opts(
1517 odb: &Odb,
1518 old: &[TreeEntry],
1519 new: &[TreeEntry],
1520 prefix: &str,
1521 show_trees: bool,
1522 result: &mut Vec<DiffEntry>,
1523) -> Result<()> {
1524 let mut oi = 0;
1525 let mut ni = 0;
1526
1527 while oi < old.len() || ni < new.len() {
1528 match (old.get(oi), new.get(ni)) {
1529 (Some(o), Some(n)) => {
1530 let cmp = crate::objects::tree_entry_cmp(
1531 &o.name,
1532 is_tree_mode(o.mode),
1533 &n.name,
1534 is_tree_mode(n.mode),
1535 );
1536 match cmp {
1537 std::cmp::Ordering::Less => {
1538 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1540 oi += 1;
1541 }
1542 std::cmp::Ordering::Greater => {
1543 emit_added_opts(odb, n, prefix, show_trees, result)?;
1545 ni += 1;
1546 }
1547 std::cmp::Ordering::Equal => {
1548 if o.oid != n.oid || o.mode != n.mode {
1550 let name_str = String::from_utf8_lossy(&o.name);
1551 let path = format_path(prefix, &name_str);
1552 if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
1553 if show_trees {
1555 result.push(DiffEntry {
1556 status: DiffStatus::Modified,
1557 old_path: Some(path.clone()),
1558 new_path: Some(path.clone()),
1559 old_mode: format_mode(o.mode),
1560 new_mode: format_mode(n.mode),
1561 old_oid: o.oid,
1562 new_oid: n.oid,
1563 score: None,
1564 });
1565 }
1566 let nested = diff_trees_opts(
1568 odb,
1569 Some(&o.oid),
1570 Some(&n.oid),
1571 &path,
1572 show_trees,
1573 )?;
1574 result.extend(nested);
1575 } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
1576 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1578 emit_added_opts(odb, n, prefix, show_trees, result)?;
1579 } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
1580 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1582 emit_added_opts(odb, n, prefix, show_trees, result)?;
1583 } else {
1584 let old_type = o.mode & 0o170000;
1588 let new_type = n.mode & 0o170000;
1589 result.push(DiffEntry {
1590 status: if old_type != new_type {
1591 DiffStatus::TypeChanged
1592 } else {
1593 DiffStatus::Modified
1594 },
1595 old_path: Some(path.clone()),
1596 new_path: Some(path),
1597 old_mode: format_mode(o.mode),
1598 new_mode: format_mode(n.mode),
1599 old_oid: o.oid,
1600 new_oid: n.oid,
1601 score: None,
1602 });
1603 }
1604 }
1605 oi += 1;
1606 ni += 1;
1607 }
1608 }
1609 }
1610 (Some(o), None) => {
1611 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
1612 oi += 1;
1613 }
1614 (None, Some(n)) => {
1615 emit_added_opts(odb, n, prefix, show_trees, result)?;
1616 ni += 1;
1617 }
1618 (None, None) => break,
1619 }
1620 }
1621
1622 Ok(())
1623}
1624
1625fn emit_deleted_opts(
1626 odb: &Odb,
1627 entry: &TreeEntry,
1628 prefix: &str,
1629 show_trees: bool,
1630 result: &mut Vec<DiffEntry>,
1631) -> Result<()> {
1632 let name_str = String::from_utf8_lossy(&entry.name);
1633 let path = format_path(prefix, &name_str);
1634 if is_tree_mode(entry.mode) {
1635 if show_trees {
1636 result.push(DiffEntry {
1637 status: DiffStatus::Deleted,
1638 old_path: Some(path.clone()),
1639 new_path: None,
1640 old_mode: format_mode(entry.mode),
1641 new_mode: "000000".to_owned(),
1642 old_oid: entry.oid,
1643 new_oid: zero_oid(),
1644 score: None,
1645 });
1646 }
1647 let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
1649 result.extend(nested);
1650 } else {
1651 result.push(DiffEntry {
1652 status: DiffStatus::Deleted,
1653 old_path: Some(path.clone()),
1654 new_path: None,
1655 old_mode: format_mode(entry.mode),
1656 new_mode: "000000".to_owned(),
1657 old_oid: entry.oid,
1658 new_oid: zero_oid(),
1659 score: None,
1660 });
1661 }
1662 Ok(())
1663}
1664
1665fn emit_added_opts(
1666 odb: &Odb,
1667 entry: &TreeEntry,
1668 prefix: &str,
1669 show_trees: bool,
1670 result: &mut Vec<DiffEntry>,
1671) -> Result<()> {
1672 let name_str = String::from_utf8_lossy(&entry.name);
1673 let path = format_path(prefix, &name_str);
1674 if is_tree_mode(entry.mode) {
1675 if show_trees {
1676 result.push(DiffEntry {
1677 status: DiffStatus::Added,
1678 old_path: None,
1679 new_path: Some(path.clone()),
1680 old_mode: "000000".to_owned(),
1681 new_mode: format_mode(entry.mode),
1682 old_oid: zero_oid(),
1683 new_oid: entry.oid,
1684 score: None,
1685 });
1686 }
1687 let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
1689 result.extend(nested);
1690 } else {
1691 result.push(DiffEntry {
1692 status: DiffStatus::Added,
1693 old_path: None,
1694 new_path: Some(path),
1695 old_mode: "000000".to_owned(),
1696 new_mode: format_mode(entry.mode),
1697 old_oid: zero_oid(),
1698 new_oid: entry.oid,
1699 score: None,
1700 });
1701 }
1702 Ok(())
1703}
1704
1705pub fn diff_index_to_tree(
1725 odb: &Odb,
1726 index: &Index,
1727 tree_oid: Option<&ObjectId>,
1728 ignore_submodules: bool,
1729) -> Result<Vec<DiffEntry>> {
1730 let tree_entries = match tree_oid {
1732 Some(oid) => flatten_tree(odb, oid, "")?,
1733 None => Vec::new(),
1734 };
1735
1736 let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
1738 std::collections::BTreeMap::new();
1739 for entry in &tree_entries {
1740 tree_map.insert(&entry.path, entry);
1741 }
1742
1743 let mut result = Vec::new();
1744 let mut stage0_paths = std::collections::BTreeSet::new();
1745 let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
1746 std::collections::BTreeMap::new();
1747
1748 for ie in &index.entries {
1750 let path = String::from_utf8_lossy(&ie.path).to_string();
1751 if ie.stage() == 0 && ie.intent_to_add() {
1752 continue;
1755 }
1756 if ie.stage() != 0 {
1757 let rank = match ie.stage() {
1758 2 => 0u8,
1759 3 => 1u8,
1760 1 => 2u8,
1761 _ => 3u8,
1762 };
1763 match unmerged_modes.get(&path) {
1764 Some((existing_rank, _)) if *existing_rank <= rank => {}
1765 _ => {
1766 unmerged_modes.insert(path, (rank, ie.mode));
1767 }
1768 }
1769 continue;
1770 }
1771 if ignore_submodules && ie.mode == 0o160000 {
1772 let _ = tree_map.remove(path.as_str());
1773 stage0_paths.insert(path.clone());
1774 continue;
1775 }
1776 stage0_paths.insert(path.clone());
1777 match tree_map.remove(path.as_str()) {
1778 Some(te) => {
1779 if te.oid != ie.oid || te.mode != ie.mode {
1781 result.push(DiffEntry {
1782 status: DiffStatus::Modified,
1783 old_path: Some(path.clone()),
1784 new_path: Some(path),
1785 old_mode: format_mode(te.mode),
1786 new_mode: format_mode(ie.mode),
1787 old_oid: te.oid,
1788 new_oid: ie.oid,
1789 score: None,
1790 });
1791 }
1792 }
1793 None => {
1794 result.push(DiffEntry {
1796 status: DiffStatus::Added,
1797 old_path: None,
1798 new_path: Some(path),
1799 old_mode: "000000".to_owned(),
1800 new_mode: format_mode(ie.mode),
1801 old_oid: zero_oid(),
1802 new_oid: ie.oid,
1803 score: None,
1804 });
1805 }
1806 }
1807 }
1808
1809 for (path, (_, mode)) in &unmerged_modes {
1810 if stage0_paths.contains(path) {
1811 continue;
1812 }
1813 tree_map.remove(path.as_str());
1814 result.push(DiffEntry {
1815 status: DiffStatus::Unmerged,
1816 old_path: Some(path.clone()),
1817 new_path: Some(path.clone()),
1818 old_mode: "000000".to_owned(),
1819 new_mode: format_mode(*mode),
1820 old_oid: zero_oid(),
1821 new_oid: zero_oid(),
1822 score: None,
1823 });
1824 }
1825
1826 for (path, te) in tree_map {
1828 if ignore_submodules && te.mode == 0o160000 {
1829 continue;
1830 }
1831 result.push(DiffEntry {
1832 status: DiffStatus::Deleted,
1833 old_path: Some(path.to_owned()),
1834 new_path: None,
1835 old_mode: format_mode(te.mode),
1836 new_mode: "000000".to_owned(),
1837 old_oid: te.oid,
1838 new_oid: zero_oid(),
1839 score: None,
1840 });
1841 }
1842
1843 result.sort_by(|a, b| a.path().cmp(b.path()));
1844 Ok(result)
1845}
1846
1847pub fn diff_index_to_worktree(
1872 odb: &Odb,
1873 index: &Index,
1874 work_tree: &Path,
1875 ignore_submodule_untracked: bool,
1876 simplify_gitlinks: bool,
1877) -> Result<Vec<DiffEntry>> {
1878 diff_index_to_worktree_with_options(
1879 odb,
1880 index,
1881 work_tree,
1882 DiffIndexToWorktreeOptions {
1883 ignore_submodule_untracked,
1884 simplify_gitlinks,
1885 ..DiffIndexToWorktreeOptions::default()
1886 },
1887 )
1888}
1889
1890#[derive(Debug, Clone, Copy, Default)]
1892pub struct DiffIndexToWorktreeOptions {
1893 pub index_mtime: Option<(u32, u32)>,
1899 pub ignore_submodule_untracked: bool,
1901 pub simplify_gitlinks: bool,
1903 pub error_on_broken_gitlinks: bool,
1906}
1907
1908pub fn diff_index_to_worktree_with_options(
1925 odb: &Odb,
1926 index: &Index,
1927 work_tree: &Path,
1928 options: DiffIndexToWorktreeOptions,
1929) -> Result<Vec<DiffEntry>> {
1930 use crate::config::ConfigSet;
1931 use crate::crlf;
1932
1933 let ignore_submodule_untracked = options.ignore_submodule_untracked;
1934 let simplify_gitlinks = options.simplify_gitlinks;
1935
1936 let git_dir = work_tree.join(".git");
1937 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1938 let conv = crlf::ConversionConfig::from_config(&config);
1939 let attrs = crlf::load_gitattributes(work_tree);
1940
1941 let mut result = Vec::new();
1942 let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
1943 std::collections::BTreeMap::new();
1944
1945 for ie in &index.entries {
1946 if ie.stage() != 0 {
1947 let path = String::from_utf8_lossy(&ie.path).to_string();
1948 let rank = match ie.stage() {
1949 2 => 0u8,
1950 3 => 1u8,
1951 1 => 2u8,
1952 _ => 3u8,
1953 };
1954 match unmerged_base.get(&path) {
1955 Some((existing_rank, _)) if *existing_rank <= rank => {}
1956 _ => {
1957 unmerged_base.insert(path, (rank, ie));
1958 }
1959 }
1960 continue;
1961 }
1962 if ie.skip_worktree() || ie.assume_unchanged() {
1965 continue;
1966 }
1967 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
1970 let is_intent_to_add = ie.intent_to_add();
1971
1972 if ie.mode == 0o160000 {
1977 let sub_dir = work_tree.join(path_str_ref);
1978 let sub_head_oid = read_submodule_head_oid(&sub_dir);
1979 if !simplify_gitlinks && sub_head_oid.is_none() && !sub_dir.exists() {
1986 let path_owned = path_str_ref.to_owned();
1987 result.push(DiffEntry {
1988 status: DiffStatus::Deleted,
1989 old_path: Some(path_owned.clone()),
1990 new_path: Some(path_owned),
1991 old_mode: format_mode(ie.mode),
1992 new_mode: "000000".to_owned(),
1993 old_oid: ie.oid,
1994 new_oid: zero_oid(),
1995 score: None,
1996 });
1997 continue;
1998 }
1999 let ref_matches = if let Some(oid) = sub_head_oid {
2000 oid == ie.oid
2001 } else {
2002 let is_placeholder = submodule_worktree_is_unpopulated_placeholder(&sub_dir);
2003 if options.error_on_broken_gitlinks
2004 && !is_placeholder
2005 && submodule_embedded_git_dir(&sub_dir).is_some()
2006 {
2007 return Err(Error::ConfigError(format!(
2008 "could not read submodule HEAD for '{path_str_ref}'"
2009 )));
2010 }
2011 is_placeholder
2012 };
2013 if simplify_gitlinks {
2014 if !ref_matches {
2015 let path_owned = path_str_ref.to_owned();
2016 let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
2017 result.push(DiffEntry {
2018 status: DiffStatus::Modified,
2019 old_path: Some(path_owned.clone()),
2020 new_path: Some(path_owned),
2021 old_mode: format_mode(ie.mode),
2022 new_mode: format_mode(ie.mode),
2023 old_oid: ie.oid,
2024 new_oid,
2025 score: None,
2026 });
2027 }
2028 continue;
2029 }
2030 if sub_head_oid.is_some() && submodule_head_object_broken(&sub_dir) {
2035 return Err(Error::ConfigError(format!(
2036 "'git status --porcelain=2' failed in submodule {path_str_ref}"
2037 )));
2038 }
2039 let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
2040 if ignore_submodule_untracked {
2041 flags.untracked = false;
2042 }
2043 let inner_dirty = flags.modified || flags.untracked;
2044 if !ref_matches || inner_dirty {
2045 let path_owned = path_str_ref.to_owned();
2046 let new_oid = if !ref_matches {
2047 sub_head_oid.unwrap_or_else(zero_oid)
2048 } else {
2049 zero_oid()
2050 };
2051 result.push(DiffEntry {
2052 status: DiffStatus::Modified,
2053 old_path: Some(path_owned.clone()),
2054 new_path: Some(path_owned),
2055 old_mode: format_mode(ie.mode),
2056 new_mode: format_mode(ie.mode),
2057 old_oid: ie.oid,
2058 new_oid,
2059 score: None,
2060 });
2061 }
2062 continue;
2063 }
2064
2065 let file_path = work_tree.join(path_str_ref);
2066
2067 if is_intent_to_add {
2068 match fs::symlink_metadata(&file_path) {
2069 Ok(meta) => {
2070 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
2071 let worktree_oid = hash_worktree_file(
2072 odb,
2073 &file_path,
2074 &meta,
2075 &conv,
2076 &file_attrs,
2077 path_str_ref,
2078 None,
2079 )?;
2080 let worktree_mode = mode_from_metadata(&meta);
2081 result.push(DiffEntry {
2082 status: DiffStatus::Added,
2083 old_path: None,
2084 new_path: Some(path_str_ref.to_owned()),
2085 old_mode: "000000".to_owned(),
2086 new_mode: format_mode(worktree_mode),
2087 old_oid: zero_oid(),
2090 new_oid: worktree_oid,
2091 score: None,
2092 });
2093 }
2094 Err(e)
2095 if e.kind() == std::io::ErrorKind::NotFound
2096 || e.raw_os_error() == Some(20) =>
2097 {
2098 result.push(DiffEntry {
2099 status: DiffStatus::Deleted,
2100 old_path: Some(path_str_ref.to_owned()),
2101 new_path: None,
2102 old_mode: format_mode(ie.mode),
2103 new_mode: "000000".to_owned(),
2104 old_oid: ie.oid,
2105 new_oid: zero_oid(),
2106 score: None,
2107 });
2108 }
2109 Err(e) => return Err(Error::Io(e)),
2110 }
2111 continue;
2112 }
2113
2114 if has_symlink_in_path(work_tree, path_str_ref) {
2117 result.push(DiffEntry {
2118 status: DiffStatus::Deleted,
2119 old_path: Some(path_str_ref.to_owned()),
2120 new_path: None,
2121 old_mode: format_mode(ie.mode),
2122 new_mode: "000000".to_owned(),
2123 old_oid: ie.oid,
2124 new_oid: zero_oid(),
2125 score: None,
2126 });
2127 continue;
2128 }
2129
2130 match fs::symlink_metadata(&file_path) {
2131 Ok(meta) if meta.is_dir() => {
2132 if file_path.join(".git").exists() {
2137 let head = read_submodule_head_oid(&file_path).unwrap_or_else(zero_oid);
2138 let path_owned = path_str_ref.to_owned();
2139 result.push(DiffEntry {
2140 status: DiffStatus::TypeChanged,
2141 old_path: Some(path_owned.clone()),
2142 new_path: Some(path_owned),
2143 old_mode: format_mode(ie.mode),
2144 new_mode: format_mode(0o160000),
2145 old_oid: ie.oid,
2146 new_oid: head,
2147 score: None,
2148 });
2149 continue;
2150 }
2151 result.push(DiffEntry {
2152 status: DiffStatus::Deleted,
2153 old_path: Some(path_str_ref.to_owned()),
2154 new_path: None,
2155 old_mode: format_mode(ie.mode),
2156 new_mode: String::new(),
2157 old_oid: ie.oid,
2158 new_oid: zero_oid(),
2159 score: None,
2160 });
2161 }
2162 Ok(meta) => {
2163 let worktree_mode = mode_from_metadata(&meta);
2164 let stat_same = stat_matches(ie, &meta);
2165 if stat_same && worktree_mode != ie.mode {
2167 let path_owned = path_str_ref.to_owned();
2168 result.push(DiffEntry {
2169 status: DiffStatus::Modified,
2170 old_path: Some(path_owned.clone()),
2171 new_path: Some(path_owned),
2172 old_mode: format_mode(ie.mode),
2173 new_mode: format_mode(worktree_mode),
2174 old_oid: ie.oid,
2175 new_oid: ie.oid,
2176 score: None,
2177 });
2178 continue;
2179 }
2180
2181 if stat_same && worktree_mode == ie.mode && !entry_is_racy(ie, options.index_mtime) {
2184 continue;
2185 }
2186
2187 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
2189 let worktree_oid = hash_worktree_file(
2190 odb,
2191 &file_path,
2192 &meta,
2193 &conv,
2194 &file_attrs,
2195 path_str_ref,
2196 Some(ie),
2197 )?;
2198
2199 let mut eff_oid = worktree_oid;
2203 if eff_oid != ie.oid {
2204 if let Ok(raw) = fs::read(&file_path) {
2205 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2206 if raw_oid == ie.oid {
2207 eff_oid = ie.oid;
2208 }
2209 }
2210 }
2211
2212 if eff_oid != ie.oid || worktree_mode != ie.mode {
2213 let path_owned = path_str_ref.to_owned();
2214 result.push(DiffEntry {
2215 status: DiffStatus::Modified,
2216 old_path: Some(path_owned.clone()),
2217 new_path: Some(path_owned),
2218 old_mode: format_mode(ie.mode),
2219 new_mode: format_mode(worktree_mode),
2220 old_oid: ie.oid,
2221 new_oid: eff_oid,
2222 score: None,
2223 });
2224 }
2225 }
2226 Err(e) if e.kind() == std::io::ErrorKind::NotFound
2227 || e.raw_os_error() == Some(20) => {
2228 result.push(DiffEntry {
2230 status: DiffStatus::Deleted,
2231 old_path: Some(path_str_ref.to_owned()),
2232 new_path: None,
2233 old_mode: format_mode(ie.mode),
2234 new_mode: "000000".to_owned(),
2235 old_oid: ie.oid,
2236 new_oid: zero_oid(),
2237 score: None,
2238 });
2239 }
2240 Err(e) => return Err(Error::Io(e)),
2241 }
2242 }
2243
2244 for (path, (_, base_entry)) in unmerged_base {
2245 let file_path = work_tree.join(&path);
2246 let wt_meta = match fs::symlink_metadata(&file_path) {
2247 Ok(meta) => Some(meta),
2248 Err(e)
2249 if e.kind() == std::io::ErrorKind::NotFound
2250 || e.raw_os_error() == Some(20) =>
2251 {
2252 None
2253 }
2254 Err(e) => return Err(Error::Io(e)),
2255 };
2256
2257 let new_mode = wt_meta.as_ref().map_or_else(
2258 || "000000".to_owned(),
2259 |meta| format_mode(mode_from_metadata(meta)),
2260 );
2261 result.push(DiffEntry {
2262 status: DiffStatus::Unmerged,
2263 old_path: Some(path.clone()),
2264 new_path: Some(path.clone()),
2265 old_mode: "000000".to_owned(),
2266 new_mode,
2267 old_oid: zero_oid(),
2268 new_oid: zero_oid(),
2269 score: None,
2270 });
2271
2272 if let Some(meta) = wt_meta {
2273 let file_attrs = crlf::get_file_attrs(&attrs, &path, false, &config);
2274 let wt_oid = hash_worktree_file(
2275 odb,
2276 &file_path,
2277 &meta,
2278 &conv,
2279 &file_attrs,
2280 &path,
2281 Some(base_entry),
2282 )?;
2283 let wt_mode = mode_from_metadata(&meta);
2284 if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
2285 result.push(DiffEntry {
2286 status: DiffStatus::Modified,
2287 old_path: Some(path.clone()),
2288 new_path: Some(path),
2289 old_mode: format_mode(base_entry.mode),
2290 new_mode: format_mode(wt_mode),
2291 old_oid: base_entry.oid,
2292 new_oid: wt_oid,
2293 score: None,
2294 });
2295 }
2296 }
2297 }
2298
2299 Ok(result)
2300}
2301
2302fn entry_is_racy(ie: &IndexEntry, index_mtime: Option<(u32, u32)>) -> bool {
2303 let Some((index_mtime_sec, index_mtime_nsec)) = index_mtime else {
2304 return false;
2305 };
2306 if index_mtime_sec == 0 {
2307 return false;
2308 }
2309 index_mtime_sec < ie.mtime_sec
2310 || (index_mtime_sec == ie.mtime_sec && index_mtime_nsec <= ie.mtime_nsec)
2311}
2312
2313pub fn worktree_differs_from_index_entry(
2321 odb: &Odb,
2322 work_tree: &Path,
2323 ie: &IndexEntry,
2324 ignore_submodule_untracked: bool,
2325) -> Result<bool> {
2326 use crate::config::ConfigSet;
2327 use crate::crlf;
2328
2329 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
2330 let file_path = work_tree.join(path_str_ref);
2331
2332 if ie.mode == 0o160000 {
2333 let sub_head_oid = read_submodule_head(&file_path);
2334 let ref_matches = match sub_head_oid {
2335 Some(oid) => oid == ie.oid,
2336 None => submodule_worktree_is_unpopulated_placeholder(&file_path),
2337 };
2338 let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
2339 if ignore_submodule_untracked {
2340 flags.untracked = false;
2341 }
2342 return Ok(!ref_matches || flags.modified || flags.untracked);
2343 }
2344
2345 let meta = match fs::symlink_metadata(&file_path) {
2346 Ok(m) => m,
2347 Err(e)
2348 if e.kind() == std::io::ErrorKind::NotFound
2349 || e.raw_os_error() == Some(20) =>
2350 {
2351 return Ok(true);
2352 }
2353 Err(e) => return Err(Error::Io(e)),
2354 };
2355
2356 if meta.is_dir() {
2357 return Ok(true);
2358 }
2359
2360 let worktree_mode = mode_from_metadata(&meta);
2361 if worktree_mode != ie.mode {
2362 return Ok(true);
2363 }
2364
2365 let git_dir = work_tree.join(".git");
2366 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
2367 let conv = crlf::ConversionConfig::from_config(&config);
2368 let attrs = crlf::load_gitattributes(work_tree);
2369 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
2370 let worktree_oid = hash_worktree_file(
2371 odb,
2372 &file_path,
2373 &meta,
2374 &conv,
2375 &file_attrs,
2376 path_str_ref,
2377 Some(ie),
2378 )?;
2379
2380 let mut eff_oid = worktree_oid;
2381 if eff_oid != ie.oid {
2382 if let Ok(raw) = fs::read(&file_path) {
2383 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2384 if raw_oid == ie.oid {
2385 eff_oid = ie.oid;
2386 }
2387 }
2388 }
2389
2390 Ok(eff_oid != ie.oid)
2391}
2392
2393pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
2394 if meta.len() as u32 != ie.size {
2396 return false;
2397 }
2398 #[cfg(unix)]
2399 {
2400 use std::os::unix::fs::MetadataExt;
2401 if meta.mtime() as u32 != ie.mtime_sec {
2403 return false;
2404 }
2405 if meta.mtime_nsec() as u32 != ie.mtime_nsec {
2406 return false;
2407 }
2408 if meta.ctime() as u32 != ie.ctime_sec {
2410 return false;
2411 }
2412 if meta.ctime_nsec() as u32 != ie.ctime_nsec {
2413 return false;
2414 }
2415 if meta.ino() as u32 != ie.ino {
2417 return false;
2418 }
2419 if meta.dev() as u32 != ie.dev {
2420 return false;
2421 }
2422 }
2423 #[cfg(not(unix))]
2424 {
2425 use std::time::UNIX_EPOCH;
2426 if let Ok(mtime) = meta.modified() {
2427 if let Ok(dur) = mtime.duration_since(UNIX_EPOCH) {
2428 if dur.as_secs() as u32 != ie.mtime_sec {
2429 return false;
2430 }
2431 if dur.subsec_nanos() != ie.mtime_nsec {
2432 return false;
2433 }
2434 }
2435 }
2436 }
2437 true
2438}
2439
2440pub fn refresh_index_stat_content_verified(
2460 index: &mut Index,
2461 work_tree: &Path,
2462 index_mtime: Option<(u32, u32)>,
2463) -> bool {
2464 use crate::index::{MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK};
2465 let mut changed = false;
2466 for ie in &mut index.entries {
2467 if ie.stage() != 0 || ie.skip_worktree() || ie.assume_unchanged() || ie.intent_to_add() {
2468 continue;
2469 }
2470 if ie.mode != MODE_REGULAR && ie.mode != MODE_EXECUTABLE && ie.mode != MODE_SYMLINK {
2471 continue;
2472 }
2473 let Ok(path) = std::str::from_utf8(&ie.path) else {
2474 continue;
2475 };
2476 let abs = work_tree.join(path);
2477 let Ok(meta) = fs::symlink_metadata(&abs) else {
2478 continue;
2479 };
2480 if stat_matches(ie, &meta) {
2481 if entry_is_racy(ie, index_mtime)
2487 && !worktree_content_matches_index_oid(ie, &abs, &meta)
2488 {
2489 invalidate_index_stat_cache(ie);
2490 changed = true;
2491 }
2492 continue;
2493 }
2494 if !worktree_content_matches_index_oid(ie, &abs, &meta) {
2495 continue;
2496 }
2497 let refreshed = crate::index::entry_from_metadata(&meta, &ie.path, ie.oid, ie.mode);
2498 ie.ctime_sec = refreshed.ctime_sec;
2499 ie.ctime_nsec = refreshed.ctime_nsec;
2500 ie.mtime_sec = refreshed.mtime_sec;
2501 ie.mtime_nsec = refreshed.mtime_nsec;
2502 ie.dev = refreshed.dev;
2503 ie.ino = refreshed.ino;
2504 ie.uid = refreshed.uid;
2505 ie.gid = refreshed.gid;
2506 ie.size = refreshed.size;
2507 changed = true;
2508 }
2509 changed
2510}
2511
2512fn worktree_content_matches_index_oid(ie: &IndexEntry, abs: &Path, meta: &fs::Metadata) -> bool {
2514 use crate::index::{MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK};
2515 if ie.mode == MODE_SYMLINK {
2516 if !meta.file_type().is_symlink() {
2517 return false;
2518 }
2519 use std::os::unix::ffi::OsStrExt as _;
2520 fs::read_link(abs)
2521 .map(|t| Odb::hash_object_data(ObjectKind::Blob, t.as_os_str().as_bytes()) == ie.oid)
2522 .unwrap_or(false)
2523 } else if ie.mode == MODE_REGULAR || ie.mode == MODE_EXECUTABLE {
2524 if !meta.file_type().is_file() {
2525 return false;
2526 }
2527 fs::read(abs)
2528 .map(|bytes| Odb::hash_object_data(ObjectKind::Blob, &bytes) == ie.oid)
2529 .unwrap_or(false)
2530 } else {
2531 false
2532 }
2533}
2534
2535fn invalidate_index_stat_cache(ie: &mut IndexEntry) {
2537 ie.ctime_sec = 0;
2538 ie.ctime_nsec = 0;
2539 ie.mtime_sec = 0;
2540 ie.mtime_nsec = 0;
2541 ie.dev = 0;
2542 ie.ino = 0;
2543 ie.size = 0;
2544}
2545
2546fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
2549 let mut check = work_tree.to_path_buf();
2550 let components: Vec<&str> = rel_path.split('/').collect();
2551 for component in &components[..components.len().saturating_sub(1)] {
2553 check.push(component);
2554 match fs::symlink_metadata(&check) {
2555 Ok(meta) if meta.file_type().is_symlink() => return true,
2556 _ => {}
2557 }
2558 }
2559 false
2560}
2561
2562pub fn hash_worktree_file(
2563 odb: &Odb,
2564 path: &Path,
2565 meta: &fs::Metadata,
2566 conv: &crate::crlf::ConversionConfig,
2567 file_attrs: &crate::crlf::FileAttrs,
2568 rel_path: &str,
2569 index_entry: Option<&IndexEntry>,
2570) -> Result<ObjectId> {
2571 let prior_blob: Option<Vec<u8>> = index_entry
2572 .filter(|e| e.oid != zero_oid())
2573 .and_then(|e| odb.read(&e.oid).ok().map(|o| o.data));
2574 let data = if meta.file_type().is_symlink() {
2575 let target = fs::read_link(path)?;
2577 target.to_string_lossy().into_owned().into_bytes()
2578 } else if meta.is_dir() {
2579 Vec::new()
2582 } else {
2583 let raw = fs::read(path)?;
2584 let opts = crate::crlf::ConvertToGitOpts {
2587 index_blob: prior_blob.as_deref(),
2588 renormalize: false,
2589 check_safecrlf: false,
2590 };
2591 crate::crlf::convert_to_git_with_opts(&raw, rel_path, conv, file_attrs, opts).unwrap_or(raw)
2592 };
2593
2594 Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
2595}
2596
2597pub fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
2599 if meta.file_type().is_symlink() {
2600 0o120000
2601 } else {
2602 #[cfg(unix)]
2603 {
2604 if meta.mode() & 0o111 != 0 {
2605 return 0o100755;
2606 }
2607 }
2608 0o100644
2609 }
2610}
2611
2612pub fn diff_tree_to_worktree(
2629 odb: &Odb,
2630 tree_oid: Option<&ObjectId>,
2631 work_tree: &Path,
2632 index: &Index,
2633) -> Result<Vec<DiffEntry>> {
2634 use crate::config::ConfigSet;
2635 use crate::crlf;
2636
2637 let git_dir = work_tree.join(".git");
2638 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
2639 let conv = crlf::ConversionConfig::from_config(&config);
2640 let attrs = crlf::load_gitattributes(work_tree);
2641
2642 let tree_flat = match tree_oid {
2644 Some(oid) => flatten_tree(odb, oid, "")?,
2645 None => Vec::new(),
2646 };
2647 let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
2648 tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
2649
2650 let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
2652 std::collections::BTreeMap::new();
2653 let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
2654 let mut stage0_paths: std::collections::BTreeSet<Vec<u8>> = std::collections::BTreeSet::new();
2655 for ie in &index.entries {
2656 if ie.stage() != 0 {
2657 continue;
2658 }
2659 let path = String::from_utf8_lossy(&ie.path).to_string();
2660 index_entries.insert(&ie.path, ie);
2661 index_paths.insert(path);
2662 stage0_paths.insert(ie.path.clone());
2663 }
2664
2665 let mut unmerged_only_paths: std::collections::BTreeSet<String> =
2668 std::collections::BTreeSet::new();
2669 for ie in &index.entries {
2670 if !(1..=3).contains(&ie.stage()) {
2671 continue;
2672 }
2673 if stage0_paths.contains(&ie.path) {
2674 continue;
2675 }
2676 unmerged_only_paths.insert(String::from_utf8_lossy(&ie.path).into_owned());
2677 }
2678
2679 let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
2681 all_paths.extend(tree_map.keys().cloned());
2682 all_paths.extend(index_paths.iter().cloned());
2683 all_paths.extend(unmerged_only_paths.iter().cloned());
2684
2685 let mut result = Vec::new();
2686
2687 for path in &all_paths {
2688 if index_entries
2689 .get(path.as_bytes())
2690 .is_some_and(|ie| ie.skip_worktree())
2691 {
2692 continue;
2695 }
2696
2697 let tree_entry = tree_map.get(path.as_str());
2698
2699 let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
2701 || index_entries
2702 .get(path.as_bytes())
2703 .is_some_and(|ie| ie.mode == 0o160000);
2704 if is_gitlink {
2705 let sub_dir = work_tree.join(path);
2706 let index_gitlink_oid = index_entries
2707 .get(path.as_bytes())
2708 .filter(|ie| ie.mode == 0o160000)
2709 .map(|ie| ie.oid);
2710 match (tree_entry, index_gitlink_oid) {
2711 (Some(te), _) => {
2712 let sub_head = read_submodule_head_oid(&sub_dir);
2713 if sub_head.is_none() && !sub_dir.exists() {
2717 result.push(DiffEntry {
2718 status: DiffStatus::Deleted,
2719 old_path: Some(path.clone()),
2720 new_path: Some(path.clone()),
2721 old_mode: format_mode(te.mode),
2722 new_mode: "000000".to_string(),
2723 old_oid: te.oid,
2724 new_oid: zero_oid(),
2725 score: None,
2726 });
2727 continue;
2728 }
2729 let index_matches_tree = index_gitlink_oid.is_some_and(|oid| oid == te.oid);
2730 let head_differs = sub_head.as_ref() != Some(&te.oid);
2731 let dirty_while_aligned = index_matches_tree
2732 && !head_differs
2733 && submodule_has_dirty_worktree_for_super_diff(work_tree, path, &te.oid);
2734 if head_differs || dirty_while_aligned {
2735 let new_oid = if head_differs { zero_oid() } else { te.oid };
2739 result.push(DiffEntry {
2740 status: DiffStatus::Modified,
2741 old_path: Some(path.clone()),
2742 new_path: Some(path.clone()),
2743 old_mode: format_mode(te.mode),
2744 new_mode: format_mode(te.mode),
2745 old_oid: te.oid,
2746 new_oid,
2747 score: None,
2748 });
2749 }
2750 }
2751 (None, Some(idx_oid)) => {
2752 let new_oid = read_submodule_head_oid(&sub_dir).unwrap_or(idx_oid);
2757 result.push(DiffEntry {
2758 status: DiffStatus::Added,
2759 old_path: Some(path.clone()),
2760 new_path: Some(path.clone()),
2761 old_mode: "000000".to_string(),
2762 new_mode: format_mode(0o160000),
2763 old_oid: zero_oid(),
2764 new_oid,
2765 score: None,
2766 });
2767 }
2768 (None, None) => {}
2769 }
2770 continue;
2771 }
2772
2773 let file_path = work_tree.join(path);
2774
2775 let wt_meta = match fs::symlink_metadata(&file_path) {
2776 Ok(m) => Some(m),
2777 Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
2778 Err(e) => return Err(Error::Io(e)),
2779 };
2780
2781 if unmerged_only_paths.contains(path) {
2782 if let (Some(te), Some(meta)) = (tree_entry, wt_meta.as_ref()) {
2783 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
2784 let wt_oid =
2785 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, None)?;
2786 let wt_mode = mode_from_metadata(meta);
2787 if wt_oid != te.oid || wt_mode != te.mode {
2788 result.push(DiffEntry {
2789 status: DiffStatus::Modified,
2790 old_path: Some(path.clone()),
2791 new_path: Some(path.clone()),
2792 old_mode: format_mode(te.mode),
2793 new_mode: format_mode(wt_mode),
2794 old_oid: te.oid,
2795 new_oid: wt_oid,
2796 score: None,
2797 });
2798 }
2799 }
2800 continue;
2801 }
2802
2803 match (tree_entry, wt_meta) {
2804 (Some(te), Some(ref meta)) => {
2805 let wt_mode = mode_from_metadata(meta);
2806 let Some(ie) = index_entries.get(path.as_bytes()) else {
2807 continue;
2808 };
2809
2810 let index_matches_tree = ie.oid == te.oid && ie.mode == te.mode;
2811
2812 if index_matches_tree && wt_mode == te.mode && stat_matches(ie, meta) {
2814 continue;
2815 }
2816
2817 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
2818 let idx_ent = index_entries.get(path.as_bytes()).copied();
2819
2820 if ie.oid == te.oid && ie.mode != te.mode {
2822 result.push(DiffEntry {
2823 status: DiffStatus::Modified,
2824 old_path: Some(path.clone()),
2825 new_path: Some(path.clone()),
2826 old_mode: format_mode(te.mode),
2827 new_mode: format_mode(ie.mode),
2828 old_oid: te.oid,
2829 new_oid: te.oid,
2830 score: None,
2831 });
2832 continue;
2833 }
2834
2835 if index_matches_tree {
2838 let wt_oid = hash_worktree_file(
2839 odb,
2840 &file_path,
2841 meta,
2842 &conv,
2843 &file_attrs,
2844 path,
2845 idx_ent,
2846 )?;
2847 let mut eff_oid = wt_oid;
2848 if eff_oid != te.oid {
2849 if let Ok(raw) = fs::read(&file_path) {
2850 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2851 if raw_oid == te.oid {
2852 eff_oid = te.oid;
2853 }
2854 }
2855 }
2856 if eff_oid != te.oid {
2857 result.push(DiffEntry {
2858 status: DiffStatus::Modified,
2859 old_path: Some(path.clone()),
2860 new_path: Some(path.clone()),
2861 old_mode: format_mode(te.mode),
2862 new_mode: format_mode(wt_mode),
2863 old_oid: te.oid,
2864 new_oid: eff_oid,
2865 score: None,
2866 });
2867 } else if wt_mode != te.mode {
2868 result.push(DiffEntry {
2869 status: DiffStatus::Modified,
2870 old_path: Some(path.clone()),
2871 new_path: Some(path.clone()),
2872 old_mode: format_mode(te.mode),
2873 new_mode: format_mode(wt_mode),
2874 old_oid: te.oid,
2875 new_oid: te.oid,
2876 score: None,
2877 });
2878 }
2879 continue;
2880 }
2881
2882 let wt_oid =
2884 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, idx_ent)?;
2885 let mut eff_oid = wt_oid;
2886 if eff_oid != te.oid {
2887 if let Ok(raw) = fs::read(&file_path) {
2888 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
2889 if raw_oid == te.oid {
2890 eff_oid = te.oid;
2891 }
2892 }
2893 }
2894 if eff_oid != te.oid || wt_mode != te.mode {
2895 result.push(DiffEntry {
2896 status: DiffStatus::Modified,
2897 old_path: Some(path.clone()),
2898 new_path: Some(path.clone()),
2899 old_mode: format_mode(te.mode),
2900 new_mode: format_mode(wt_mode),
2901 old_oid: te.oid,
2902 new_oid: eff_oid,
2903 score: None,
2904 });
2905 }
2906 }
2907 (Some(te), None) => {
2908 result.push(DiffEntry {
2910 status: DiffStatus::Deleted,
2911 old_path: Some(path.clone()),
2912 new_path: None,
2913 old_mode: format_mode(te.mode),
2914 new_mode: "000000".to_owned(),
2915 old_oid: te.oid,
2916 new_oid: zero_oid(),
2917 score: None,
2918 });
2919 }
2920 (None, Some(ref meta)) => {
2921 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
2923 let wt_oid = hash_worktree_file(
2924 odb,
2925 &file_path,
2926 meta,
2927 &conv,
2928 &file_attrs,
2929 path,
2930 index_entries.get(path.as_bytes()).copied(),
2931 )?;
2932 let wt_mode = mode_from_metadata(meta);
2933 result.push(DiffEntry {
2934 status: DiffStatus::Added,
2935 old_path: None,
2936 new_path: Some(path.clone()),
2937 old_mode: "000000".to_owned(),
2938 new_mode: format_mode(wt_mode),
2939 old_oid: zero_oid(),
2940 new_oid: wt_oid,
2941 score: None,
2942 });
2943 }
2944 (None, None) => {
2945 }
2947 }
2948 }
2949
2950 result.sort_by(|a, b| a.path().cmp(b.path()));
2951 Ok(result)
2952}
2953
2954fn read_added_entry_bytes(
2957 odb: &Odb,
2958 entry: &DiffEntry,
2959 work_root: Option<&Path>,
2960) -> Option<Vec<u8>> {
2961 if entry.new_oid != zero_oid() {
2962 return odb.read(&entry.new_oid).ok().map(|obj| obj.data);
2963 }
2964 let path = entry.new_path.as_deref()?;
2965 let root = work_root?;
2966 fs::read(root.join(path)).ok()
2967}
2968
2969fn modified_as_copy_from_sources(
2970 odb: &Odb,
2971 work_root: Option<&Path>,
2972 e: &DiffEntry,
2973 threshold: u32,
2974 sources: &[(String, ObjectId, bool)],
2975 source_contents: &[Option<Vec<u8>>],
2976 source_tree_entries: &[(String, String, ObjectId)],
2977) -> Option<DiffEntry> {
2978 fn regular_file_mode(mode: &str) -> bool {
2979 mode == "100644" || mode == "100755"
2980 }
2981
2982 if e.status != DiffStatus::Modified || !regular_file_mode(&e.new_mode) {
2983 return None;
2984 }
2985 let new_data = read_added_entry_bytes(odb, e, work_root)?;
2986 let new_oid_eff = if e.new_oid != zero_oid() {
2987 e.new_oid
2988 } else {
2989 Odb::hash_object_data(ObjectKind::Blob, &new_data)
2990 };
2991
2992 let mut best: Option<(usize, u32)> = None;
2993 for (si, (src_path, src_oid, is_deleted)) in sources.iter().enumerate() {
2994 if *is_deleted {
2995 continue;
2996 }
2997 if e.new_path.as_deref() == Some(src_path.as_str()) {
2998 continue;
2999 }
3000 let src_mode_str = source_tree_entries
3001 .iter()
3002 .find(|(p, _, _)| p == src_path)
3003 .map(|(_, m, _)| m.as_str())
3004 .unwrap_or("100644");
3005 if !regular_file_mode(src_mode_str) {
3006 continue;
3007 }
3008
3009 let score = if *src_oid == new_oid_eff {
3010 100
3011 } else {
3012 match (&source_contents[si], Some(new_data.as_slice())) {
3013 (Some(old_data), Some(nd)) => compute_similarity(old_data, nd),
3014 _ => 0,
3015 }
3016 };
3017 if score >= threshold {
3018 let replace = match best {
3019 None => true,
3020 Some((_, s)) => score > s,
3021 };
3022 if replace {
3023 best = Some((si, score));
3024 }
3025 }
3026 }
3027
3028 let (si, score) = best?;
3029 let (src_path, src_oid, _) = &sources[si];
3030 let src_mode = source_tree_entries
3031 .iter()
3032 .find(|(p, _, _)| p == src_path)
3033 .map(|(_, m, _)| m.clone())
3034 .unwrap_or_else(|| e.old_mode.clone());
3035
3036 Some(DiffEntry {
3037 status: DiffStatus::Copied,
3038 old_path: Some(src_path.clone()),
3039 new_path: e.new_path.clone(),
3040 old_mode: src_mode,
3041 new_mode: e.new_mode.clone(),
3042 old_oid: *src_oid,
3043 new_oid: e.new_oid,
3044 score: Some(score),
3045 })
3046}
3047
3048pub fn detect_renames(
3060 odb: &Odb,
3061 work_root: Option<&Path>,
3062 entries: Vec<DiffEntry>,
3063 threshold: u32,
3064) -> Vec<DiffEntry> {
3065 let mut deleted: Vec<DiffEntry> = Vec::new();
3067 let mut added: Vec<DiffEntry> = Vec::new();
3068 let mut others: Vec<DiffEntry> = Vec::new();
3069
3070 for entry in entries {
3071 match entry.status {
3072 DiffStatus::Deleted => deleted.push(entry),
3073 DiffStatus::Added => added.push(entry),
3074 _ => others.push(entry),
3075 }
3076 }
3077
3078 if deleted.is_empty() || added.is_empty() {
3079 let mut result = others;
3081 result.extend(deleted);
3082 result.extend(added);
3083 result.sort_by(|a, b| a.path().cmp(b.path()));
3084 return result;
3085 }
3086
3087 let deleted_contents: Vec<Option<Vec<u8>>> = deleted
3089 .iter()
3090 .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
3091 .collect();
3092
3093 let added_contents: Vec<Option<Vec<u8>>> = added
3095 .iter()
3096 .map(|a| read_added_entry_bytes(odb, a, work_root))
3097 .collect();
3098
3099 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
3102
3103 fn is_regularish_mode(mode: &str) -> bool {
3104 mode == "100644" || mode == "100755"
3105 }
3106
3107 fn same_path_same_blob(del: &DiffEntry, add: &DiffEntry) -> bool {
3108 del.old_path == add.new_path && del.old_oid == add.new_oid && del.old_mode == add.new_mode
3109 }
3110
3111 for (di, del) in deleted.iter().enumerate() {
3112 for (ai, add) in added.iter().enumerate() {
3113 if del.old_oid == add.new_oid {
3115 scores.push((100, di, ai));
3116 continue;
3117 }
3118
3119 if !is_regularish_mode(&del.old_mode) || !is_regularish_mode(&add.new_mode) {
3122 continue;
3123 }
3124
3125 let score = match (&deleted_contents[di], &added_contents[ai]) {
3126 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
3127 _ => 0,
3128 };
3129
3130 if score >= threshold {
3131 scores.push((score, di, ai));
3132 }
3133 }
3134 }
3135
3136 scores.sort_by(|a, b| {
3140 let a_noop = same_path_same_blob(&deleted[a.1], &added[a.2]);
3141 let b_noop = same_path_same_blob(&deleted[b.1], &added[b.2]);
3142 let a_same = same_basename(&deleted[a.1], &added[a.2]);
3143 let b_same = same_basename(&deleted[b.1], &added[b.2]);
3144 a_noop
3145 .cmp(&b_noop)
3146 .then_with(|| b_same.cmp(&a_same))
3147 .then_with(|| b.0.cmp(&a.0))
3148 });
3149
3150 let mut used_deleted = vec![false; deleted.len()];
3151 let mut used_added = vec![false; added.len()];
3152 let mut renames: Vec<DiffEntry> = Vec::new();
3153
3154 for (score, di, ai) in &scores {
3155 if used_deleted[*di] || used_added[*ai] {
3156 continue;
3157 }
3158 used_deleted[*di] = true;
3159 used_added[*ai] = true;
3160
3161 let del = &deleted[*di];
3162 let add = &added[*ai];
3163
3164 if same_path_same_blob(del, add) {
3169 continue;
3170 }
3171
3172 renames.push(DiffEntry {
3173 status: DiffStatus::Renamed,
3174 old_path: del.old_path.clone(),
3175 new_path: add.new_path.clone(),
3176 old_mode: del.old_mode.clone(),
3177 new_mode: add.new_mode.clone(),
3178 old_oid: del.old_oid,
3179 new_oid: add.new_oid,
3180 score: Some(*score),
3181 });
3182 }
3183
3184 let mut result = others;
3186 result.extend(renames);
3187 for (i, entry) in deleted.into_iter().enumerate() {
3188 if !used_deleted[i] {
3189 result.push(entry);
3190 }
3191 }
3192 for (i, entry) in added.into_iter().enumerate() {
3193 if !used_added[i] {
3194 result.push(entry);
3195 }
3196 }
3197
3198 result.sort_by(|a, b| a.path().cmp(b.path()));
3199 result
3200}
3201
3202pub fn detect_copies(
3213 odb: &Odb,
3214 work_root: Option<&Path>,
3215 entries: Vec<DiffEntry>,
3216 threshold: u32,
3217 find_copies_harder: bool,
3218 source_tree_entries: &[(String, String, ObjectId)],
3219) -> Vec<DiffEntry> {
3220 use std::collections::{HashMap, HashSet};
3221
3222 let mut deleted: Vec<DiffEntry> = Vec::new();
3224 let mut added: Vec<DiffEntry> = Vec::new();
3225 let mut others: Vec<DiffEntry> = Vec::new();
3226
3227 for entry in entries {
3228 match entry.status {
3229 DiffStatus::Deleted => deleted.push(entry),
3230 DiffStatus::Added => added.push(entry),
3231 _ => others.push(entry),
3232 }
3233 }
3234
3235 let mut sources: Vec<(String, ObjectId, bool)> = Vec::new(); let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
3239
3240 for entry in &deleted {
3241 if let Some(ref path) = entry.old_path {
3242 deleted_source_idx.insert(path.clone(), sources.len());
3243 sources.push((path.clone(), entry.old_oid, true));
3244 }
3245 }
3246
3247 for entry in &others {
3250 if matches!(entry.status, DiffStatus::Modified | DiffStatus::TypeChanged) {
3251 if let Some(ref old_path) = entry.old_path {
3252 if !sources.iter().any(|(p, _, _)| p == old_path) {
3253 sources.push((old_path.clone(), entry.old_oid, false));
3254 }
3255 }
3256 }
3257 }
3258
3259 if find_copies_harder {
3261 for (path, _mode, oid) in source_tree_entries {
3262 if !sources.iter().any(|(p, _, _)| p == path) {
3263 sources.push((path.clone(), *oid, false));
3264 }
3265 }
3266 }
3267
3268 if sources.is_empty() {
3269 let mut result = others;
3270 result.extend(deleted);
3271 result.extend(added);
3272 result.sort_by(|a, b| a.path().cmp(b.path()));
3273 return result;
3274 }
3275
3276 let source_contents: Vec<Option<Vec<u8>>> = sources
3278 .iter()
3279 .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
3280 .collect();
3281
3282 let mut result_entries: Vec<DiffEntry> = Vec::new();
3283 let mut renamed_deleted: HashSet<usize> = HashSet::new();
3284 let mut used_added2 = vec![false; added.len()];
3285
3286 if !added.is_empty() {
3287 let added_contents: Vec<Option<Vec<u8>>> = added
3289 .iter()
3290 .map(|a| read_added_entry_bytes(odb, a, work_root))
3291 .collect();
3292
3293 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
3295 for (si, (src_path, src_oid, _)) in sources.iter().enumerate() {
3296 for (ai, add) in added.iter().enumerate() {
3297 if add.new_path.as_deref() == Some(src_path.as_str()) {
3300 continue;
3301 }
3302 let add_oid = if add.new_oid != zero_oid() {
3303 add.new_oid
3304 } else if let Some(ref data) = added_contents[ai] {
3305 Odb::hash_object_data(ObjectKind::Blob, data)
3306 } else {
3307 zero_oid()
3308 };
3309 if *src_oid == add_oid {
3310 scores.push((100, si, ai));
3311 continue;
3312 }
3313 let score = match (&source_contents[si], &added_contents[ai]) {
3314 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
3315 _ => 0,
3316 };
3317 if score >= threshold {
3318 scores.push((score, si, ai));
3319 }
3320 }
3321 }
3322
3323 scores.sort_by(|a, b| b.0.cmp(&a.0));
3325
3326 let mut used_added = vec![false; added.len()];
3328 let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
3329 for &(score, si, ai) in &scores {
3330 if used_added[ai] {
3331 continue;
3332 }
3333 used_added[ai] = true;
3334 source_to_added.entry(si).or_default().push((ai, score));
3335 }
3336
3337 for (&si, assignments_for_src) in &source_to_added {
3339 let (_, _, is_deleted) = &sources[si];
3340 if *is_deleted && !assignments_for_src.is_empty() {
3341 let rename_ai = assignments_for_src
3344 .iter()
3345 .max_by_key(|(ai, _score)| added[*ai].path().to_string())
3346 .map(|(ai, _)| *ai);
3347
3348 for &(ai, score) in assignments_for_src {
3349 let (ref src_path, _, _) = sources[si];
3350 let add = &added[ai];
3351 let src_mode = source_tree_entries
3352 .iter()
3353 .find(|(p, _, _)| p == src_path)
3354 .map(|(_, m, _)| m.clone())
3355 .unwrap_or_else(|| add.old_mode.clone());
3356
3357 let is_rename = Some(ai) == rename_ai;
3358 result_entries.push(DiffEntry {
3359 status: if is_rename {
3360 DiffStatus::Renamed
3361 } else {
3362 DiffStatus::Copied
3363 },
3364 old_path: Some(src_path.clone()),
3365 new_path: add.new_path.clone(),
3366 old_mode: src_mode,
3367 new_mode: add.new_mode.clone(),
3368 old_oid: sources[si].1,
3369 new_oid: add.new_oid,
3370 score: Some(score),
3371 });
3372 used_added2[ai] = true;
3373 }
3374 renamed_deleted.insert(si);
3375 } else {
3376 for &(ai, score) in assignments_for_src {
3378 let (ref src_path, _, _) = sources[si];
3379 let add = &added[ai];
3380 let src_mode = source_tree_entries
3381 .iter()
3382 .find(|(p, _, _)| p == src_path)
3383 .map(|(_, m, _)| m.clone())
3384 .unwrap_or_else(|| add.old_mode.clone());
3385
3386 result_entries.push(DiffEntry {
3387 status: DiffStatus::Copied,
3388 old_path: Some(src_path.clone()),
3389 new_path: add.new_path.clone(),
3390 old_mode: src_mode,
3391 new_mode: add.new_mode.clone(),
3392 old_oid: sources[si].1,
3393 new_oid: add.new_oid,
3394 score: Some(score),
3395 });
3396 used_added2[ai] = true;
3397 }
3398 }
3399 }
3400 }
3401
3402 for entry in deleted.into_iter() {
3404 if let Some(ref path) = entry.old_path {
3405 if let Some(&si) = deleted_source_idx.get(path) {
3406 if renamed_deleted.contains(&si) {
3407 continue;
3409 }
3410 }
3411 }
3412 result_entries.push(entry);
3413 }
3414
3415 let mut result = others;
3416 result.extend(result_entries);
3417 for (i, entry) in added.into_iter().enumerate() {
3419 if !used_added2[i] {
3420 result.push(entry);
3421 }
3422 }
3423
3424 let mut final_result = Vec::with_capacity(result.len());
3425 for e in result {
3426 if let Some(c) = modified_as_copy_from_sources(
3427 odb,
3428 work_root,
3429 &e,
3430 threshold,
3431 &sources,
3432 &source_contents,
3433 source_tree_entries,
3434 ) {
3435 final_result.push(c);
3436 } else {
3437 final_result.push(e);
3438 }
3439 }
3440
3441 final_result.sort_by(|a, b| a.path().cmp(b.path()));
3442 final_result
3443}
3444
3445pub fn status_apply_rename_copy_detection(
3455 odb: &Odb,
3456 unstaged_raw: Vec<DiffEntry>,
3457 threshold: u32,
3458 copies: bool,
3459 head_tree: Option<&ObjectId>,
3460) -> Result<Vec<DiffEntry>> {
3461 if !copies {
3462 return Ok(detect_renames(odb, None, unstaged_raw, threshold));
3463 }
3464 let source_tree_entries: Vec<(String, String, ObjectId)> = match head_tree {
3465 Some(oid) => flatten_tree(odb, oid, "")?
3466 .into_iter()
3467 .map(|e| (e.path, format_mode(e.mode), e.oid))
3468 .collect(),
3469 None => Vec::new(),
3470 };
3471 Ok(detect_copies(
3472 odb,
3473 None,
3474 unstaged_raw,
3475 threshold,
3476 false,
3477 &source_tree_entries,
3478 ))
3479}
3480
3481pub fn format_rename_path(old: &str, new: &str) -> String {
3489 let ob = old.as_bytes();
3490 let nb = new.as_bytes();
3491
3492 let pfx = {
3494 let mut last_sep = 0usize;
3495 let min_len = ob.len().min(nb.len());
3496 for i in 0..min_len {
3497 if ob[i] != nb[i] {
3498 break;
3499 }
3500 if ob[i] == b'/' {
3501 last_sep = i + 1;
3502 }
3503 }
3504 last_sep
3505 };
3506
3507 let mut sfx = {
3509 let mut last_sep = 0usize;
3510 let min_len = ob.len().min(nb.len());
3511 for i in 0..min_len {
3512 let oi = ob.len() - 1 - i;
3513 let ni = nb.len() - 1 - i;
3514 if ob[oi] != nb[ni] {
3515 break;
3516 }
3517 if ob[oi] == b'/' {
3518 last_sep = i + 1;
3519 }
3520 }
3521 last_sep
3522 };
3523
3524 let mut sfx_at_old = ob.len() - sfx;
3526 let mut sfx_at_new = nb.len() - sfx;
3527
3528 while pfx > sfx_at_old && pfx > sfx_at_new && sfx > 0 {
3531 let suffix_bytes = &ob[sfx_at_old..];
3533 let mut new_sfx = 0;
3534 for (i, &b) in suffix_bytes.iter().enumerate().skip(1) {
3536 if b == b'/' {
3537 new_sfx = sfx - i;
3538 break;
3539 }
3540 }
3541 if new_sfx == 0 || new_sfx >= sfx {
3542 sfx_at_old = ob.len();
3543 sfx_at_new = nb.len();
3544 break;
3545 }
3546 sfx = new_sfx;
3547 sfx_at_old = ob.len() - sfx;
3548 sfx_at_new = nb.len() - sfx;
3549 }
3550
3551 let prefix = &old[..pfx];
3558 let suffix = &old[sfx_at_old..];
3559 let old_mid = if pfx <= sfx_at_old {
3560 &old[pfx..sfx_at_old]
3561 } else {
3562 ""
3563 };
3564 let new_mid = if pfx <= sfx_at_new {
3565 &new[pfx..sfx_at_new]
3566 } else {
3567 ""
3568 };
3569
3570 if prefix.is_empty() && suffix.is_empty() {
3571 return format!("{old} => {new}");
3572 }
3573
3574 format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
3575}
3576
3577fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
3579 let old = del.old_path.as_deref().unwrap_or("");
3580 let new = add.new_path.as_deref().unwrap_or("");
3581 let old_base = old.rsplit('/').next().unwrap_or(old);
3582 let new_base = new.rsplit('/').next().unwrap_or(new);
3583 old_base == new_base && !old_base.is_empty()
3584}
3585
3586fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
3591 let old_norm = crate::crlf::crlf_to_lf(old);
3594 let new_norm = crate::crlf::crlf_to_lf(new);
3595
3596 let src_size = old_norm.len();
3597 let dst_size = new_norm.len();
3598
3599 if src_size == 0 && dst_size == 0 {
3600 return 100;
3601 }
3602 let total = src_size + dst_size;
3603 if total == 0 {
3604 return 100;
3605 }
3606
3607 use similar::{ChangeTag, TextDiff};
3609 let old_str = String::from_utf8_lossy(&old_norm);
3610 let new_str = String::from_utf8_lossy(&new_norm);
3611 let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
3612
3613 let mut shared_bytes = 0usize;
3614 for change in diff.iter_all_changes() {
3615 if change.tag() == ChangeTag::Equal {
3616 shared_bytes += change.value().len();
3618 }
3619 }
3620
3621 let max_size = src_size.max(dst_size);
3624
3625 ((shared_bytes * 100) / max_size).min(100) as u32
3626}
3627
3628#[must_use]
3632pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
3633 compute_similarity(old, new)
3634}
3635
3636pub fn format_raw(entry: &DiffEntry) -> String {
3642 let path = match entry.status {
3643 DiffStatus::Renamed | DiffStatus::Copied => {
3644 format!(
3645 "{}\t{}",
3646 entry.old_path.as_deref().unwrap_or(""),
3647 entry.new_path.as_deref().unwrap_or("")
3648 )
3649 }
3650 _ => entry.path().to_owned(),
3651 };
3652
3653 let status_str = match (entry.status, entry.score) {
3654 (DiffStatus::Renamed, Some(s)) => format!("R{:03}", s),
3655 (DiffStatus::Copied, Some(s)) => format!("C{:03}", s),
3656 _ => entry.status.letter().to_string(),
3657 };
3658
3659 format!(
3660 ":{} {} {} {} {}\t{}",
3661 entry.old_mode, entry.new_mode, entry.old_oid, entry.new_oid, status_str, path
3662 )
3663}
3664
3665pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
3667 let ellipsis = if std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes") {
3668 "..."
3669 } else {
3670 ""
3671 };
3672 let old_hex = format!("{}", entry.old_oid);
3673 let new_hex = format!("{}", entry.new_oid);
3674 let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
3675 let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
3676
3677 let path = match entry.status {
3679 DiffStatus::Renamed | DiffStatus::Copied => format!(
3680 "{}\t{}",
3681 entry.old_path.as_deref().unwrap_or(""),
3682 entry.new_path.as_deref().unwrap_or("")
3683 ),
3684 _ => entry.path().to_owned(),
3685 };
3686 let status_str = match (entry.status, entry.score) {
3687 (DiffStatus::Renamed, Some(s)) => format!("R{s:03}"),
3688 (DiffStatus::Copied, Some(s)) => format!("C{s:03}"),
3689 _ => entry.status.letter().to_string(),
3690 };
3691
3692 format!(
3693 ":{} {} {}{} {}{} {}\t{}",
3694 entry.old_mode,
3695 entry.new_mode,
3696 old_abbrev,
3697 ellipsis,
3698 new_abbrev,
3699 ellipsis,
3700 status_str,
3701 path
3702 )
3703}
3704
3705pub fn unified_diff(
3720 old_content: &str,
3721 new_content: &str,
3722 old_path: &str,
3723 new_path: &str,
3724 context_lines: usize,
3725 indent_heuristic: bool,
3726 quote_path_fully: bool,
3727) -> String {
3728 unified_diff_with_prefix(
3729 old_content,
3730 new_content,
3731 old_path,
3732 new_path,
3733 context_lines,
3734 0,
3735 "a/",
3736 "b/",
3737 indent_heuristic,
3738 quote_path_fully,
3739 )
3740}
3741
3742#[allow(clippy::too_many_arguments)] pub fn unified_diff_with_prefix(
3748 old_content: &str,
3749 new_content: &str,
3750 old_path: &str,
3751 new_path: &str,
3752 context_lines: usize,
3753 inter_hunk_context: usize,
3754 src_prefix: &str,
3755 dst_prefix: &str,
3756 indent_heuristic: bool,
3757 quote_path_fully: bool,
3758) -> String {
3759 unified_diff_with_prefix_and_funcname(
3760 old_content,
3761 new_content,
3762 old_path,
3763 new_path,
3764 context_lines,
3765 inter_hunk_context,
3766 src_prefix,
3767 dst_prefix,
3768 None,
3769 indent_heuristic,
3770 quote_path_fully,
3771 )
3772}
3773
3774#[allow(clippy::too_many_arguments)]
3777pub fn unified_diff_with_prefix_and_funcname(
3778 old_content: &str,
3779 new_content: &str,
3780 old_path: &str,
3781 new_path: &str,
3782 context_lines: usize,
3783 inter_hunk_context: usize,
3784 src_prefix: &str,
3785 dst_prefix: &str,
3786 funcname_matcher: Option<&FuncnameMatcher>,
3787 indent_heuristic: bool,
3788 quote_path_fully: bool,
3789) -> String {
3790 unified_diff_with_prefix_and_funcname_and_algorithm(
3791 old_content,
3792 new_content,
3793 old_path,
3794 new_path,
3795 context_lines,
3796 inter_hunk_context,
3797 src_prefix,
3798 dst_prefix,
3799 funcname_matcher,
3800 similar::Algorithm::Myers,
3801 false,
3802 false,
3803 indent_heuristic,
3804 quote_path_fully,
3805 )
3806}
3807
3808#[allow(clippy::too_many_arguments)]
3814pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
3815 old_content: &str,
3816 new_content: &str,
3817 old_path: &str,
3818 new_path: &str,
3819 context_lines: usize,
3820 inter_hunk_context: usize,
3821 src_prefix: &str,
3822 dst_prefix: &str,
3823 funcname_matcher: Option<&FuncnameMatcher>,
3824 algorithm: similar::Algorithm,
3825 function_context: bool,
3826 use_git_histogram: bool,
3827 indent_heuristic: bool,
3828 quote_path_fully: bool,
3829) -> String {
3830 if function_context {
3834 return unified_diff_with_function_context(
3835 old_content,
3836 new_content,
3837 old_path,
3838 new_path,
3839 context_lines,
3840 inter_hunk_context,
3841 src_prefix,
3842 dst_prefix,
3843 funcname_matcher,
3844 algorithm,
3845 indent_heuristic,
3846 quote_path_fully,
3847 );
3848 }
3849
3850 if use_git_histogram {
3851 return unified_diff_histogram_with_prefix_and_funcname(
3852 old_content,
3853 new_content,
3854 old_path,
3855 new_path,
3856 context_lines,
3857 inter_hunk_context,
3858 src_prefix,
3859 dst_prefix,
3860 funcname_matcher,
3861 quote_path_fully,
3862 );
3863 }
3864
3865 use crate::quote_path::format_diff_path_with_prefix;
3866 use similar::{udiff::UnifiedDiffHunk, TextDiff};
3867
3868 let diff = TextDiff::configure()
3869 .algorithm(algorithm)
3870 .diff_lines(old_content, new_content);
3871 let compacted_ops = diff_indent_heuristic::diff_lines_ops_compacted(
3872 old_content,
3873 new_content,
3874 algorithm,
3875 indent_heuristic,
3876 );
3877
3878 let mut output = String::new();
3879 if old_path == "/dev/null" {
3880 output.push_str("--- /dev/null\n");
3881 } else if src_prefix.is_empty() {
3882 output.push_str(&format!("--- {old_path}\n"));
3885 } else {
3886 output.push_str("--- ");
3887 output.push_str(&format_diff_path_with_prefix(
3888 src_prefix,
3889 old_path,
3890 quote_path_fully,
3891 ));
3892 output.push('\n');
3893 }
3894 if new_path == "/dev/null" {
3895 output.push_str("+++ /dev/null\n");
3896 } else if dst_prefix.is_empty() {
3897 output.push_str(&format!("+++ {new_path}\n"));
3898 } else {
3899 output.push_str("+++ ");
3900 output.push_str(&format_diff_path_with_prefix(
3901 dst_prefix,
3902 new_path,
3903 quote_path_fully,
3904 ));
3905 output.push('\n');
3906 }
3907
3908 let old_lines: Vec<&str> = old_content.lines().collect();
3909
3910 let max_common_gap = context_lines
3916 .saturating_mul(2)
3917 .saturating_add(inter_hunk_context);
3918 let op_groups = group_diff_ops_gap(compacted_ops, context_lines, max_common_gap);
3919
3920 for ops in op_groups {
3921 if ops.is_empty() {
3922 continue;
3923 }
3924 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
3925 let hunk_str = format!("{hunk}");
3926 if let Some(first_newline) = hunk_str.find('\n') {
3930 let header_line = &hunk_str[..first_newline];
3931 let rest = &hunk_str[first_newline..];
3932
3933 if let Some(func_ctx) =
3935 extract_function_context(header_line, &old_lines, funcname_matcher)
3936 {
3937 output.push_str(header_line);
3938 output.push(' ');
3939 output.push_str(&func_ctx);
3940 output.push_str(rest);
3941 } else {
3942 output.push_str(&hunk_str);
3943 }
3944 } else {
3945 output.push_str(&hunk_str);
3946 }
3947 }
3948
3949 output
3950}
3951
3952fn group_diff_ops_gap(
3958 mut ops: Vec<similar::DiffOp>,
3959 context: usize,
3960 max_common_gap: usize,
3961) -> Vec<Vec<similar::DiffOp>> {
3962 use similar::DiffOp;
3963 if ops.is_empty() {
3964 return vec![];
3965 }
3966
3967 let mut pending_group = Vec::new();
3968 let mut rv = Vec::new();
3969
3970 if let Some(DiffOp::Equal {
3971 old_index,
3972 new_index,
3973 len,
3974 }) = ops.first_mut()
3975 {
3976 let offset = (*len).saturating_sub(context);
3977 *old_index += offset;
3978 *new_index += offset;
3979 *len -= offset;
3980 }
3981
3982 if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
3983 *len -= (*len).saturating_sub(context);
3984 }
3985
3986 for op in ops.into_iter() {
3987 if let DiffOp::Equal {
3988 old_index,
3989 new_index,
3990 len,
3991 } = op
3992 {
3993 if len > max_common_gap {
3996 pending_group.push(DiffOp::Equal {
3997 old_index,
3998 new_index,
3999 len: context,
4000 });
4001 rv.push(pending_group);
4002 let offset = len.saturating_sub(context);
4003 pending_group = vec![DiffOp::Equal {
4004 old_index: old_index + offset,
4005 new_index: new_index + offset,
4006 len: len - offset,
4007 }];
4008 continue;
4009 }
4010 }
4011 pending_group.push(op);
4012 }
4013
4014 match &pending_group[..] {
4015 &[] | &[similar::DiffOp::Equal { .. }] => {}
4016 _ => rv.push(pending_group),
4017 }
4018
4019 rv
4020}
4021
4022fn unified_diff_with_function_context(
4024 old_content: &str,
4025 new_content: &str,
4026 old_path: &str,
4027 new_path: &str,
4028 context_lines: usize,
4029 inter_hunk_context: usize,
4030 src_prefix: &str,
4031 dst_prefix: &str,
4032 funcname_matcher: Option<&FuncnameMatcher>,
4033 algorithm: similar::Algorithm,
4034 indent_heuristic: bool,
4035 quote_path_fully: bool,
4036) -> String {
4037 use crate::quote_path::format_diff_path_with_prefix;
4038 use similar::{udiff::UnifiedDiffHunk, TextDiff};
4039
4040 let diff = TextDiff::configure()
4041 .algorithm(algorithm)
4042 .diff_lines(old_content, new_content);
4043
4044 let old_lines: Vec<&str> = old_content.lines().collect();
4045 let new_lines: Vec<&str> = new_content.lines().collect();
4046 let n_old = old_lines.len();
4047 let n_new = new_lines.len();
4048
4049 let max_common_gap = context_lines
4056 .saturating_mul(2)
4057 .saturating_add(inter_hunk_context);
4058 let all_ops = diff.ops().to_vec();
4059 let op_groups = group_diff_ops_gap(all_ops.clone(), context_lines, max_common_gap);
4060
4061 let mut ranges: Vec<(usize, usize, usize, usize)> = Vec::new();
4062
4063 for ops in op_groups {
4064 if ops.is_empty() {
4065 continue;
4066 }
4067 let i1_anchor = func_context_old_anchor(&ops, n_old);
4068 let i1_end = hunk_old_change_end_exclusive(&ops);
4069 let skip_preimage_pull =
4070 append_with_whole_function_added(&ops, n_old, n_new, &new_lines, funcname_matcher);
4071 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
4072 let hunk_str = format!("{hunk}");
4073 let header_line = hunk_str
4074 .lines()
4075 .next()
4076 .unwrap_or("")
4077 .trim_end_matches(['\r', '\n']);
4078 let Some((base_s1, _base_e1, _base_s2, _base_e2)) =
4079 parse_unified_hunk_header_ranges(header_line)
4080 else {
4081 continue;
4082 };
4083
4084 let ctx = context_lines;
4085 let (s1, e1, s2, e2) = if skip_preimage_pull {
4086 let s = n_old.saturating_sub(ctx);
4087 let s2 = map_old_line_to_new(&all_ops, s, n_new).min(n_new);
4088 (s, n_old, s2, n_new)
4089 } else {
4090 let mut s1 = base_s1.saturating_sub(ctx);
4091 let mut s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
4092
4093 let base_pre_s1 = i1_anchor.saturating_sub(ctx);
4094 if base_pre_s1 < s1 {
4095 s1 = base_pre_s1;
4096 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
4097 }
4098
4099 let fs1 = expand_func_pre_start(s1, i1_anchor, n_old, &old_lines, funcname_matcher);
4100 if fs1 < s1 {
4101 s1 = fs1;
4102 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
4103 }
4104
4105 let mut e1 = (i1_end + ctx).min(n_old);
4111 let mut e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
4112 let fe1 = expand_func_post_end(e1, i1_end, n_old, &old_lines, funcname_matcher);
4113 if fe1 > e1 {
4114 e1 = fe1;
4115 e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
4116 }
4117 (s1, e1, s2, e2)
4118 };
4119
4120 ranges.push((s1, e1, s2, e2));
4121 }
4122
4123 ranges.sort_by_key(|r| (r.0, r.2));
4126 let mut merged: Vec<(usize, usize, usize, usize)> = Vec::with_capacity(ranges.len());
4127 for (s1, e1, s2, e2) in ranges {
4128 if let Some(last) = merged.last_mut() {
4129 if s1 < last.1 {
4130 last.1 = last.1.max(e1);
4131 last.3 = last.3.max(e2);
4132 continue;
4133 }
4134 }
4135 merged.push((s1, e1, s2, e2));
4136 }
4137 let ranges = merged;
4138
4139 let mut output = String::new();
4140 if old_path == "/dev/null" {
4141 output.push_str("--- /dev/null\n");
4142 } else if src_prefix.is_empty() {
4143 output.push_str(&format!("--- {old_path}\n"));
4144 } else {
4145 output.push_str("--- ");
4146 output.push_str(&format_diff_path_with_prefix(
4147 src_prefix,
4148 old_path,
4149 quote_path_fully,
4150 ));
4151 output.push('\n');
4152 }
4153 if new_path == "/dev/null" {
4154 output.push_str("+++ /dev/null\n");
4155 } else if dst_prefix.is_empty() {
4156 output.push_str(&format!("+++ {new_path}\n"));
4157 } else {
4158 output.push_str("+++ ");
4159 output.push_str(&format_diff_path_with_prefix(
4160 dst_prefix,
4161 new_path,
4162 quote_path_fully,
4163 ));
4164 output.push('\n');
4165 }
4166
4167 for (s1, e1, s2, e2) in ranges {
4168 if s1 >= e1 && s2 >= e2 {
4169 continue;
4170 }
4171 let old_seg =
4172 line_slice_for_diff_with_eof_nl(&old_lines, s1, e1, old_content.ends_with('\n'));
4173 let new_seg =
4174 line_slice_for_diff_with_eof_nl(&new_lines, s2, e2, new_content.ends_with('\n'));
4175 let inner_ctx = old_seg.lines().count().max(new_seg.lines().count()).max(1);
4176 let piece = unified_diff_with_prefix_and_funcname_and_algorithm(
4177 &old_seg,
4178 &new_seg,
4179 old_path,
4180 new_path,
4181 inner_ctx,
4182 0,
4183 src_prefix,
4184 dst_prefix,
4185 funcname_matcher,
4186 algorithm,
4187 false,
4188 false,
4189 indent_heuristic,
4190 quote_path_fully,
4191 );
4192 let shifted = shift_unified_hunk_headers_to_full_file(&piece, s1, s2);
4193 let with_func =
4194 enrich_unified_hunk_headers_funcname(&shifted, &old_lines, funcname_matcher);
4195 for line in with_func.lines() {
4196 if line.starts_with("--- ") || line.starts_with("+++ ") {
4197 continue;
4198 }
4199 output.push_str(line);
4200 output.push('\n');
4201 }
4202 }
4203
4204 output
4205}
4206
4207fn shift_unified_hunk_headers_to_full_file(
4212 patch: &str,
4213 delta_old: usize,
4214 delta_new: usize,
4215) -> String {
4216 if delta_old == 0 && delta_new == 0 {
4217 return patch.to_owned();
4218 }
4219 let mut out = String::with_capacity(patch.len());
4220 for line in patch.lines() {
4221 if let Some(shifted) = shift_one_unified_hunk_header(line, delta_old, delta_new) {
4222 out.push_str(&shifted);
4223 } else {
4224 out.push_str(line);
4225 }
4226 out.push('\n');
4227 }
4228 out
4229}
4230
4231fn shift_one_unified_hunk_header(line: &str, delta_old: usize, delta_new: usize) -> Option<String> {
4232 let rest = line.strip_prefix("@@ ")?;
4233 let (old_chunk, after_plus) = rest.split_once(" +")?;
4234 let old_spec = old_chunk.strip_prefix('-')?;
4235 let (new_spec, suffix) = after_plus.split_once(" @@")?;
4236 let shifted_old = shift_unified_range_spec(old_spec, delta_old)?;
4237 let shifted_new = shift_unified_range_spec(new_spec, delta_new)?;
4238 Some(format!("@@ -{shifted_old} +{shifted_new} @@{suffix}"))
4239}
4240
4241fn shift_unified_range_spec(spec: &str, delta: usize) -> Option<String> {
4242 let spec = spec.trim();
4243 if let Some((start_s, count_s)) = spec.split_once(',') {
4244 let start: usize = start_s.parse().ok()?;
4245 let count: usize = count_s.parse().ok()?;
4246 Some(format!("{},{}", start.saturating_add(delta), count))
4247 } else {
4248 let start: usize = spec.parse().ok()?;
4249 Some(format!("{}", start.saturating_add(delta)))
4250 }
4251}
4252
4253fn enrich_unified_hunk_headers_funcname(
4255 patch: &str,
4256 full_old_lines: &[&str],
4257 funcname_matcher: Option<&FuncnameMatcher>,
4258) -> String {
4259 let mut out = String::with_capacity(patch.len());
4260 for line in patch.lines() {
4261 if let Some(fixed) = enrich_one_hunk_header_funcname(line, full_old_lines, funcname_matcher)
4262 {
4263 out.push_str(&fixed);
4264 } else {
4265 out.push_str(line);
4266 }
4267 out.push('\n');
4268 }
4269 out
4270}
4271
4272fn enrich_one_hunk_header_funcname(
4273 line: &str,
4274 full_old_lines: &[&str],
4275 funcname_matcher: Option<&FuncnameMatcher>,
4276) -> Option<String> {
4277 let after_at = line.strip_prefix("@@ ")?;
4278 let idx = after_at.find(" @@")?;
4279 let mid = after_at[..idx].trim();
4280 let tail = after_at[idx + 3..].trim_start();
4281 let header_for_parse = format!("@@ {mid} @@");
4282 let func = extract_function_context(&header_for_parse, full_old_lines, funcname_matcher);
4283 Some(if let Some(f) = func {
4284 format!("@@ {mid} @@ {f}")
4285 } else if !tail.is_empty() {
4286 format!("@@ {mid} @@ {tail}")
4287 } else {
4288 format!("@@ {mid} @@")
4289 })
4290}
4291
4292fn line_slice_for_diff_with_eof_nl(
4293 lines: &[&str],
4294 start: usize,
4295 end: usize,
4296 full_file_ends_with_newline: bool,
4297) -> String {
4298 if start >= end {
4299 return String::new();
4300 }
4301 let mut s = lines[start..end].join("\n");
4302 let slice_is_suffix_of_file = end == lines.len();
4303 let need_trailing_nl = if slice_is_suffix_of_file {
4304 full_file_ends_with_newline
4305 } else {
4306 true
4307 };
4308 if need_trailing_nl && !s.ends_with('\n') {
4309 s.push('\n');
4310 }
4311 s
4312}
4313
4314fn map_old_line_to_new(ops: &[similar::DiffOp], old_line: usize, n_new: usize) -> usize {
4317 use similar::DiffOp;
4318 let mut n = 0usize;
4319 for op in ops {
4320 match *op {
4321 DiffOp::Equal {
4322 old_index,
4323 new_index,
4324 len,
4325 } => {
4326 if old_index + len <= old_line {
4327 n = new_index + len;
4328 continue;
4329 }
4330 if old_index < old_line {
4331 let take = old_line - old_index;
4332 return (new_index + take).min(n_new);
4333 }
4334 return new_index.min(n_new);
4335 }
4336 DiffOp::Delete {
4337 old_index,
4338 old_len,
4339 new_index,
4340 } => {
4341 if old_index + old_len <= old_line {
4342 n = new_index;
4343 continue;
4344 }
4345 if old_index < old_line {
4346 return new_index.min(n_new);
4347 }
4348 }
4349 DiffOp::Insert {
4350 old_index,
4351 new_index,
4352 new_len,
4353 } => {
4354 if old_index < old_line {
4355 n = new_index + new_len;
4356 continue;
4357 }
4358 if old_index == old_line {
4359 return (new_index + new_len).min(n_new);
4362 }
4363 return new_index.min(n_new);
4364 }
4365 DiffOp::Replace {
4366 old_index,
4367 old_len,
4368 new_index,
4369 new_len,
4370 } => {
4371 if old_index + old_len <= old_line {
4372 n = new_index + new_len;
4373 continue;
4374 }
4375 if old_index < old_line {
4376 let into_old = old_line - old_index;
4377 let mapped = new_index + into_old.min(new_len);
4378 return mapped.min(n_new);
4379 }
4380 return new_index.min(n_new);
4381 }
4382 }
4383 }
4384 n.min(n_new)
4385}
4386
4387fn parse_unified_hunk_header_ranges(header: &str) -> Option<(usize, usize, usize, usize)> {
4389 let rest = header.strip_prefix("@@ ")?;
4390 let (old_tok, rest2) = rest.split_once(" +")?;
4391 let old_tok = old_tok.strip_prefix('-')?;
4392 let new_tok = rest2.split_once(" @@").map(|(a, _)| a)?;
4393
4394 fn parse_side(spec: &str) -> Option<(usize, usize)> {
4395 let spec = spec.trim();
4396 let (start_one_based, count) = if let Some((a, b)) = spec.split_once(',') {
4397 (a.parse::<usize>().ok()?, b.parse::<usize>().ok()?)
4398 } else {
4399 let s = spec.parse::<usize>().ok()?;
4400 (s, 1usize)
4401 };
4402 let s0 = start_one_based.saturating_sub(1);
4403 let e0 = s0.saturating_add(count);
4404 Some((s0, e0))
4405 }
4406
4407 let (os, oe) = parse_side(old_tok)?;
4408 let (ns, ne) = parse_side(new_tok)?;
4409 Some((os, oe, ns, ne))
4410}
4411
4412fn append_with_whole_function_added(
4415 ops: &[similar::DiffOp],
4416 n_old: usize,
4417 n_new: usize,
4418 new_lines: &[&str],
4419 matcher: Option<&FuncnameMatcher>,
4420) -> bool {
4421 use similar::DiffOp;
4422 if n_old == 0 {
4423 return false;
4424 }
4425 let mut only_ins_or_eq = true;
4426 let mut min_new_ins = usize::MAX;
4427 for op in ops {
4428 match *op {
4429 DiffOp::Equal { .. } => {}
4430 DiffOp::Insert {
4431 new_index, new_len, ..
4432 } => {
4433 min_new_ins = min_new_ins.min(new_index);
4434 if new_len == 0 {
4435 only_ins_or_eq = false;
4436 }
4437 }
4438 DiffOp::Delete { .. } | DiffOp::Replace { .. } => {
4439 only_ins_or_eq = false;
4440 }
4441 }
4442 }
4443 let mut insert_at_eof = false;
4444 for op in ops {
4445 if let DiffOp::Insert { old_index, .. } = *op {
4446 if old_index == n_old {
4447 insert_at_eof = true;
4448 break;
4449 }
4450 }
4451 }
4452 let append_at_eof = min_new_ins == n_old || insert_at_eof;
4453 if !only_ins_or_eq || !append_at_eof || min_new_ins == usize::MAX {
4454 return false;
4455 }
4456 let mut j = min_new_ins;
4461 while j < n_new {
4462 let line = new_lines[j];
4463 if line.trim().is_empty() {
4464 j += 1;
4465 continue;
4466 }
4467 if let Some(m) = matcher {
4468 if m.match_line(line).is_some() {
4469 return true;
4470 }
4471 } else if inserted_block_starts_with_c_like_function_definition(line) {
4472 return true;
4473 }
4474 j += 1;
4475 }
4476 false
4477}
4478
4479fn inserted_block_starts_with_c_like_function_definition(line: &str) -> bool {
4480 let t = line.trim_start();
4481 let Some(open_paren) = t.find('(') else {
4482 return false;
4483 };
4484 let head = &t[..open_paren];
4485 let tokens: Vec<&str> = head.split_whitespace().collect();
4486 if tokens.len() < 2 {
4487 return false;
4489 }
4490 let nameish = tokens.last().copied().unwrap_or("");
4491 let name = nameish.trim_end_matches(['*', '&']);
4492 if name.is_empty() || !name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
4493 return false;
4494 }
4495 let type_or_modifier = |tok: &str| {
4496 matches!(
4497 tok,
4498 "static"
4499 | "extern"
4500 | "inline"
4501 | "void"
4502 | "int"
4503 | "char"
4504 | "short"
4505 | "long"
4506 | "float"
4507 | "double"
4508 | "unsigned"
4509 | "signed"
4510 | "struct"
4511 | "enum"
4512 | "union"
4513 | "const"
4514 | "volatile"
4515 | "typedef"
4516 )
4517 };
4518 tokens[..tokens.len() - 1]
4519 .iter()
4520 .any(|tok| type_or_modifier(tok))
4521}
4522
4523fn hunk_old_change_end_exclusive(ops: &[similar::DiffOp]) -> usize {
4524 use similar::DiffOp;
4525 let mut max_o = 0usize;
4526 for op in ops {
4527 match *op {
4528 DiffOp::Delete {
4529 old_index, old_len, ..
4530 } => {
4531 max_o = max_o.max(old_index + old_len);
4532 }
4533 DiffOp::Replace {
4534 old_index, old_len, ..
4535 } => {
4536 max_o = max_o.max(old_index + old_len);
4537 }
4538 DiffOp::Insert { old_index, .. } => {
4539 max_o = max_o.max(old_index);
4542 }
4543 DiffOp::Equal { .. } => {}
4544 }
4545 }
4546 max_o
4547}
4548
4549fn func_context_old_anchor(ops: &[similar::DiffOp], n_old: usize) -> usize {
4550 use similar::DiffOp;
4551 let mut has_delete_or_replace = false;
4552 let mut min_del = usize::MAX;
4553 let mut min_ins_old = usize::MAX;
4554
4555 for op in ops {
4556 match *op {
4557 DiffOp::Delete {
4558 old_index, old_len, ..
4559 } => {
4560 has_delete_or_replace = true;
4561 min_del = min_del.min(old_index);
4562 min_del = min_del.min(old_index + old_len.saturating_sub(1));
4563 }
4564 DiffOp::Replace {
4565 old_index, old_len, ..
4566 } => {
4567 has_delete_or_replace = true;
4568 min_del = min_del.min(old_index);
4569 min_del = min_del.min(old_index + old_len.saturating_sub(1));
4570 }
4571 DiffOp::Insert { old_index, .. } => {
4572 min_ins_old = min_ins_old.min(old_index);
4573 }
4574 DiffOp::Equal { .. } => {}
4575 }
4576 }
4577
4578 let mut i1 = if has_delete_or_replace {
4579 min_del
4580 } else if min_ins_old != usize::MAX {
4581 min_ins_old
4582 } else {
4583 0
4584 };
4585
4586 let pure_insert = ops
4587 .iter()
4588 .all(|op| matches!(op, DiffOp::Insert { .. } | DiffOp::Equal { .. }))
4589 && ops.iter().any(|op| matches!(op, DiffOp::Insert { .. }));
4590
4591 if pure_insert && i1 >= n_old && n_old > 0 {
4592 i1 = n_old - 1;
4593 }
4594
4595 i1.min(n_old.saturating_sub(1))
4596}
4597
4598fn expand_func_pre_start(
4599 s1: usize,
4600 i1: usize,
4601 n_old: usize,
4602 old_lines: &[&str],
4603 matcher: Option<&FuncnameMatcher>,
4604) -> usize {
4605 if n_old == 0 {
4606 return s1;
4607 }
4608 let i1 = i1.min(n_old.saturating_sub(1));
4609 let mut fs1 = get_func_line_backward(old_lines, i1, matcher).unwrap_or(i1);
4610 while fs1 > 0
4611 && !is_line_empty_for_func_context(old_lines[fs1 - 1])
4612 && !is_func_line(old_lines[fs1 - 1], matcher)
4613 {
4614 fs1 -= 1;
4615 }
4616 s1.min(fs1)
4617}
4618
4619fn expand_func_post_end(
4620 e1: usize,
4621 i1_end: usize,
4622 n_old: usize,
4623 old_lines: &[&str],
4624 matcher: Option<&FuncnameMatcher>,
4625) -> usize {
4626 let from = i1_end.min(n_old);
4627 let fe1 = get_func_line_forward(old_lines, from, matcher).unwrap_or(n_old);
4628 let mut fe1_adj = fe1;
4629 while fe1_adj > 0 && is_line_empty_for_func_context(old_lines[fe1_adj - 1]) {
4630 fe1_adj -= 1;
4631 }
4632 e1.max(fe1_adj).min(n_old)
4633}
4634
4635fn is_line_empty_for_func_context(line: &str) -> bool {
4636 line.chars().all(|c| c.is_whitespace())
4637}
4638
4639fn is_func_line(line: &str, matcher: Option<&FuncnameMatcher>) -> bool {
4640 if let Some(m) = matcher {
4641 return m.match_line(line).is_some();
4642 }
4643 let t = line.trim_end_matches(['\n', '\r']);
4644 if t.is_empty() {
4645 return false;
4646 }
4647 let b = t.as_bytes()[0];
4648 b.is_ascii_alphabetic() || b == b'_' || b == b'$'
4649}
4650
4651fn get_func_line_backward(
4652 old_lines: &[&str],
4653 start: usize,
4654 matcher: Option<&FuncnameMatcher>,
4655) -> Option<usize> {
4656 let mut l = start.min(old_lines.len().saturating_sub(1));
4657 if old_lines.is_empty() {
4658 return None;
4659 }
4660 loop {
4661 if is_func_line(old_lines[l], matcher) {
4662 return Some(l);
4663 }
4664 if l == 0 {
4665 break;
4666 }
4667 l -= 1;
4668 }
4669 None
4670}
4671
4672fn get_func_line_forward(
4673 old_lines: &[&str],
4674 start: usize,
4675 matcher: Option<&FuncnameMatcher>,
4676) -> Option<usize> {
4677 let mut l = start;
4678 while l < old_lines.len() {
4679 if is_func_line(old_lines[l], matcher) {
4680 return Some(l);
4681 }
4682 l += 1;
4683 }
4684 None
4685}
4686
4687pub fn anchored_unified_diff(
4697 old_content: &str,
4698 new_content: &str,
4699 old_path: &str,
4700 new_path: &str,
4701 context_lines: usize,
4702 anchors: &[String],
4703 algorithm: similar::Algorithm,
4704 use_git_histogram: bool,
4705 indent_heuristic: bool,
4706 quote_path_fully: bool,
4707) -> String {
4708 use crate::quote_path::format_diff_path_with_prefix;
4709 use similar::TextDiff;
4710
4711 let old_lines: Vec<&str> = old_content.lines().collect();
4712 let new_lines: Vec<&str> = new_content.lines().collect();
4713
4714 let mut anchor_pairs: Vec<(usize, usize)> = Vec::new(); for anchor in anchors {
4718 let anchor_str = anchor.as_str();
4719
4720 let old_positions: Vec<usize> = old_lines
4722 .iter()
4723 .enumerate()
4724 .filter(|(_, l)| l.trim_end() == anchor_str)
4725 .map(|(i, _)| i)
4726 .collect();
4727
4728 let new_positions: Vec<usize> = new_lines
4730 .iter()
4731 .enumerate()
4732 .filter(|(_, l)| l.trim_end() == anchor_str)
4733 .map(|(i, _)| i)
4734 .collect();
4735
4736 if old_positions.len() == 1 && new_positions.len() == 1 {
4738 anchor_pairs.push((old_positions[0], new_positions[0]));
4739 }
4740 }
4741
4742 if anchor_pairs.is_empty() {
4744 return unified_diff_with_prefix_and_funcname_and_algorithm(
4745 old_content,
4746 new_content,
4747 old_path,
4748 new_path,
4749 context_lines,
4750 0,
4751 "a/",
4752 "b/",
4753 None,
4754 algorithm,
4755 false,
4756 use_git_histogram,
4757 indent_heuristic,
4758 quote_path_fully,
4759 );
4760 }
4761
4762 anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
4764
4765 let mut filtered: Vec<(usize, usize)> = Vec::new();
4768 for &pair in &anchor_pairs {
4769 if filtered.is_empty() || filtered.last().is_some_and(|last| pair.1 > last.1) {
4770 filtered.push(pair);
4771 }
4772 }
4773 let anchor_pairs = filtered;
4774
4775 struct LineDiffOp {
4784 tag: char, line: String,
4786 }
4787
4788 let append_segment_diff =
4789 |ops: &mut Vec<LineDiffOp>, old_seg_input: &str, new_seg_input: &str| {
4790 use similar::ChangeTag;
4791 let old_ls: Vec<&str> = old_seg_input.lines().collect();
4792 let new_ls: Vec<&str> = new_seg_input.lines().collect();
4793 if old_ls.is_empty() && new_ls.is_empty() {
4794 return;
4795 }
4796 let seg_diff = TextDiff::configure()
4797 .algorithm(algorithm)
4798 .diff_slices(&old_ls, &new_ls);
4799 let raw = seg_diff.ops().to_vec();
4800 let compacted = diff_indent_heuristic::apply_change_compact_to_ops(
4801 &raw,
4802 &old_ls,
4803 &new_ls,
4804 indent_heuristic,
4805 );
4806 for op in &compacted {
4807 for ch in op.iter_changes(&old_ls, &new_ls) {
4808 let t = match ch.tag() {
4809 ChangeTag::Equal => ' ',
4810 ChangeTag::Delete => '-',
4811 ChangeTag::Insert => '+',
4812 };
4813 ops.push(LineDiffOp {
4814 tag: t,
4815 line: ch.value().to_string(),
4816 });
4817 }
4818 }
4819 };
4820
4821 let mut ops: Vec<LineDiffOp> = Vec::new();
4822 let mut old_pos = 0usize;
4823 let mut new_pos = 0usize;
4824
4825 for &(old_anchor, new_anchor) in &anchor_pairs {
4826 let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
4828 let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
4829
4830 let old_seg_text = old_segment.join("\n");
4831 let new_seg_text = new_segment.join("\n");
4832
4833 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
4834 let old_seg_input = if old_seg_text.is_empty() {
4835 String::new()
4836 } else {
4837 format!("{}\n", old_seg_text)
4838 };
4839 let new_seg_input = if new_seg_text.is_empty() {
4840 String::new()
4841 } else {
4842 format!("{}\n", new_seg_text)
4843 };
4844 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
4845 }
4846
4847 ops.push(LineDiffOp {
4849 tag: ' ',
4850 line: old_lines[old_anchor].to_string(),
4851 });
4852
4853 old_pos = old_anchor + 1;
4854 new_pos = new_anchor + 1;
4855 }
4856
4857 let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
4859 let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
4860 let old_seg_text = old_segment.join("\n");
4861 let new_seg_text = new_segment.join("\n");
4862
4863 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
4864 let old_seg_input = if old_seg_text.is_empty() {
4865 String::new()
4866 } else {
4867 format!("{}\n", old_seg_text)
4868 };
4869 let new_seg_input = if new_seg_text.is_empty() {
4870 String::new()
4871 } else {
4872 format!("{}\n", new_seg_text)
4873 };
4874 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
4875 }
4876
4877 let mut output = String::new();
4879 if old_path == "/dev/null" {
4880 output.push_str("--- /dev/null\n");
4881 } else {
4882 output.push_str("--- ");
4883 output.push_str(&format_diff_path_with_prefix(
4884 "a/",
4885 old_path,
4886 quote_path_fully,
4887 ));
4888 output.push('\n');
4889 }
4890 if new_path == "/dev/null" {
4891 output.push_str("+++ /dev/null\n");
4892 } else {
4893 output.push_str("+++ ");
4894 output.push_str(&format_diff_path_with_prefix(
4895 "b/",
4896 new_path,
4897 quote_path_fully,
4898 ));
4899 output.push('\n');
4900 }
4901
4902 let total_ops = ops.len();
4904 if total_ops == 0 {
4905 return output;
4906 }
4907
4908 let mut hunks: Vec<(usize, usize)> = Vec::new(); let mut i = 0;
4911 while i < total_ops {
4912 if ops[i].tag != ' ' {
4913 let start = i.saturating_sub(context_lines);
4914 let mut end = i;
4915 while end < total_ops {
4917 if ops[end].tag != ' ' {
4918 end += 1;
4919 continue;
4920 }
4921 let mut next_change = end;
4923 while next_change < total_ops && ops[next_change].tag == ' ' {
4924 next_change += 1;
4925 }
4926 if next_change < total_ops && next_change - end <= context_lines * 2 {
4927 end = next_change + 1;
4928 } else {
4929 end = (end + context_lines).min(total_ops);
4930 break;
4931 }
4932 }
4933 if let Some(last) = hunks.last_mut() {
4935 if start <= last.1 {
4936 last.1 = end;
4937 } else {
4938 hunks.push((start, end));
4939 }
4940 } else {
4941 hunks.push((start, end));
4942 }
4943 i = end;
4944 } else {
4945 i += 1;
4946 }
4947 }
4948
4949 for (start, end) in hunks {
4951 let mut old_start = 1usize;
4953 let mut new_start = 1usize;
4954 for op in &ops[..start] {
4956 match op.tag {
4957 ' ' => {
4958 old_start += 1;
4959 new_start += 1;
4960 }
4961 '-' => {
4962 old_start += 1;
4963 }
4964 '+' => {
4965 new_start += 1;
4966 }
4967 _ => {}
4968 }
4969 }
4970 let mut old_count = 0usize;
4971 let mut new_count = 0usize;
4972 for op in &ops[start..end] {
4973 match op.tag {
4974 ' ' => {
4975 old_count += 1;
4976 new_count += 1;
4977 }
4978 '-' => {
4979 old_count += 1;
4980 }
4981 '+' => {
4982 new_count += 1;
4983 }
4984 _ => {}
4985 }
4986 }
4987
4988 output.push_str(&format!(
4989 "@@ -{},{} +{},{} @@\n",
4990 old_start, old_count, new_start, new_count
4991 ));
4992 for op in &ops[start..end] {
4993 output.push(op.tag);
4994 output.push_str(&op.line);
4995 output.push('\n');
4996 }
4997 }
4998
4999 output
5000}
5001
5002fn extract_function_context(
5008 header: &str,
5009 old_lines: &[&str],
5010 funcname_matcher: Option<&FuncnameMatcher>,
5011) -> Option<String> {
5012 let at_pos = header.find("-")?;
5014 let rest = &header[at_pos + 1..];
5015 let comma_or_space = rest.find([',', ' '])?;
5016 let start_str = &rest[..comma_or_space];
5017 let start_line: usize = start_str.parse().ok()?;
5018
5019 let old_token_end = rest.find([' ', '\t']).unwrap_or(rest.len());
5023 let old_token = &rest[..old_token_end];
5024 let old_count: usize = if let Some(comma) = old_token.find(',') {
5025 old_token[comma + 1..].parse().unwrap_or(1)
5026 } else {
5027 1
5028 };
5029
5030 if start_line == 0 {
5031 return None;
5032 }
5033
5034 let search_end = if old_count == 0 {
5041 start_line.min(old_lines.len())
5042 } else {
5043 if start_line <= 1 {
5044 return None;
5045 }
5046 (start_line - 1).min(old_lines.len())
5047 };
5048 let truncate = |text: &str| {
5049 if text.len() > 80 {
5050 let mut end = 80;
5051 while end > 0 && !text.is_char_boundary(end) {
5052 end -= 1;
5053 }
5054 text[..end].to_owned()
5055 } else {
5056 text.to_owned()
5057 }
5058 };
5059
5060 for i in (0..search_end).rev() {
5061 let line = old_lines[i];
5062 if line.is_empty() {
5063 continue;
5064 }
5065 if let Some(matcher) = funcname_matcher {
5066 if let Some(matched) = matcher.match_line(line) {
5067 return Some(truncate(&matched));
5068 }
5069 continue;
5070 }
5071
5072 let first = line.as_bytes()[0];
5073 if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
5074 return Some(truncate(line.trim_end_matches(char::is_whitespace)));
5075 }
5076 }
5077 None
5078}
5079
5080pub fn format_stat_line(
5084 path: &str,
5085 insertions: usize,
5086 deletions: usize,
5087 max_path_len: usize,
5088) -> String {
5089 format_stat_line_width(path, insertions, deletions, max_path_len, 0)
5090}
5091
5092pub fn format_stat_line_width(
5093 path: &str,
5094 insertions: usize,
5095 deletions: usize,
5096 max_path_len: usize,
5097 count_width: usize,
5098) -> String {
5099 let total = insertions + deletions;
5100 let plus = "+".repeat(insertions.min(50));
5101 let minus = "-".repeat(deletions.min(50));
5102 let cw = if count_width > 0 {
5103 count_width
5104 } else {
5105 format!("{}", total).len()
5106 };
5107 let bar = format!("{}{}", plus, minus);
5108 if bar.is_empty() {
5109 format!(
5110 " {:<width$} | {:>cw$}",
5111 path,
5112 total,
5113 width = max_path_len,
5114 cw = cw
5115 )
5116 } else {
5117 format!(
5118 " {:<width$} | {:>cw$} {}",
5119 path,
5120 total,
5121 bar,
5122 width = max_path_len,
5123 cw = cw
5124 )
5125 }
5126}
5127
5128#[must_use]
5130pub fn normalize_ignore_space_change_line(line: &str) -> String {
5131 let mut result = String::with_capacity(line.len());
5132 let mut in_space = false;
5133 for c in line.chars() {
5134 if c.is_whitespace() {
5135 if !in_space {
5136 result.push(' ');
5137 in_space = true;
5138 }
5139 } else {
5140 result.push(c);
5141 in_space = false;
5142 }
5143 }
5144 while result.ends_with(' ') {
5145 result.pop();
5146 }
5147 result
5148}
5149
5150#[must_use]
5156pub fn normalize_ignore_space_change(content: &str) -> String {
5157 content
5158 .lines()
5159 .map(normalize_ignore_space_change_line)
5160 .collect::<Vec<_>>()
5161 .join("\n")
5162}
5163
5164pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
5168 count_changes_with_algorithm(old_content, new_content, similar::Algorithm::Myers, false)
5169}
5170
5171#[must_use]
5176pub fn count_changes_with_algorithm(
5177 old_content: &str,
5178 new_content: &str,
5179 algorithm: similar::Algorithm,
5180 use_git_histogram: bool,
5181) -> (usize, usize) {
5182 if use_git_histogram {
5183 use imara_diff::{Algorithm, Diff, InternedInput};
5184 let input = InternedInput::new(old_content, new_content);
5185 let mut d = Diff::compute(Algorithm::Histogram, &input);
5186 d.postprocess_lines(&input);
5187 return (d.count_additions() as usize, d.count_removals() as usize);
5188 }
5189
5190 use similar::{ChangeTag, TextDiff};
5191
5192 let diff = TextDiff::configure()
5193 .algorithm(algorithm)
5194 .diff_lines(old_content, new_content);
5195 let mut ins = 0;
5196 let mut del = 0;
5197
5198 for change in diff.iter_all_changes() {
5199 match change.tag() {
5200 ChangeTag::Insert => ins += 1,
5201 ChangeTag::Delete => del += 1,
5202 ChangeTag::Equal => {}
5203 }
5204 }
5205
5206 (ins, del)
5207}
5208
5209#[must_use]
5214pub fn count_git_lines(data: &[u8]) -> usize {
5215 if data.is_empty() {
5216 return 0;
5217 }
5218 let mut count = 0usize;
5219 let mut nl_just_seen = false;
5220 for &ch in data {
5221 if ch == b'\n' {
5222 count += 1;
5223 nl_just_seen = true;
5224 } else {
5225 nl_just_seen = false;
5226 }
5227 }
5228 if !nl_just_seen {
5229 count += 1;
5230 }
5231 count
5232}
5233
5234pub const GIT_DIFF_MAX_SCORE: u64 = 60_000;
5236const DIFF_MAX_SCORE: u64 = GIT_DIFF_MAX_SCORE;
5237const DIFF_MINIMUM_BREAK_SIZE: usize = 400;
5238const DIFF_DEFAULT_BREAK_SCORE: u64 = 30_000;
5239pub const GIT_DIFF_DEFAULT_BREAK_SCORE: u64 = DIFF_DEFAULT_BREAK_SCORE;
5241pub const GIT_DIFF_DEFAULT_MERGE_SCORE_AFTER_BREAK: u64 = 36_000;
5244const DIFF_HASHBASE: u32 = 107_927;
5245
5246#[derive(Clone, Copy, Default)]
5247struct SpanSlot {
5248 hashval: u32,
5249 cnt: u32,
5250}
5251
5252struct SpanHashTop {
5253 alloc_log2: u8,
5254 free_slots: i32,
5255 data: Vec<SpanSlot>,
5256}
5257
5258impl SpanHashTop {
5259 fn new(initial_log2: u8) -> Self {
5260 let cap = 1usize << initial_log2;
5261 Self {
5262 alloc_log2: initial_log2,
5263 free_slots: initial_free(initial_log2),
5264 data: vec![SpanSlot::default(); cap],
5265 }
5266 }
5267
5268 fn len(&self) -> usize {
5269 1usize << self.alloc_log2
5270 }
5271
5272 fn add_span(&mut self, hashval: u32, cnt: u32) {
5273 loop {
5274 let lim = self.len();
5275 let mut bucket = (hashval as usize) & (lim - 1);
5276 loop {
5277 let h = &mut self.data[bucket];
5278 if h.cnt == 0 {
5279 h.hashval = hashval;
5280 h.cnt = cnt;
5281 self.free_slots -= 1;
5282 if self.free_slots < 0 {
5283 self.rehash();
5284 break;
5285 }
5286 return;
5287 }
5288 if h.hashval == hashval {
5289 h.cnt = h.cnt.saturating_add(cnt);
5290 return;
5291 }
5292 bucket += 1;
5293 if bucket >= lim {
5294 bucket = 0;
5295 }
5296 }
5297 }
5298 }
5299
5300 fn rehash(&mut self) {
5301 let old = std::mem::take(&mut self.data);
5302 let old_log = self.alloc_log2;
5303 self.alloc_log2 = old_log.saturating_add(1);
5304 let new_len = 1usize << self.alloc_log2;
5305 self.free_slots = initial_free(self.alloc_log2);
5306 self.data = vec![SpanSlot::default(); new_len];
5307 let old_sz = 1usize << old_log;
5308 for o in old.iter().take(old_sz) {
5309 let o = *o;
5310 if o.cnt == 0 {
5311 continue;
5312 }
5313 self.add_span_after_rehash(o.hashval, o.cnt);
5314 }
5315 }
5316
5317 fn add_span_after_rehash(&mut self, hashval: u32, cnt: u32) {
5318 loop {
5319 let lim = self.len();
5320 let mut bucket = (hashval as usize) & (lim - 1);
5321 loop {
5322 let h = &mut self.data[bucket];
5323 if h.cnt == 0 {
5324 h.hashval = hashval;
5325 h.cnt = cnt;
5326 self.free_slots -= 1;
5327 if self.free_slots < 0 {
5328 self.rehash();
5329 break;
5330 }
5331 return;
5332 }
5333 if h.hashval == hashval {
5334 h.cnt = h.cnt.saturating_add(cnt);
5335 return;
5336 }
5337 bucket += 1;
5338 if bucket >= lim {
5339 bucket = 0;
5340 }
5341 }
5342 }
5343 }
5344
5345 fn sort_by_hashval(&mut self) {
5346 let sz = self.len();
5347 self.data[..sz].sort_by(|a, b| {
5348 if a.cnt == 0 {
5349 return std::cmp::Ordering::Greater;
5350 }
5351 if b.cnt == 0 {
5352 return std::cmp::Ordering::Less;
5353 }
5354 a.hashval.cmp(&b.hashval)
5355 });
5356 }
5357}
5358
5359fn initial_free(sz_log2: u8) -> i32 {
5360 let sz = sz_log2 as i32;
5361 ((1i32 << sz_log2) * (sz - 3) / sz).max(0)
5362}
5363
5364fn hash_blob_spans(buf: &[u8], is_text: bool) -> SpanHashTop {
5365 let mut hash = SpanHashTop::new(9);
5366 let mut n = 0u32;
5367 let mut accum1: u32 = 0;
5368 let mut accum2: u32 = 0;
5369 let mut i = 0usize;
5370 while i < buf.len() {
5371 let c = buf[i] as u32;
5372 let old_1 = accum1;
5373 i += 1;
5374
5375 if is_text && c == b'\r' as u32 && i < buf.len() && buf[i] == b'\n' {
5376 continue;
5377 }
5378
5379 accum1 = accum1.wrapping_shl(7) ^ accum2.wrapping_shr(25);
5380 accum2 = accum2.wrapping_shl(7) ^ old_1.wrapping_shr(25);
5381 accum1 = accum1.wrapping_add(c);
5382 n += 1;
5383 if n < 64 && c != b'\n' as u32 {
5384 continue;
5385 }
5386 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
5387 hash.add_span(hashval, n);
5388 n = 0;
5389 accum1 = 0;
5390 accum2 = 0;
5391 }
5392 if n > 0 {
5393 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
5394 hash.add_span(hashval, n);
5395 }
5396 hash.sort_by_hashval();
5397 hash
5398}
5399
5400#[must_use]
5405pub fn diffcore_count_changes(old: &[u8], new: &[u8]) -> (u64, u64) {
5406 let src_is_text = !crate::merge_file::is_binary(old);
5407 let dst_is_text = !crate::merge_file::is_binary(new);
5408 let src_count = hash_blob_spans(old, src_is_text);
5409 let dst_count = hash_blob_spans(new, dst_is_text);
5410 let mut sc: u64 = 0;
5411 let mut la: u64 = 0;
5412 let mut si = 0usize;
5413 let mut di = 0usize;
5414 let src_len = src_count.len();
5415 let dst_len = dst_count.len();
5416 loop {
5417 if si >= src_len || src_count.data[si].cnt == 0 {
5418 break;
5419 }
5420 let s_hash = src_count.data[si].hashval;
5421 let s_cnt = u64::from(src_count.data[si].cnt);
5422 while di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval < s_hash {
5423 la += u64::from(dst_count.data[di].cnt);
5424 di += 1;
5425 }
5426 let mut dst_cnt = 0u64;
5427 if di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval == s_hash {
5428 dst_cnt = u64::from(dst_count.data[di].cnt);
5429 di += 1;
5430 }
5431 if s_cnt < dst_cnt {
5432 la += dst_cnt - s_cnt;
5433 sc += s_cnt;
5434 } else {
5435 sc += dst_cnt;
5436 }
5437 si += 1;
5438 }
5439 while di < dst_len && dst_count.data[di].cnt != 0 {
5440 la += u64::from(dst_count.data[di].cnt);
5441 di += 1;
5442 }
5443 (sc, la)
5444}
5445
5446#[must_use]
5449pub fn should_break_rewrite_for_stat(old: &[u8], new: &[u8]) -> bool {
5450 should_break_rewrite_inner(old, new, DIFF_DEFAULT_BREAK_SCORE)
5451}
5452
5453#[must_use]
5457pub fn should_break_rewrite_pair(old: &[u8], new: &[u8], break_score: u64) -> bool {
5458 should_break_rewrite_inner(old, new, break_score)
5459}
5460
5461pub fn parse_diff_rename_score_token(arg: &str) -> Option<u64> {
5464 let mut num: u64 = 0;
5465 let mut scale: u64 = 1;
5466 let mut dot = false;
5467 let mut saw_digit = false;
5468 for ch in arg.chars() {
5469 if !dot && ch == '.' {
5470 scale = 1;
5471 dot = true;
5472 continue;
5473 }
5474 if ch == '%' {
5475 scale = if dot { scale.saturating_mul(100) } else { 100 };
5476 break;
5477 }
5478 if ch.is_ascii_digit() {
5479 saw_digit = true;
5480 if scale < 100_000 {
5481 scale = scale.saturating_mul(10);
5482 num = num.saturating_mul(10) + u64::from(ch as u8 - b'0');
5483 }
5484 } else {
5485 break;
5486 }
5487 }
5488 if !saw_digit {
5489 return None;
5490 }
5491 Some(if num >= scale {
5492 GIT_DIFF_MAX_SCORE
5493 } else {
5494 GIT_DIFF_MAX_SCORE * num / scale
5495 })
5496}
5497
5498#[must_use]
5501pub fn rewrite_merge_score(old: &[u8], new: &[u8]) -> Option<u64> {
5502 if old.is_empty() {
5503 return None;
5504 }
5505 let max_size = old.len().max(new.len());
5506 if max_size < DIFF_MINIMUM_BREAK_SIZE {
5507 return None;
5508 }
5509 let (src_copied, _) = diffcore_count_changes(old, new);
5510 let src_copied = src_copied.min(old.len() as u64);
5511 let src_removed = (old.len() as u64).saturating_sub(src_copied);
5512 Some(src_removed * DIFF_MAX_SCORE / old.len() as u64)
5513}
5514
5515#[must_use]
5517pub fn rewrite_dissimilarity_index_percent(old: &[u8], new: &[u8]) -> Option<u32> {
5518 let score = rewrite_merge_score(old, new)?;
5519 Some((score * 100 / DIFF_MAX_SCORE).min(100) as u32)
5520}
5521
5522fn should_break_rewrite_inner(src: &[u8], dst: &[u8], break_score: u64) -> bool {
5523 if src.is_empty() {
5524 return false;
5525 }
5526 let max_size = src.len().max(dst.len());
5527 if max_size < DIFF_MINIMUM_BREAK_SIZE {
5528 return false;
5529 }
5530 let (src_copied, literal_added) = diffcore_count_changes(src, dst);
5531 let src_copied = src_copied.min(src.len() as u64);
5532 let mut literal_added = literal_added;
5533 let dst_len = dst.len() as u64;
5534 if src_copied < dst_len && literal_added + src_copied > dst_len {
5535 literal_added = dst_len.saturating_sub(src_copied);
5536 }
5537 let src_removed = (src.len() as u64).saturating_sub(src_copied);
5538 let merge_score = src_removed * DIFF_MAX_SCORE / src.len() as u64;
5539 if merge_score > break_score {
5540 return true;
5541 }
5542 let delta_size = src_removed.saturating_add(literal_added);
5543 if delta_size * DIFF_MAX_SCORE / (max_size as u64) < break_score {
5544 return false;
5545 }
5546 let s = src.len() as u64;
5547 if (s * break_score < src_removed * DIFF_MAX_SCORE)
5548 && (literal_added * 20 < src_removed)
5549 && (literal_added * 20 < src_copied)
5550 {
5551 return false;
5552 }
5553 true
5554}
5555
5556struct FlatEntry {
5560 path: String,
5561 mode: u32,
5562 oid: ObjectId,
5563}
5564
5565fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
5566 let entries = read_tree(odb, tree_oid)?;
5567 let mut result = Vec::new();
5568
5569 for entry in entries {
5570 let name_str = String::from_utf8_lossy(&entry.name);
5571 let path = format_path(prefix, &name_str);
5572 if is_tree_mode(entry.mode) {
5573 let nested = flatten_tree(odb, &entry.oid, &path)?;
5574 result.extend(nested);
5575 } else {
5576 result.push(FlatEntry {
5577 path,
5578 mode: entry.mode,
5579 oid: entry.oid,
5580 });
5581 }
5582 }
5583
5584 Ok(result)
5585}
5586
5587pub fn head_path_states(
5589 odb: &Odb,
5590 head_tree: Option<&ObjectId>,
5591) -> Result<std::collections::BTreeMap<String, (u32, ObjectId)>> {
5592 let mut m = std::collections::BTreeMap::new();
5593 let Some(t) = head_tree else {
5594 return Ok(m);
5595 };
5596 for fe in flatten_tree(odb, t, "")? {
5597 m.insert(fe.path, (fe.mode, fe.oid));
5598 }
5599 Ok(m)
5600}
5601
5602fn is_tree_mode(mode: u32) -> bool {
5604 mode == 0o040000
5605}
5606
5607fn format_path(prefix: &str, name: &str) -> String {
5609 if prefix.is_empty() {
5610 name.to_owned()
5611 } else {
5612 format!("{prefix}/{name}")
5613 }
5614}
5615
5616pub fn format_mode(mode: u32) -> String {
5618 format!("{mode:06o}")
5619}
5620
5621#[must_use]
5625pub fn read_submodule_head_for_checkout(sub_dir: &Path) -> Option<ObjectId> {
5626 read_submodule_head(sub_dir)
5627}
5628
5629#[must_use]
5634pub fn submodule_commit_subject_line(c: &CommitData) -> String {
5635 let enc = c.encoding.as_deref().unwrap_or("UTF-8");
5636 let is_latin1 = enc.eq_ignore_ascii_case("ISO8859-1")
5637 || enc.eq_ignore_ascii_case("ISO-8859-1")
5638 || enc.eq_ignore_ascii_case("LATIN1")
5639 || enc.eq_ignore_ascii_case("ISO-8859-15");
5640 if let Some(raw) = c.raw_message.as_deref() {
5641 let line = raw.split(|b| *b == b'\n').next().unwrap_or(raw);
5642 if is_latin1 {
5643 return line
5644 .iter()
5645 .map(|&b| b as char)
5646 .collect::<String>()
5647 .trim()
5648 .to_owned();
5649 }
5650 return String::from_utf8_lossy(line).trim().to_string();
5651 }
5652 c.message.lines().next().unwrap_or("").trim().to_owned()
5653}
5654
5655fn submodule_worktree_is_unpopulated_placeholder(sub_dir: &Path) -> bool {
5658 match fs::read_dir(sub_dir) {
5659 Ok(mut it) => it.next().is_none(),
5660 Err(e) if e.kind() == std::io::ErrorKind::NotFound => true,
5661 Err(_) => false,
5662 }
5663}
5664
5665fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
5666 read_submodule_head_oid(sub_dir)
5667}
5668
5669#[must_use]
5671pub fn submodule_embedded_git_dir(sub_dir: &Path) -> Option<PathBuf> {
5672 let gitfile = sub_dir.join(".git");
5673 if gitfile.is_file() {
5674 let content = fs::read_to_string(&gitfile).ok()?;
5675 let gitdir = content
5676 .lines()
5677 .find_map(|l| l.strip_prefix("gitdir: "))?
5678 .trim();
5679 Some(if Path::new(gitdir).is_absolute() {
5680 PathBuf::from(gitdir)
5681 } else {
5682 sub_dir.join(gitdir)
5683 })
5684 } else if gitfile.is_dir() {
5685 Some(gitfile)
5686 } else {
5687 None
5688 }
5689}
5690
5691fn find_superproject_git(sub_dir: &Path) -> Option<(PathBuf, PathBuf)> {
5693 let mut cur = sub_dir.parent()?;
5694 loop {
5695 let git_path = cur.join(".git");
5696 if git_path.exists() {
5697 let gd = if git_path.is_file() {
5698 let content = fs::read_to_string(&git_path).ok()?;
5699 let line = content
5700 .lines()
5701 .find_map(|l| l.strip_prefix("gitdir: "))?
5702 .trim();
5703 if Path::new(line).is_absolute() {
5704 PathBuf::from(line)
5705 } else {
5706 cur.join(line)
5707 }
5708 } else {
5709 git_path
5710 };
5711 return Some((cur.to_path_buf(), gd));
5712 }
5713 cur = cur.parent()?;
5714 }
5715}
5716
5717pub fn read_submodule_head_oid(sub_dir: &Path) -> Option<ObjectId> {
5723 let mut git_dir = submodule_embedded_git_dir(sub_dir)?;
5726 if let Some((super_wt, super_git_dir)) = find_superproject_git(sub_dir) {
5727 let rel = sub_dir.strip_prefix(&super_wt).ok()?;
5728 let rel_str = rel.to_string_lossy().replace('\\', "/");
5729 let local_mod = super_git_dir
5730 .join("modules")
5731 .join(rel_str.trim_start_matches('/'));
5732 if local_mod.join("HEAD").exists() {
5733 let sg = super_git_dir.canonicalize().unwrap_or(super_git_dir);
5734 let cur = git_dir.canonicalize().unwrap_or_else(|_| git_dir.clone());
5735 if !cur.starts_with(&sg) {
5736 git_dir = local_mod;
5737 }
5738 }
5739 }
5740 let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
5741 let head_trimmed = head_content.trim();
5742 if head_trimmed.starts_with("ref: ") {
5743 match crate::refs::resolve_ref(&git_dir, "HEAD") {
5747 Ok(oid) => Some(oid),
5748 Err(_) => {
5749 let mut found = None;
5750 for branch in ["main", "master"] {
5751 let p = git_dir.join("refs/heads").join(branch);
5752 if let Ok(s) = fs::read_to_string(&p) {
5753 if let Ok(o) = ObjectId::from_hex(s.trim()) {
5754 found = Some(o);
5755 break;
5756 }
5757 }
5758 }
5759 found
5760 }
5761 }
5762 } else {
5763 ObjectId::from_hex(head_trimmed).ok()
5764 }
5765}
5766
5767#[must_use]
5779pub fn submodule_head_object_broken(sub_dir: &Path) -> bool {
5780 let Some(sub_git_dir) = submodule_embedded_git_dir(sub_dir) else {
5781 return false;
5782 };
5783 let Some(head_oid) = read_submodule_head_oid(sub_dir) else {
5784 return false;
5785 };
5786 let odb = Odb::new(&sub_git_dir.join("objects"));
5787 match odb.read(&head_oid) {
5788 Ok(obj) => parse_commit(&obj.data).is_err(),
5791 Err(_) => true,
5792 }
5793}
5794
5795fn submodule_has_dirty_worktree_for_super_diff(
5798 super_worktree: &Path,
5799 rel_path: &str,
5800 recorded_oid: &ObjectId,
5801) -> bool {
5802 let flags = submodule_porcelain_flags(super_worktree, rel_path, *recorded_oid);
5803 flags.modified || flags.untracked
5804}
5805
5806#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
5808pub struct SubmodulePorcelainFlags {
5809 pub new_commits: bool,
5811 pub modified: bool,
5813 pub untracked: bool,
5815}
5816
5817pub fn submodule_porcelain_flags(
5823 super_worktree: &Path,
5824 rel_path: &str,
5825 recorded_oid: ObjectId,
5826) -> SubmodulePorcelainFlags {
5827 let sub_dir = super_worktree.join(rel_path);
5828 let Some(sub_git_dir) = submodule_embedded_git_dir(&sub_dir) else {
5829 return SubmodulePorcelainFlags::default();
5830 };
5831 let Some(sub_head) = read_submodule_head_oid(&sub_dir) else {
5832 return SubmodulePorcelainFlags::default();
5833 };
5834
5835 let new_commits = sub_head != recorded_oid;
5836
5837 let index_path = sub_git_dir.join("index");
5838 let sub_index = match crate::index::Index::load(&index_path) {
5839 Ok(ix) => ix,
5840 Err(_) => {
5841 return SubmodulePorcelainFlags {
5842 new_commits,
5843 ..Default::default()
5844 }
5845 }
5846 };
5847
5848 let tracked: std::collections::BTreeSet<String> = sub_index
5849 .entries
5850 .iter()
5851 .filter(|e| e.stage() == 0)
5852 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
5853 .collect();
5854 let untracked = submodule_dir_has_untracked_inner(&sub_dir, &sub_dir, &tracked, &sub_index);
5855
5856 let objects_dir = sub_git_dir.join("objects");
5857 let odb = Odb::new(&objects_dir);
5858
5859 let sub_head_tree = (|| -> Option<ObjectId> {
5860 let h = fs::read_to_string(sub_git_dir.join("HEAD")).ok()?;
5861 let h_str = h.trim();
5862 let commit_oid = if let Some(r) = h_str.strip_prefix("ref: ") {
5863 let oid_hex = fs::read_to_string(sub_git_dir.join(r)).ok()?;
5864 ObjectId::from_hex(oid_hex.trim()).ok()?
5865 } else {
5866 ObjectId::from_hex(h_str).ok()?
5867 };
5868 let obj = odb.read(&commit_oid).ok()?;
5869 let commit = parse_commit(&obj.data).ok()?;
5870 Some(commit.tree)
5871 })();
5872
5873 let staged_dirty = sub_head_tree
5874 .as_ref()
5875 .map(|t| diff_index_to_tree(&odb, &sub_index, Some(t), false).map(|v| !v.is_empty()))
5876 .unwrap_or(Ok(false));
5877 let staged_dirty = staged_dirty.unwrap_or(false);
5878
5879 let unstaged_dirty = diff_index_to_worktree(&odb, &sub_index, &sub_dir, false, true)
5880 .map(|v| !v.is_empty())
5881 .unwrap_or(false);
5882
5883 let mut modified = staged_dirty || unstaged_dirty;
5884
5885 for e in &sub_index.entries {
5890 if e.stage() != 0 || e.mode != 0o160000 {
5891 continue;
5892 }
5893 let child = String::from_utf8_lossy(&e.path).into_owned();
5894 let full_rel = if rel_path.is_empty() {
5895 child
5896 } else {
5897 format!("{rel_path}/{child}")
5898 };
5899 let nested = submodule_porcelain_flags(super_worktree, &full_rel, e.oid);
5900 modified |= nested.modified;
5901 }
5902
5903 SubmodulePorcelainFlags {
5904 new_commits,
5905 modified,
5906 untracked,
5907 }
5908}
5909
5910fn submodule_dir_has_untracked_inner(
5911 dir: &Path,
5912 root: &Path,
5913 tracked: &std::collections::BTreeSet<String>,
5914 owning_index: &Index,
5915) -> bool {
5916 let entries = match fs::read_dir(dir) {
5917 Ok(e) => e,
5918 Err(_) => return false,
5919 };
5920 let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
5921 sorted.sort_by_key(|e| e.file_name());
5922
5923 for entry in sorted {
5924 let name = entry.file_name().to_string_lossy().to_string();
5925 if name == ".git" {
5926 continue;
5927 }
5928 let path = entry.path();
5929 let rel = path
5930 .strip_prefix(root)
5931 .map(|p| p.to_string_lossy().to_string())
5932 .unwrap_or_else(|_| name.clone());
5933
5934 let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
5935 if is_dir {
5936 let is_gitlink = owning_index
5937 .get(rel.as_bytes(), 0)
5938 .is_some_and(|e| e.mode == 0o160000);
5939 if is_gitlink {
5940 let Some(nested_git) = submodule_embedded_git_dir(&path) else {
5941 continue;
5942 };
5943 let nested_index_path = nested_git.join("index");
5944 let Ok(nested_ix) = crate::index::Index::load(&nested_index_path) else {
5945 continue;
5946 };
5947 let nested_tracked: std::collections::BTreeSet<String> = nested_ix
5948 .entries
5949 .iter()
5950 .filter(|e| e.stage() == 0)
5951 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
5952 .collect();
5953 if submodule_dir_has_untracked_inner(&path, &path, &nested_tracked, &nested_ix) {
5954 return true;
5955 }
5956 } else if submodule_dir_has_untracked_inner(&path, root, tracked, owning_index) {
5957 return true;
5958 }
5959 } else if !tracked.contains(&rel) {
5960 return true;
5961 }
5962 }
5963 false
5964}
5965
5966pub fn apply_orderfile_entries(
5969 entries: Vec<DiffEntry>,
5970 order_path: &str,
5971 cwd: &Path,
5972) -> Result<Vec<DiffEntry>> {
5973 apply_orderfile(entries, order_path, cwd)
5974}
5975
5976fn apply_orderfile(
5977 mut entries: Vec<DiffEntry>,
5978 order_path: &str,
5979 cwd: &Path,
5980) -> Result<Vec<DiffEntry>> {
5981 let patterns = read_orderfile_patterns(order_path, cwd)?;
5982 let sort_key = |entry: &DiffEntry| -> usize {
5983 let path = entry
5984 .new_path
5985 .as_ref()
5986 .or(entry.old_path.as_ref())
5987 .cloned()
5988 .unwrap_or_default();
5989 for (i, pat) in patterns.iter().enumerate() {
5990 if orderfile_pattern_matches(pat, &path) {
5991 return i;
5992 }
5993 }
5994 patterns.len()
5995 };
5996 entries.sort_by_key(|e| sort_key(e));
5997 Ok(entries)
5998}
5999
6000pub fn read_orderfile_patterns(order_path: &str, cwd: &Path) -> Result<Vec<String>> {
6002 let path = Path::new(order_path);
6003 let resolved = if path.is_absolute() {
6004 path.to_path_buf()
6005 } else {
6006 cwd.join(path)
6007 };
6008 let _meta = std::fs::metadata(&resolved)
6009 .map_err(|e| Error::Message(format!("could not read orderfile {order_path}: {e}")))?;
6010 let mut f = std::fs::File::open(&resolved)
6011 .map_err(|e| Error::Message(format!("could not read orderfile {order_path}: {e}")))?;
6012 let mut content = String::new();
6013 std::io::Read::read_to_string(&mut f, &mut content)
6014 .map_err(|e| Error::Message(format!("could not read orderfile {order_path}: {e}")))?;
6015 Ok(content
6016 .lines()
6017 .map(|l| l.trim().to_string())
6018 .filter(|l| !l.is_empty() && !l.starts_with('#'))
6019 .collect())
6020}
6021
6022pub fn apply_rotate_skip_entries(
6024 mut entries: Vec<DiffEntry>,
6025 rotate_to: Option<&str>,
6026 skip_to: Option<&str>,
6027) -> Result<Vec<DiffEntry>> {
6028 let Some(needle) = rotate_to.or(skip_to) else {
6029 return Ok(entries);
6030 };
6031 let needle = needle.trim();
6032 if needle.is_empty() {
6033 return Ok(entries);
6034 }
6035 let idx = entries
6036 .iter()
6037 .position(|e| e.path() == needle)
6038 .ok_or_else(|| Error::Message(format!("fatal: No such path '{needle}' in the diff")))?;
6039 if rotate_to.is_some() {
6040 entries.rotate_left(idx);
6041 }
6042 if let Some(skip) = skip_to.filter(|s| !s.trim().is_empty()) {
6043 let pos = entries
6044 .iter()
6045 .position(|e| e.path() == skip)
6046 .ok_or_else(|| Error::Message(format!("fatal: No such path '{skip}' in the diff")))?;
6047 entries.drain(..pos);
6048 }
6049 Ok(entries)
6050}
6051
6052pub fn apply_rotate_skip_log_entries(
6055 odb: &Odb,
6056 commit_tree: &ObjectId,
6057 entries: Vec<DiffEntry>,
6058 rotate_to: Option<&str>,
6059 skip_to: Option<&str>,
6060) -> Result<Vec<DiffEntry>> {
6061 let tree_paths = crate::merge_diff::all_blob_paths_in_tree_order(odb, commit_tree);
6062 apply_rotate_skip_ordered_paths(&tree_paths, entries, rotate_to, skip_to)
6063}
6064
6065fn apply_rotate_skip_ordered_paths(
6066 tree_paths: &[String],
6067 entries: Vec<DiffEntry>,
6068 rotate_to: Option<&str>,
6069 skip_to: Option<&str>,
6070) -> Result<Vec<DiffEntry>> {
6071 let rotate = rotate_to.and_then(|s| {
6072 let t = s.trim();
6073 (!t.is_empty()).then_some(t)
6074 });
6075 let skip = skip_to.and_then(|s| {
6076 let t = s.trim();
6077 (!t.is_empty()).then_some(t)
6078 });
6079 if rotate.is_none() && skip.is_none() {
6080 return Ok(entries);
6081 }
6082
6083 use std::collections::HashMap;
6084 let mut by_path: HashMap<String, DiffEntry> = HashMap::new();
6085 for e in entries {
6086 by_path.insert(e.path().to_string(), e);
6087 }
6088
6089 if rotate.is_none() {
6092 let Some(skip_path) = skip else {
6093 return Ok(by_path.into_values().collect());
6094 };
6095 let idx = tree_paths
6096 .iter()
6097 .position(|p| p == skip_path)
6098 .ok_or_else(|| {
6099 Error::Message(format!("fatal: No such path '{skip_path}' in the diff"))
6100 })?;
6101 let mut out = Vec::new();
6102 for p in tree_paths.iter().skip(idx) {
6103 if let Some(e) = by_path.remove(p) {
6104 out.push(e);
6105 }
6106 }
6107 return Ok(out);
6108 }
6109
6110 let Some(needle) = rotate else {
6111 return Ok(by_path.into_values().collect());
6112 };
6113 let idx = tree_paths
6114 .iter()
6115 .position(|p| p == needle)
6116 .ok_or_else(|| Error::Message(format!("fatal: No such path '{needle}' in the diff")))?;
6117 let mut order: Vec<String> = tree_paths.to_vec();
6118 order.rotate_left(idx);
6119 if let Some(skip_path) = skip {
6120 let pos = order.iter().position(|p| p == skip_path).ok_or_else(|| {
6121 Error::Message(format!("fatal: No such path '{skip_path}' in the diff"))
6122 })?;
6123 order.drain(..pos);
6124 }
6125 let mut out = Vec::new();
6126 for p in order {
6127 if let Some(e) = by_path.remove(&p) {
6128 out.push(e);
6129 }
6130 }
6131 Ok(out)
6132}
6133
6134pub fn orderfile_pattern_matches(pattern: &str, path: &str) -> bool {
6137 let name = path.rsplit('/').next().unwrap_or(path);
6138 orderfile_glob_match(pattern, name) || orderfile_glob_match(pattern, path)
6139}
6140
6141fn orderfile_glob_match(pattern: &str, text: &str) -> bool {
6143 let mut pi = 0;
6144 let mut ti = 0;
6145 let pb = pattern.as_bytes();
6146 let tb = text.as_bytes();
6147 let mut star_pi = usize::MAX;
6148 let mut star_ti = 0;
6149
6150 while ti < tb.len() {
6151 if pi < pb.len() && (pb[pi] == b'?' || pb[pi] == tb[ti]) {
6152 pi += 1;
6153 ti += 1;
6154 } else if pi < pb.len() && pb[pi] == b'*' {
6155 star_pi = pi;
6156 star_ti = ti;
6157 pi += 1;
6158 } else if star_pi != usize::MAX {
6159 pi = star_pi + 1;
6160 star_ti += 1;
6161 ti = star_ti;
6162 } else {
6163 return false;
6164 }
6165 }
6166 while pi < pb.len() && pb[pi] == b'*' {
6167 pi += 1;
6168 }
6169 pi == pb.len()
6170}