1use std::fs;
24#[cfg(unix)]
25use std::os::unix::fs::MetadataExt;
26use std::path::{Path, PathBuf};
27
28use crate::config::ConfigSet;
29use crate::diff_indent_heuristic;
30use crate::error::{Error, Result};
31use crate::index::{Index, IndexEntry};
32use crate::objects::{parse_commit, parse_tree, CommitData, ObjectId, ObjectKind, TreeEntry};
33use crate::odb::Odb;
34use crate::userdiff::FuncnameMatcher;
35
/// Splits a unified-diff body into one string slice per hunk.
///
/// A hunk is considered to start at offset 0 when `body` begins with `@@`, and at every
/// `@@ ` that directly follows a newline. Each returned slice runs from one hunk start up to
/// the next one (or to the end of `body`). Any text in front of the first detected start is
/// not returned, and the result is empty when no start is found.
fn imara_unified_hunk_slices(body: &str) -> Vec<&str> {
    let leading = body.starts_with("@@").then_some(0usize);
    let interior = body
        .match_indices("\n@@ ")
        .map(|(newline_at, _)| newline_at + 1);

    // Hunk starts in ascending order, terminated by the end of the body so that every
    // adjacent pair of offsets delimits exactly one hunk.
    let boundaries: Vec<usize> = leading
        .into_iter()
        .chain(interior)
        .chain(std::iter::once(body.len()))
        .collect();

    boundaries
        .windows(2)
        .map(|pair| &body[pair[0]..pair[1]])
        .collect()
}
48
49fn histogram_unified_body_raw(
50 old_content: &str,
51 new_content: &str,
52 context_lines: usize,
53 inter_hunk_context: usize,
54) -> String {
55 use imara_diff::{Algorithm, Diff, Hunk, InternedInput};
56 use std::fmt::Write as _;
57
58 let input = InternedInput::new(old_content, new_content);
59 let mut diff = Diff::compute(Algorithm::Histogram, &input);
60 diff.postprocess_lines(&input);
61
62 let hunks: Vec<Hunk> = diff.hunks().collect();
68 if hunks.is_empty() {
69 return String::new();
70 }
71
72 let ctx = context_lines.min(u32::MAX as usize) as u32;
73 let max_gap = (2usize.saturating_mul(context_lines))
74 .saturating_add(inter_hunk_context)
75 .min(u32::MAX as usize) as u32;
76 let before_len = input.before.len() as u32;
77 let after_len = input.after.len() as u32;
78
79 let mut groups: Vec<&[Hunk]> = Vec::new();
81 let mut group_start = 0usize;
82 for i in 1..hunks.len() {
83 if hunks[i].before.start - hunks[i - 1].before.end > max_gap {
84 groups.push(&hunks[group_start..i]);
85 group_start = i;
86 }
87 }
88 groups.push(&hunks[group_start..]);
89
90 fn push_line(out: &mut String, prefix: char, text: &str) {
91 out.push(prefix);
92 out.push_str(text);
93 if !text.ends_with('\n') {
94 out.push('\n');
95 }
96 }
97
98 fn fmt_side(start: u32, count: u32) -> String {
101 let shown_start = if count == 0 { start } else { start + 1 };
102 if count == 1 {
103 format!("{shown_start}")
104 } else {
105 format!("{shown_start},{count}")
106 }
107 }
108
109 let mut out = String::new();
110 for group in groups {
111 let first = &group[0];
112 let last = &group[group.len() - 1];
113 let b_start = first.before.start.saturating_sub(ctx);
114 let a_start = first.after.start.saturating_sub(ctx);
115 let b_end = (last.before.end.saturating_add(ctx)).min(before_len);
116 let a_end = (last.after.end.saturating_add(ctx)).min(after_len);
117
118 let _ = writeln!(
119 out,
120 "@@ -{} +{} @@",
121 fmt_side(b_start, b_end - b_start),
122 fmt_side(a_start, a_end - a_start)
123 );
124
125 let mut pos = b_start;
126 for hunk in group {
127 for &token in &input.before[pos as usize..hunk.before.start as usize] {
128 push_line(&mut out, ' ', input.interner[token]);
129 }
130 for &token in &input.before[hunk.before.start as usize..hunk.before.end as usize] {
131 push_line(&mut out, '-', input.interner[token]);
132 }
133 for &token in &input.after[hunk.after.start as usize..hunk.after.end as usize] {
134 push_line(&mut out, '+', input.interner[token]);
135 }
136 pos = hunk.before.end;
137 }
138 for &token in &input.before[pos as usize..b_end as usize] {
139 push_line(&mut out, ' ', input.interner[token]);
140 }
141 }
142
143 out
144}
145
/// Returns only the `@@` hunk sections of a histogram diff between `old_content` and
/// `new_content`; no `---`/`+++` header lines are produced.
///
/// This is a thin public wrapper that forwards all four arguments unchanged to
/// `histogram_unified_body_raw`.
#[must_use]
pub fn unified_diff_histogram_hunks_only(
    old_content: &str,
    new_content: &str,
    context_lines: usize,
    inter_hunk_context: usize,
) -> String {
    histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context)
}
158
159#[must_use]
161pub fn unified_diff_histogram_with_prefix_and_funcname(
162 old_content: &str,
163 new_content: &str,
164 old_path: &str,
165 new_path: &str,
166 context_lines: usize,
167 inter_hunk_context: usize,
168 src_prefix: &str,
169 dst_prefix: &str,
170 funcname_matcher: Option<&FuncnameMatcher>,
171 quote_path_fully: bool,
172) -> String {
173 use crate::quote_path::format_diff_path_with_prefix;
174
175 let body =
176 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context);
177
178 let mut output = String::new();
179 if old_path == "/dev/null" {
180 output.push_str("--- /dev/null\n");
181 } else if src_prefix.is_empty() {
182 output.push_str(&format!("--- {old_path}\n"));
183 } else {
184 output.push_str("--- ");
185 output.push_str(&format_diff_path_with_prefix(
186 src_prefix,
187 old_path,
188 quote_path_fully,
189 ));
190 output.push('\n');
191 }
192 if new_path == "/dev/null" {
193 output.push_str("+++ /dev/null\n");
194 } else if dst_prefix.is_empty() {
195 output.push_str(&format!("+++ {new_path}\n"));
196 } else {
197 output.push_str("+++ ");
198 output.push_str(&format_diff_path_with_prefix(
199 dst_prefix,
200 new_path,
201 quote_path_fully,
202 ));
203 output.push('\n');
204 }
205
206 let old_lines: Vec<&str> = old_content.lines().collect();
207 for hunk_str in imara_unified_hunk_slices(&body) {
208 if hunk_str.is_empty() {
209 continue;
210 }
211 if let Some(first_newline) = hunk_str.find('\n') {
212 let header_line = &hunk_str[..first_newline];
213 let rest = &hunk_str[first_newline..];
214 if let Some(func_ctx) =
215 extract_function_context(header_line, &old_lines, funcname_matcher)
216 {
217 output.push_str(header_line);
218 output.push(' ');
219 output.push_str(&func_ctx);
220 output.push_str(rest);
221 } else {
222 output.push_str(hunk_str);
223 }
224 } else {
225 output.push_str(hunk_str);
226 }
227 }
228
229 output
230}
231
232#[must_use]
234pub fn indent_heuristic_from_config(config: &ConfigSet) -> bool {
235 match config.get_bool("diff.indentHeuristic") {
236 Some(Ok(b)) => b,
237 Some(Err(_)) | None => true,
238 }
239}
240
241#[must_use]
243pub fn resolve_indent_heuristic(
244 config: &ConfigSet,
245 cli_indent_heuristic: bool,
246 cli_no_indent_heuristic: bool,
247) -> bool {
248 if cli_no_indent_heuristic {
249 false
250 } else if cli_indent_heuristic {
251 true
252 } else {
253 indent_heuristic_from_config(config)
254 }
255}
256
/// Scans `argv` for `--indent-heuristic` and `--no-indent-heuristic`.
///
/// Returns `(indent_heuristic, no_indent_heuristic)`. The two flags cancel each other, so only
/// the one appearing last in `argv` is reported as `true`; both are `false` when neither
/// occurs. All other arguments are ignored.
#[must_use]
pub fn parse_indent_heuristic_cli_flags(argv: &[String]) -> (bool, bool) {
    // Because each flag resets the other, only the final occurrence matters: walk backwards
    // and stop at the first relevant argument.
    let last_seen = argv.iter().rev().find_map(|arg| match arg.as_str() {
        "--indent-heuristic" => Some(true),
        "--no-indent-heuristic" => Some(false),
        _ => None,
    });

    match last_seen {
        Some(true) => (true, false),
        Some(false) => (false, true),
        None => (false, false),
    }
}
277
278#[must_use]
283pub fn word_diff_ops_imara(old_words: &[&str], new_words: &[&str]) -> Vec<similar::DiffOp> {
284 use imara_diff::{Algorithm, Diff, InternedInput};
285 use similar::DiffOp;
286
287 let mut input: InternedInput<&str> = InternedInput::default();
288 input.update_before(old_words.iter().copied());
289 input.update_after(new_words.iter().copied());
290 let mut diff = Diff::compute(Algorithm::Myers, &input);
291 diff.postprocess_lines(&input);
292
293 let mut ops: Vec<DiffOp> = Vec::new();
294 let mut old_pos = 0usize;
295 let mut new_pos = 0usize;
296 for hunk in diff.hunks() {
297 let b_start = hunk.before.start as usize;
298 let b_end = hunk.before.end as usize;
299 let a_start = hunk.after.start as usize;
300 let a_end = hunk.after.end as usize;
301 if b_start > old_pos {
302 let len = b_start - old_pos;
303 ops.push(DiffOp::Equal {
304 old_index: old_pos,
305 new_index: new_pos,
306 len,
307 });
308 }
309 let del = b_end - b_start;
310 let ins = a_end - a_start;
311 if del > 0 && ins > 0 {
312 ops.push(DiffOp::Replace {
313 old_index: b_start,
314 old_len: del,
315 new_index: a_start,
316 new_len: ins,
317 });
318 } else if del > 0 {
319 ops.push(DiffOp::Delete {
320 old_index: b_start,
321 old_len: del,
322 new_index: a_start,
323 });
324 } else if ins > 0 {
325 ops.push(DiffOp::Insert {
326 old_index: b_start,
327 new_index: a_start,
328 new_len: ins,
329 });
330 }
331 old_pos = b_end;
332 new_pos = a_end;
333 }
334 if old_pos < old_words.len() {
335 ops.push(DiffOp::Equal {
336 old_index: old_pos,
337 new_index: new_pos,
338 len: old_words.len() - old_pos,
339 });
340 }
341 diff_indent_heuristic::apply_change_compact_to_ops(&ops, old_words, new_words, false)
344}
345
/// Computes diff ops between two line slices.
///
/// Public re-export style wrapper: all arguments are forwarded unchanged to
/// `diff_indent_heuristic::diff_slice_ops_compacted`, whose result is returned as is.
/// `algorithm` selects the `similar` diff algorithm and `indent_heuristic` is passed through
/// to that helper.
#[must_use]
pub fn diff_slice_ops_compacted(
    old_lines: &[&str],
    new_lines: &[&str],
    algorithm: similar::Algorithm,
    indent_heuristic: bool,
) -> Vec<similar::DiffOp> {
    diff_indent_heuristic::diff_slice_ops_compacted(
        old_lines,
        new_lines,
        algorithm,
        indent_heuristic,
    )
}
361
/// Diffs two texts and converts the resulting ops into a new-to-old line mapping.
///
/// The ops come from `diff_indent_heuristic::diff_lines_ops_compacted` (given both texts,
/// `algorithm` and `indent_heuristic`); they are then handed, together with `new_line_count`,
/// to `diff_indent_heuristic::map_new_to_old_from_ops`, whose result is returned.
///
/// NOTE(review): presumably element `i` of the result is the old line index matching new line
/// `i`, or `None` for lines without a counterpart — confirm against `map_new_to_old_from_ops`.
#[must_use]
pub fn map_new_to_old_lines_compacted(
    old_joined: &str,
    new_joined: &str,
    algorithm: similar::Algorithm,
    indent_heuristic: bool,
    new_line_count: usize,
) -> Vec<Option<usize>> {
    let ops = diff_indent_heuristic::diff_lines_ops_compacted(
        old_joined,
        new_joined,
        algorithm,
        indent_heuristic,
    );
    diff_indent_heuristic::map_new_to_old_from_ops(&ops, new_line_count)
}
379
/// Kind of change recorded by a diff entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffStatus {
    /// The path exists only on the new side.
    Added,
    /// The path exists only on the old side.
    Deleted,
    /// The path exists on both sides with a different object or mode.
    Modified,
    /// Rename status (letter `R`).
    Renamed,
    /// Copy status (letter `C`).
    Copied,
    /// The file-type bits of the mode differ between the two sides.
    TypeChanged,
    /// The path has unmerged (non-zero stage) index entries.
    Unmerged,
}

impl DiffStatus {
    /// Returns the single-character code for this status:
    /// `A`, `D`, `M`, `R`, `C`, `T` or `U`.
    #[must_use]
    pub fn letter(&self) -> char {
        match *self {
            DiffStatus::Added => 'A',
            DiffStatus::Deleted => 'D',
            DiffStatus::Modified => 'M',
            DiffStatus::Renamed => 'R',
            DiffStatus::Copied => 'C',
            DiffStatus::TypeChanged => 'T',
            DiffStatus::Unmerged => 'U',
        }
    }
}
414
/// One changed path produced by the diff routines in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffEntry {
    /// What kind of change this entry describes.
    pub status: DiffStatus,
    /// Path on the old side; `None` for additions.
    pub old_path: Option<String>,
    /// Path on the new side; `None` for deletions.
    pub new_path: Option<String>,
    /// Old-side file mode as text; the producers in this module use `"000000"` when the old
    /// side does not exist.
    pub old_mode: String,
    /// New-side file mode as text; the producers in this module generally use `"000000"` when
    /// the new side does not exist.
    pub new_mode: String,
    /// Old-side object id; the zero id when the old side does not exist.
    pub old_oid: ObjectId,
    /// New-side object id; the zero id when the new side does not exist or was not hashed.
    pub new_oid: ObjectId,
    /// Optional score. NOTE(review): presumably the similarity score for renames/copies; the
    /// producers visible here always set it to `None` — confirm with the rename detector.
    pub score: Option<u32>,
}
435
436impl DiffEntry {
437 #[must_use]
439 pub fn path(&self) -> &str {
440 self.new_path
441 .as_deref()
442 .or(self.old_path.as_deref())
443 .unwrap_or("")
444 }
445
446 #[must_use]
451 pub fn display_path(&self) -> String {
452 match self.status {
453 DiffStatus::Renamed | DiffStatus::Copied => {
454 let old = self.old_path.as_deref().unwrap_or("");
455 let new = self.new_path.as_deref().unwrap_or("");
456 if old.is_empty() || new.is_empty() {
457 self.path().to_owned()
458 } else {
459 format!("{old} -> {new}")
460 }
461 }
462 _ => self.path().to_owned(),
463 }
464 }
465}
466
/// The all-zero object id written as 40 hexadecimal digits.
pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
469
470#[must_use]
472pub fn zero_oid() -> ObjectId {
473 ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
474 panic!("internal error: failed to create zero OID");
476 })
477}
478
479#[must_use]
481pub fn empty_blob_oid() -> ObjectId {
482 ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391").unwrap_or_else(|_| {
483 panic!("internal error: failed to create empty blob OID");
485 })
486}
487
/// Recursively compares two tree objects and returns the changed non-tree entries.
///
/// Either side may be `None`, which is treated as an empty tree. `prefix` is the path prefix
/// passed down to the entry formatter for the reported paths. Entries for the tree objects
/// themselves are not reported (this calls `diff_trees_opts` with `show_trees = false`).
///
/// # Errors
///
/// Propagates any error from reading or parsing the tree objects.
pub fn diff_trees(
    odb: &Odb,
    old_tree_oid: Option<&ObjectId>,
    new_tree_oid: Option<&ObjectId>,
    prefix: &str,
) -> Result<Vec<DiffEntry>> {
    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false)
}
510
/// Like [`diff_trees`], but additionally reports an entry for every added, deleted or
/// modified tree object itself (this calls `diff_trees_opts` with `show_trees = true`).
///
/// # Errors
///
/// Propagates any error from reading or parsing the tree objects.
pub fn diff_trees_show_tree_entries(
    odb: &Odb,
    old_tree_oid: Option<&ObjectId>,
    new_tree_oid: Option<&ObjectId>,
    prefix: &str,
) -> Result<Vec<DiffEntry>> {
    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true)
}
522
523fn diff_trees_opts(
524 odb: &Odb,
525 old_tree_oid: Option<&ObjectId>,
526 new_tree_oid: Option<&ObjectId>,
527 prefix: &str,
528 show_trees: bool,
529) -> Result<Vec<DiffEntry>> {
530 let old_entries = match old_tree_oid {
531 Some(oid) => read_tree(odb, oid)?,
532 None => Vec::new(),
533 };
534 let new_entries = match new_tree_oid {
535 Some(oid) => read_tree(odb, oid)?,
536 None => Vec::new(),
537 };
538
539 let mut result = Vec::new();
540 diff_tree_entries_opts(
541 odb,
542 &old_entries,
543 &new_entries,
544 prefix,
545 show_trees,
546 &mut result,
547 )?;
548 Ok(result)
549}
550
551fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
553 let obj = odb.read(oid)?;
554 if obj.kind != ObjectKind::Tree {
555 return Err(Error::CorruptObject(format!(
556 "expected tree, got {}",
557 obj.kind.as_str()
558 )));
559 }
560 parse_tree(&obj.data)
561}
562
563fn diff_tree_entries_opts(
565 odb: &Odb,
566 old: &[TreeEntry],
567 new: &[TreeEntry],
568 prefix: &str,
569 show_trees: bool,
570 result: &mut Vec<DiffEntry>,
571) -> Result<()> {
572 let mut oi = 0;
573 let mut ni = 0;
574
575 while oi < old.len() || ni < new.len() {
576 match (old.get(oi), new.get(ni)) {
577 (Some(o), Some(n)) => {
578 let cmp = crate::objects::tree_entry_cmp(
579 &o.name,
580 is_tree_mode(o.mode),
581 &n.name,
582 is_tree_mode(n.mode),
583 );
584 match cmp {
585 std::cmp::Ordering::Less => {
586 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
588 oi += 1;
589 }
590 std::cmp::Ordering::Greater => {
591 emit_added_opts(odb, n, prefix, show_trees, result)?;
593 ni += 1;
594 }
595 std::cmp::Ordering::Equal => {
596 if o.oid != n.oid || o.mode != n.mode {
598 let name_str = String::from_utf8_lossy(&o.name);
599 let path = format_path(prefix, &name_str);
600 if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
601 if show_trees {
603 result.push(DiffEntry {
604 status: DiffStatus::Modified,
605 old_path: Some(path.clone()),
606 new_path: Some(path.clone()),
607 old_mode: format_mode(o.mode),
608 new_mode: format_mode(n.mode),
609 old_oid: o.oid,
610 new_oid: n.oid,
611 score: None,
612 });
613 }
614 let nested = diff_trees_opts(
616 odb,
617 Some(&o.oid),
618 Some(&n.oid),
619 &path,
620 show_trees,
621 )?;
622 result.extend(nested);
623 } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
624 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
626 emit_added_opts(odb, n, prefix, show_trees, result)?;
627 } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
628 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
630 emit_added_opts(odb, n, prefix, show_trees, result)?;
631 } else {
632 let old_type = o.mode & 0o170000;
636 let new_type = n.mode & 0o170000;
637 result.push(DiffEntry {
638 status: if old_type != new_type {
639 DiffStatus::TypeChanged
640 } else {
641 DiffStatus::Modified
642 },
643 old_path: Some(path.clone()),
644 new_path: Some(path),
645 old_mode: format_mode(o.mode),
646 new_mode: format_mode(n.mode),
647 old_oid: o.oid,
648 new_oid: n.oid,
649 score: None,
650 });
651 }
652 }
653 oi += 1;
654 ni += 1;
655 }
656 }
657 }
658 (Some(o), None) => {
659 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
660 oi += 1;
661 }
662 (None, Some(n)) => {
663 emit_added_opts(odb, n, prefix, show_trees, result)?;
664 ni += 1;
665 }
666 (None, None) => break,
667 }
668 }
669
670 Ok(())
671}
672
673fn emit_deleted_opts(
674 odb: &Odb,
675 entry: &TreeEntry,
676 prefix: &str,
677 show_trees: bool,
678 result: &mut Vec<DiffEntry>,
679) -> Result<()> {
680 let name_str = String::from_utf8_lossy(&entry.name);
681 let path = format_path(prefix, &name_str);
682 if is_tree_mode(entry.mode) {
683 if show_trees {
684 result.push(DiffEntry {
685 status: DiffStatus::Deleted,
686 old_path: Some(path.clone()),
687 new_path: None,
688 old_mode: format_mode(entry.mode),
689 new_mode: "000000".to_owned(),
690 old_oid: entry.oid,
691 new_oid: zero_oid(),
692 score: None,
693 });
694 }
695 let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
697 result.extend(nested);
698 } else {
699 result.push(DiffEntry {
700 status: DiffStatus::Deleted,
701 old_path: Some(path.clone()),
702 new_path: None,
703 old_mode: format_mode(entry.mode),
704 new_mode: "000000".to_owned(),
705 old_oid: entry.oid,
706 new_oid: zero_oid(),
707 score: None,
708 });
709 }
710 Ok(())
711}
712
713fn emit_added_opts(
714 odb: &Odb,
715 entry: &TreeEntry,
716 prefix: &str,
717 show_trees: bool,
718 result: &mut Vec<DiffEntry>,
719) -> Result<()> {
720 let name_str = String::from_utf8_lossy(&entry.name);
721 let path = format_path(prefix, &name_str);
722 if is_tree_mode(entry.mode) {
723 if show_trees {
724 result.push(DiffEntry {
725 status: DiffStatus::Added,
726 old_path: None,
727 new_path: Some(path.clone()),
728 old_mode: "000000".to_owned(),
729 new_mode: format_mode(entry.mode),
730 old_oid: zero_oid(),
731 new_oid: entry.oid,
732 score: None,
733 });
734 }
735 let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
737 result.extend(nested);
738 } else {
739 result.push(DiffEntry {
740 status: DiffStatus::Added,
741 old_path: None,
742 new_path: Some(path),
743 old_mode: "000000".to_owned(),
744 new_mode: format_mode(entry.mode),
745 old_oid: zero_oid(),
746 new_oid: entry.oid,
747 score: None,
748 });
749 }
750 Ok(())
751}
752
753pub fn diff_index_to_tree(
773 odb: &Odb,
774 index: &Index,
775 tree_oid: Option<&ObjectId>,
776 ignore_submodules: bool,
777) -> Result<Vec<DiffEntry>> {
778 let tree_entries = match tree_oid {
780 Some(oid) => flatten_tree(odb, oid, "")?,
781 None => Vec::new(),
782 };
783
784 let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
786 std::collections::BTreeMap::new();
787 for entry in &tree_entries {
788 tree_map.insert(&entry.path, entry);
789 }
790
791 let mut result = Vec::new();
792 let mut stage0_paths = std::collections::BTreeSet::new();
793 let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
794 std::collections::BTreeMap::new();
795
796 for ie in &index.entries {
798 let path = String::from_utf8_lossy(&ie.path).to_string();
799 if ie.stage() == 0 && ie.intent_to_add() {
800 continue;
803 }
804 if ie.stage() != 0 {
805 let rank = match ie.stage() {
806 2 => 0u8,
807 3 => 1u8,
808 1 => 2u8,
809 _ => 3u8,
810 };
811 match unmerged_modes.get(&path) {
812 Some((existing_rank, _)) if *existing_rank <= rank => {}
813 _ => {
814 unmerged_modes.insert(path, (rank, ie.mode));
815 }
816 }
817 continue;
818 }
819 if ignore_submodules && ie.mode == 0o160000 {
820 let _ = tree_map.remove(path.as_str());
821 stage0_paths.insert(path.clone());
822 continue;
823 }
824 stage0_paths.insert(path.clone());
825 match tree_map.remove(path.as_str()) {
826 Some(te) => {
827 if te.oid != ie.oid || te.mode != ie.mode {
829 result.push(DiffEntry {
830 status: DiffStatus::Modified,
831 old_path: Some(path.clone()),
832 new_path: Some(path),
833 old_mode: format_mode(te.mode),
834 new_mode: format_mode(ie.mode),
835 old_oid: te.oid,
836 new_oid: ie.oid,
837 score: None,
838 });
839 }
840 }
841 None => {
842 result.push(DiffEntry {
844 status: DiffStatus::Added,
845 old_path: None,
846 new_path: Some(path),
847 old_mode: "000000".to_owned(),
848 new_mode: format_mode(ie.mode),
849 old_oid: zero_oid(),
850 new_oid: ie.oid,
851 score: None,
852 });
853 }
854 }
855 }
856
857 for (path, (_, mode)) in &unmerged_modes {
858 if stage0_paths.contains(path) {
859 continue;
860 }
861 tree_map.remove(path.as_str());
862 result.push(DiffEntry {
863 status: DiffStatus::Unmerged,
864 old_path: Some(path.clone()),
865 new_path: Some(path.clone()),
866 old_mode: "000000".to_owned(),
867 new_mode: format_mode(*mode),
868 old_oid: zero_oid(),
869 new_oid: zero_oid(),
870 score: None,
871 });
872 }
873
874 for (path, te) in tree_map {
876 if ignore_submodules && te.mode == 0o160000 {
877 continue;
878 }
879 result.push(DiffEntry {
880 status: DiffStatus::Deleted,
881 old_path: Some(path.to_owned()),
882 new_path: None,
883 old_mode: format_mode(te.mode),
884 new_mode: "000000".to_owned(),
885 old_oid: te.oid,
886 new_oid: zero_oid(),
887 score: None,
888 });
889 }
890
891 result.sort_by(|a, b| a.path().cmp(b.path()));
892 Ok(result)
893}
894
/// Compares the index against the files under `work_tree`.
///
/// Convenience wrapper around [`diff_index_to_worktree_with_options`]: the two boolean
/// arguments are copied into a [`DiffIndexToWorktreeOptions`] and every other option keeps
/// its default value.
///
/// # Errors
///
/// Propagates any error returned by [`diff_index_to_worktree_with_options`].
pub fn diff_index_to_worktree(
    odb: &Odb,
    index: &Index,
    work_tree: &Path,
    ignore_submodule_untracked: bool,
    simplify_gitlinks: bool,
) -> Result<Vec<DiffEntry>> {
    diff_index_to_worktree_with_options(
        odb,
        index,
        work_tree,
        DiffIndexToWorktreeOptions {
            ignore_submodule_untracked,
            simplify_gitlinks,
            // Remaining options (index mtime, broken-gitlink errors) stay at their defaults.
            ..DiffIndexToWorktreeOptions::default()
        },
    )
}
937
/// Options for [`diff_index_to_worktree_with_options`]. All fields default to `None`/`false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DiffIndexToWorktreeOptions {
    /// `(seconds, nanoseconds)` timestamp used for the racy-entry check: an entry whose stat
    /// data matches is still re-hashed when its mtime is not older than this timestamp.
    /// `None` disables the check. NOTE(review): presumably the mtime of the index file itself
    /// — confirm with callers.
    pub index_mtime: Option<(u32, u32)>,
    /// When set, untracked content inside a submodule does not make the submodule count as
    /// modified.
    pub ignore_submodule_untracked: bool,
    /// When set, gitlink entries are compared only by the submodule's HEAD commit; the
    /// submodule's own dirty state is not inspected.
    pub simplify_gitlinks: bool,
    /// When set, a gitlink whose HEAD cannot be read is reported as an error if its directory
    /// is not an unpopulated placeholder and an embedded git directory is found.
    pub error_on_broken_gitlinks: bool,
}
955
/// Compares the index against the files under `work_tree` and returns one [`DiffEntry`] per
/// difference.
///
/// Processing per index entry:
///
/// * non-zero-stage (unmerged) entries are collected per path and handled after the main loop;
/// * entries flagged skip-worktree or assume-unchanged are ignored;
/// * gitlinks (mode `0o160000`) are compared against the submodule's HEAD and, unless
///   `options.simplify_gitlinks` is set, against the submodule's dirty flags;
/// * intent-to-add entries are reported as `Added` (file present) or `Deleted` (file missing);
/// * paths behind a symlinked directory component, and paths that are now plain directories,
///   are reported as `Deleted`; a directory containing `.git` is reported as `TypeChanged`;
/// * other files are reported as `Modified` when their mode or (hashed) content differs.
///
/// For every unmerged path an `Unmerged` entry is emitted, followed by a `Modified` entry when
/// the worktree file differs from the preferred stage entry.
///
/// # Errors
///
/// Returns `Error::Io` for filesystem errors other than "not found", `Error::ConfigError` for
/// an unreadable submodule HEAD when `options.error_on_broken_gitlinks` applies, and
/// propagates errors from hashing worktree files.
pub fn diff_index_to_worktree_with_options(
    odb: &Odb,
    index: &Index,
    work_tree: &Path,
    options: DiffIndexToWorktreeOptions,
) -> Result<Vec<DiffEntry>> {
    use crate::config::ConfigSet;
    use crate::crlf;

    let ignore_submodule_untracked = options.ignore_submodule_untracked;
    let simplify_gitlinks = options.simplify_gitlinks;

    // Configuration is loaded from `<work_tree>/.git`; a load failure silently falls back to
    // an empty configuration.
    let git_dir = work_tree.join(".git");
    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
    let conv = crlf::ConversionConfig::from_config(&config);
    let attrs = crlf::load_gitattributes(work_tree);

    let mut result = Vec::new();
    // path -> (preference rank, entry) of the best unmerged stage seen so far.
    let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
        std::collections::BTreeMap::new();

    for ie in &index.entries {
        if ie.stage() != 0 {
            let path = String::from_utf8_lossy(&ie.path).to_string();
            // Lower rank wins: stage 2 is preferred, then stage 3, then stage 1.
            let rank = match ie.stage() {
                2 => 0u8,
                3 => 1u8,
                1 => 2u8,
                _ => 3u8,
            };
            match unmerged_base.get(&path) {
                Some((existing_rank, _)) if *existing_rank <= rank => {}
                _ => {
                    unmerged_base.insert(path, (rank, ie));
                }
            }
            continue;
        }
        if ie.skip_worktree() || ie.assume_unchanged() {
            continue;
        }
        // NOTE(review): a path that is not valid UTF-8 becomes "" here, which makes the joins
        // below resolve to `work_tree` itself — confirm this is acceptable.
        let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
        let is_intent_to_add = ie.intent_to_add();

        // --- Gitlink (submodule) entries ---
        if ie.mode == 0o160000 {
            let sub_dir = work_tree.join(path_str_ref);
            let sub_head_oid = read_submodule_head_oid(&sub_dir);
            // With a readable HEAD the commit ids are compared; without one the entry only
            // counts as matching when the directory is an unpopulated placeholder.
            let ref_matches = if let Some(oid) = sub_head_oid {
                oid == ie.oid
            } else {
                let is_placeholder = submodule_worktree_is_unpopulated_placeholder(&sub_dir);
                if options.error_on_broken_gitlinks
                    && !is_placeholder
                    && submodule_embedded_git_dir(&sub_dir).is_some()
                {
                    return Err(Error::ConfigError(format!(
                        "could not read submodule HEAD for '{path_str_ref}'"
                    )));
                }
                is_placeholder
            };
            if simplify_gitlinks {
                // Simplified mode: only the HEAD comparison matters.
                if !ref_matches {
                    let path_owned = path_str_ref.to_owned();
                    let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(ie.mode),
                        old_oid: ie.oid,
                        new_oid,
                        score: None,
                    });
                }
                continue;
            }
            let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
            if ignore_submodule_untracked {
                flags.untracked = false;
            }
            let inner_dirty = flags.modified || flags.untracked;
            if !ref_matches || inner_dirty {
                let path_owned = path_str_ref.to_owned();
                // A submodule that is only dirty inside (HEAD matches) gets the zero id as its
                // new side.
                let new_oid = if !ref_matches {
                    sub_head_oid.unwrap_or_else(zero_oid)
                } else {
                    zero_oid()
                };
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path_owned.clone()),
                    new_path: Some(path_owned),
                    old_mode: format_mode(ie.mode),
                    new_mode: format_mode(ie.mode),
                    old_oid: ie.oid,
                    new_oid,
                    score: None,
                });
            }
            continue;
        }

        let file_path = work_tree.join(path_str_ref);

        // --- Intent-to-add entries: present file => Added, missing file => Deleted ---
        if is_intent_to_add {
            match fs::symlink_metadata(&file_path) {
                Ok(meta) => {
                    let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                    let worktree_oid = hash_worktree_file(
                        odb,
                        &file_path,
                        &meta,
                        &conv,
                        &file_attrs,
                        path_str_ref,
                        None,
                    )?;
                    let worktree_mode = mode_from_metadata(&meta);
                    result.push(DiffEntry {
                        status: DiffStatus::Added,
                        old_path: None,
                        new_path: Some(path_str_ref.to_owned()),
                        old_mode: "000000".to_owned(),
                        new_mode: format_mode(worktree_mode),
                        old_oid: zero_oid(),
                        new_oid: worktree_oid,
                        score: None,
                    });
                }
                // NOTE(review): raw OS error 20 is presumably ENOTDIR (its Linux value); raw
                // errno numbers are platform specific — confirm for other targets.
                Err(e)
                    if e.kind() == std::io::ErrorKind::NotFound
                        || e.raw_os_error() == Some(20) =>
                {
                    result.push(DiffEntry {
                        status: DiffStatus::Deleted,
                        old_path: Some(path_str_ref.to_owned()),
                        new_path: None,
                        old_mode: format_mode(ie.mode),
                        new_mode: "000000".to_owned(),
                        old_oid: ie.oid,
                        new_oid: zero_oid(),
                        score: None,
                    });
                }
                Err(e) => return Err(Error::Io(e)),
            }
            continue;
        }

        // A tracked path reached through a symlinked component counts as deleted.
        if has_symlink_in_path(work_tree, path_str_ref) {
            result.push(DiffEntry {
                status: DiffStatus::Deleted,
                old_path: Some(path_str_ref.to_owned()),
                new_path: None,
                old_mode: format_mode(ie.mode),
                new_mode: "000000".to_owned(),
                old_oid: ie.oid,
                new_oid: zero_oid(),
                score: None,
            });
            continue;
        }

        match fs::symlink_metadata(&file_path) {
            // --- The tracked file has been replaced by a directory ---
            Ok(meta) if meta.is_dir() => {
                if file_path.join(".git").exists() {
                    // The directory holds a `.git`: report a type change to a gitlink, using
                    // the nested HEAD (or the zero id when it cannot be read) as the new side.
                    let head = read_submodule_head_oid(&file_path).unwrap_or_else(zero_oid);
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::TypeChanged,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(0o160000),
                        old_oid: ie.oid,
                        new_oid: head,
                        score: None,
                    });
                    continue;
                }
                // NOTE(review): unlike every other `Deleted` entry in this function, `new_mode`
                // is an empty string here rather than "000000" — confirm this is intentional.
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    new_mode: String::new(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            // --- Regular file or symlink present in the worktree ---
            Ok(meta) => {
                let worktree_mode = mode_from_metadata(&meta);
                let stat_same = stat_matches(ie, &meta);
                // Stat data unchanged but mode differs: a mode-only change, reported without
                // hashing the file (both sides carry the index object id).
                if stat_same && worktree_mode != ie.mode {
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: ie.oid,
                        score: None,
                    });
                    continue;
                }

                // Stat data and mode unchanged and the entry is not racy: treat as clean
                // without reading the file.
                if stat_same && worktree_mode == ie.mode && !entry_is_racy(ie, options.index_mtime) {
                    continue;
                }

                let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                let worktree_oid = hash_worktree_file(
                    odb,
                    &file_path,
                    &meta,
                    &conv,
                    &file_attrs,
                    path_str_ref,
                    Some(ie),
                )?;

                // Fallback: when the hash from `hash_worktree_file` differs, also hash the raw
                // file bytes as a blob; if that matches the index the file counts as
                // unchanged. A failed read simply skips the fallback.
                let mut eff_oid = worktree_oid;
                if eff_oid != ie.oid {
                    if let Ok(raw) = fs::read(&file_path) {
                        let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
                        if raw_oid == ie.oid {
                            eff_oid = ie.oid;
                        }
                    }
                }

                if eff_oid != ie.oid || worktree_mode != ie.mode {
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: eff_oid,
                        score: None,
                    });
                }
            }
            // --- File missing from the worktree (see the errno note above for `Some(20)`) ---
            Err(e) if e.kind() == std::io::ErrorKind::NotFound
                || e.raw_os_error() == Some(20) => {
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    new_mode: "000000".to_owned(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            Err(e) => return Err(Error::Io(e)),
        }
    }

    // --- Unmerged paths: one `Unmerged` entry each, plus `Modified` when the worktree file
    // differs from the preferred stage entry ---
    for (path, (_, base_entry)) in unmerged_base {
        let file_path = work_tree.join(&path);
        let wt_meta = match fs::symlink_metadata(&file_path) {
            Ok(meta) => Some(meta),
            Err(e)
                if e.kind() == std::io::ErrorKind::NotFound
                    || e.raw_os_error() == Some(20) =>
            {
                None
            }
            Err(e) => return Err(Error::Io(e)),
        };

        // The `Unmerged` entry carries the worktree mode, or "000000" when the file is missing.
        let new_mode = wt_meta.as_ref().map_or_else(
            || "000000".to_owned(),
            |meta| format_mode(mode_from_metadata(meta)),
        );
        result.push(DiffEntry {
            status: DiffStatus::Unmerged,
            old_path: Some(path.clone()),
            new_path: Some(path.clone()),
            old_mode: "000000".to_owned(),
            new_mode,
            old_oid: zero_oid(),
            new_oid: zero_oid(),
            score: None,
        });

        if let Some(meta) = wt_meta {
            let file_attrs = crlf::get_file_attrs(&attrs, &path, false, &config);
            let wt_oid = hash_worktree_file(
                odb,
                &file_path,
                &meta,
                &conv,
                &file_attrs,
                &path,
                Some(base_entry),
            )?;
            let wt_mode = mode_from_metadata(&meta);
            if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path.clone()),
                    new_path: Some(path),
                    old_mode: format_mode(base_entry.mode),
                    new_mode: format_mode(wt_mode),
                    old_oid: base_entry.oid,
                    new_oid: wt_oid,
                    score: None,
                });
            }
        }
    }

    Ok(result)
}
1320
1321fn entry_is_racy(ie: &IndexEntry, index_mtime: Option<(u32, u32)>) -> bool {
1322 let Some((index_mtime_sec, index_mtime_nsec)) = index_mtime else {
1323 return false;
1324 };
1325 if index_mtime_sec == 0 {
1326 return false;
1327 }
1328 index_mtime_sec < ie.mtime_sec
1329 || (index_mtime_sec == ie.mtime_sec && index_mtime_nsec <= ie.mtime_nsec)
1330}
1331
1332pub fn worktree_differs_from_index_entry(
1340 odb: &Odb,
1341 work_tree: &Path,
1342 ie: &IndexEntry,
1343 ignore_submodule_untracked: bool,
1344) -> Result<bool> {
1345 use crate::config::ConfigSet;
1346 use crate::crlf;
1347
1348 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
1349 let file_path = work_tree.join(path_str_ref);
1350
1351 if ie.mode == 0o160000 {
1352 let sub_head_oid = read_submodule_head(&file_path);
1353 let ref_matches = match sub_head_oid {
1354 Some(oid) => oid == ie.oid,
1355 None => submodule_worktree_is_unpopulated_placeholder(&file_path),
1356 };
1357 let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
1358 if ignore_submodule_untracked {
1359 flags.untracked = false;
1360 }
1361 return Ok(!ref_matches || flags.modified || flags.untracked);
1362 }
1363
1364 let meta = match fs::symlink_metadata(&file_path) {
1365 Ok(m) => m,
1366 Err(e)
1367 if e.kind() == std::io::ErrorKind::NotFound
1368 || e.raw_os_error() == Some(20) =>
1369 {
1370 return Ok(true);
1371 }
1372 Err(e) => return Err(Error::Io(e)),
1373 };
1374
1375 if meta.is_dir() {
1376 return Ok(true);
1377 }
1378
1379 let worktree_mode = mode_from_metadata(&meta);
1380 if worktree_mode != ie.mode {
1381 return Ok(true);
1382 }
1383
1384 let git_dir = work_tree.join(".git");
1385 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1386 let conv = crlf::ConversionConfig::from_config(&config);
1387 let attrs = crlf::load_gitattributes(work_tree);
1388 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
1389 let worktree_oid = hash_worktree_file(
1390 odb,
1391 &file_path,
1392 &meta,
1393 &conv,
1394 &file_attrs,
1395 path_str_ref,
1396 Some(ie),
1397 )?;
1398
1399 let mut eff_oid = worktree_oid;
1400 if eff_oid != ie.oid {
1401 if let Ok(raw) = fs::read(&file_path) {
1402 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1403 if raw_oid == ie.oid {
1404 eff_oid = ie.oid;
1405 }
1406 }
1407 }
1408
1409 Ok(eff_oid != ie.oid)
1410}
1411
1412pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
1413 if meta.len() as u32 != ie.size {
1415 return false;
1416 }
1417 #[cfg(unix)]
1418 {
1419 use std::os::unix::fs::MetadataExt;
1420 if meta.mtime() as u32 != ie.mtime_sec {
1422 return false;
1423 }
1424 if meta.mtime_nsec() as u32 != ie.mtime_nsec {
1425 return false;
1426 }
1427 if meta.ctime() as u32 != ie.ctime_sec {
1429 return false;
1430 }
1431 if meta.ctime_nsec() as u32 != ie.ctime_nsec {
1432 return false;
1433 }
1434 if meta.ino() as u32 != ie.ino {
1436 return false;
1437 }
1438 if meta.dev() as u32 != ie.dev {
1439 return false;
1440 }
1441 }
1442 #[cfg(not(unix))]
1443 {
1444 use std::time::UNIX_EPOCH;
1445 if let Ok(mtime) = meta.modified() {
1446 if let Ok(dur) = mtime.duration_since(UNIX_EPOCH) {
1447 if dur.as_secs() as u32 != ie.mtime_sec {
1448 return false;
1449 }
1450 if dur.subsec_nanos() != ie.mtime_nsec {
1451 return false;
1452 }
1453 }
1454 }
1455 }
1456 true
1457}
1458
1459pub fn refresh_index_stat_content_verified(
1479 index: &mut Index,
1480 work_tree: &Path,
1481 index_mtime: Option<(u32, u32)>,
1482) -> bool {
1483 use crate::index::{MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK};
1484 let mut changed = false;
1485 for ie in &mut index.entries {
1486 if ie.stage() != 0 || ie.skip_worktree() || ie.assume_unchanged() || ie.intent_to_add() {
1487 continue;
1488 }
1489 if ie.mode != MODE_REGULAR && ie.mode != MODE_EXECUTABLE && ie.mode != MODE_SYMLINK {
1490 continue;
1491 }
1492 let Ok(path) = std::str::from_utf8(&ie.path) else {
1493 continue;
1494 };
1495 let abs = work_tree.join(path);
1496 let Ok(meta) = fs::symlink_metadata(&abs) else {
1497 continue;
1498 };
1499 if stat_matches(ie, &meta) {
1500 if entry_is_racy(ie, index_mtime)
1506 && !worktree_content_matches_index_oid(ie, &abs, &meta)
1507 {
1508 invalidate_index_stat_cache(ie);
1509 changed = true;
1510 }
1511 continue;
1512 }
1513 if !worktree_content_matches_index_oid(ie, &abs, &meta) {
1514 continue;
1515 }
1516 let refreshed = crate::index::entry_from_metadata(&meta, &ie.path, ie.oid, ie.mode);
1517 ie.ctime_sec = refreshed.ctime_sec;
1518 ie.ctime_nsec = refreshed.ctime_nsec;
1519 ie.mtime_sec = refreshed.mtime_sec;
1520 ie.mtime_nsec = refreshed.mtime_nsec;
1521 ie.dev = refreshed.dev;
1522 ie.ino = refreshed.ino;
1523 ie.uid = refreshed.uid;
1524 ie.gid = refreshed.gid;
1525 ie.size = refreshed.size;
1526 changed = true;
1527 }
1528 changed
1529}
1530
1531fn worktree_content_matches_index_oid(ie: &IndexEntry, abs: &Path, meta: &fs::Metadata) -> bool {
1533 use crate::index::{MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK};
1534 if ie.mode == MODE_SYMLINK {
1535 if !meta.file_type().is_symlink() {
1536 return false;
1537 }
1538 use std::os::unix::ffi::OsStrExt as _;
1539 fs::read_link(abs)
1540 .map(|t| Odb::hash_object_data(ObjectKind::Blob, t.as_os_str().as_bytes()) == ie.oid)
1541 .unwrap_or(false)
1542 } else if ie.mode == MODE_REGULAR || ie.mode == MODE_EXECUTABLE {
1543 if !meta.file_type().is_file() {
1544 return false;
1545 }
1546 fs::read(abs)
1547 .map(|bytes| Odb::hash_object_data(ObjectKind::Blob, &bytes) == ie.oid)
1548 .unwrap_or(false)
1549 } else {
1550 false
1551 }
1552}
1553
1554fn invalidate_index_stat_cache(ie: &mut IndexEntry) {
1556 ie.ctime_sec = 0;
1557 ie.ctime_nsec = 0;
1558 ie.mtime_sec = 0;
1559 ie.mtime_nsec = 0;
1560 ie.dev = 0;
1561 ie.ino = 0;
1562 ie.size = 0;
1563}
1564
/// Returns `true` when any leading directory component of `rel_path`
/// (resolved under `work_tree`) is a symbolic link.
///
/// Components are separated by `/`. The final component is never inspected,
/// and components that cannot be `lstat`ed count as "not a symlink".
fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
    // Without a '/' there are no leading components to check.
    let Some((parent_dirs, _leaf)) = rel_path.rsplit_once('/') else {
        return false;
    };

    let mut cursor = work_tree.to_path_buf();
    parent_dirs.split('/').any(|segment| {
        cursor.push(segment);
        fs::symlink_metadata(&cursor).is_ok_and(|meta| meta.file_type().is_symlink())
    })
}
1580
1581pub fn hash_worktree_file(
1582 odb: &Odb,
1583 path: &Path,
1584 meta: &fs::Metadata,
1585 conv: &crate::crlf::ConversionConfig,
1586 file_attrs: &crate::crlf::FileAttrs,
1587 rel_path: &str,
1588 index_entry: Option<&IndexEntry>,
1589) -> Result<ObjectId> {
1590 let prior_blob: Option<Vec<u8>> = index_entry
1591 .filter(|e| e.oid != zero_oid())
1592 .and_then(|e| odb.read(&e.oid).ok().map(|o| o.data));
1593 let data = if meta.file_type().is_symlink() {
1594 let target = fs::read_link(path)?;
1596 target.to_string_lossy().into_owned().into_bytes()
1597 } else if meta.is_dir() {
1598 Vec::new()
1601 } else {
1602 let raw = fs::read(path)?;
1603 let opts = crate::crlf::ConvertToGitOpts {
1606 index_blob: prior_blob.as_deref(),
1607 renormalize: false,
1608 check_safecrlf: false,
1609 };
1610 crate::crlf::convert_to_git_with_opts(&raw, rel_path, conv, file_attrs, opts).unwrap_or(raw)
1611 };
1612
1613 Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
1614}
1615
/// Maps filesystem metadata to a Git file mode.
///
/// Returns `0o120000` for symlinks. Otherwise returns `0o100755` when, on
/// Unix, any execute permission bit is set, and `0o100644` in every other
/// case (including all non-symlinks on non-Unix platforms).
pub fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
    if meta.file_type().is_symlink() {
        return 0o120000;
    }

    #[cfg(unix)]
    let executable = meta.mode() & 0o111 != 0;
    #[cfg(not(unix))]
    let executable = false;

    if executable {
        0o100755
    } else {
        0o100644
    }
}
1630
1631pub fn diff_tree_to_worktree(
1648 odb: &Odb,
1649 tree_oid: Option<&ObjectId>,
1650 work_tree: &Path,
1651 index: &Index,
1652) -> Result<Vec<DiffEntry>> {
1653 use crate::config::ConfigSet;
1654 use crate::crlf;
1655
1656 let git_dir = work_tree.join(".git");
1657 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1658 let conv = crlf::ConversionConfig::from_config(&config);
1659 let attrs = crlf::load_gitattributes(work_tree);
1660
1661 let tree_flat = match tree_oid {
1663 Some(oid) => flatten_tree(odb, oid, "")?,
1664 None => Vec::new(),
1665 };
1666 let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
1667 tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
1668
1669 let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
1671 std::collections::BTreeMap::new();
1672 let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1673 let mut stage0_paths: std::collections::BTreeSet<Vec<u8>> = std::collections::BTreeSet::new();
1674 for ie in &index.entries {
1675 if ie.stage() != 0 {
1676 continue;
1677 }
1678 let path = String::from_utf8_lossy(&ie.path).to_string();
1679 index_entries.insert(&ie.path, ie);
1680 index_paths.insert(path);
1681 stage0_paths.insert(ie.path.clone());
1682 }
1683
1684 let mut unmerged_only_paths: std::collections::BTreeSet<String> =
1687 std::collections::BTreeSet::new();
1688 for ie in &index.entries {
1689 if !(1..=3).contains(&ie.stage()) {
1690 continue;
1691 }
1692 if stage0_paths.contains(&ie.path) {
1693 continue;
1694 }
1695 unmerged_only_paths.insert(String::from_utf8_lossy(&ie.path).into_owned());
1696 }
1697
1698 let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1700 all_paths.extend(tree_map.keys().cloned());
1701 all_paths.extend(index_paths.iter().cloned());
1702 all_paths.extend(unmerged_only_paths.iter().cloned());
1703
1704 let mut result = Vec::new();
1705
1706 for path in &all_paths {
1707 if index_entries
1708 .get(path.as_bytes())
1709 .is_some_and(|ie| ie.skip_worktree())
1710 {
1711 continue;
1714 }
1715
1716 let tree_entry = tree_map.get(path.as_str());
1717
1718 let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
1720 || index_entries
1721 .get(path.as_bytes())
1722 .is_some_and(|ie| ie.mode == 0o160000);
1723 if is_gitlink {
1724 if let Some(te) = tree_entry {
1725 let sub_dir = work_tree.join(path);
1726 let sub_head = read_submodule_head_oid(&sub_dir);
1727 let index_oid = index_entries
1728 .get(path.as_bytes())
1729 .filter(|ie| ie.mode == 0o160000)
1730 .map(|ie| ie.oid);
1731 let index_matches_tree = index_oid.is_some_and(|oid| oid == te.oid);
1732 let head_differs = sub_head.as_ref() != Some(&te.oid);
1733 let dirty_while_aligned = index_matches_tree
1734 && !head_differs
1735 && submodule_has_dirty_worktree_for_super_diff(work_tree, path, &te.oid);
1736 if head_differs || dirty_while_aligned {
1737 let new_oid = if head_differs { zero_oid() } else { te.oid };
1741 result.push(DiffEntry {
1742 status: DiffStatus::Modified,
1743 old_path: Some(path.clone()),
1744 new_path: Some(path.clone()),
1745 old_mode: format_mode(te.mode),
1746 new_mode: format_mode(te.mode),
1747 old_oid: te.oid,
1748 new_oid,
1749 score: None,
1750 });
1751 }
1752 }
1753 continue;
1754 }
1755
1756 let file_path = work_tree.join(path);
1757
1758 let wt_meta = match fs::symlink_metadata(&file_path) {
1759 Ok(m) => Some(m),
1760 Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
1761 Err(e) => return Err(Error::Io(e)),
1762 };
1763
1764 if unmerged_only_paths.contains(path) {
1765 if let (Some(te), Some(meta)) = (tree_entry, wt_meta.as_ref()) {
1766 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1767 let wt_oid =
1768 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, None)?;
1769 let wt_mode = mode_from_metadata(meta);
1770 if wt_oid != te.oid || wt_mode != te.mode {
1771 result.push(DiffEntry {
1772 status: DiffStatus::Modified,
1773 old_path: Some(path.clone()),
1774 new_path: Some(path.clone()),
1775 old_mode: format_mode(te.mode),
1776 new_mode: format_mode(wt_mode),
1777 old_oid: te.oid,
1778 new_oid: wt_oid,
1779 score: None,
1780 });
1781 }
1782 }
1783 continue;
1784 }
1785
1786 match (tree_entry, wt_meta) {
1787 (Some(te), Some(ref meta)) => {
1788 let wt_mode = mode_from_metadata(meta);
1789 let Some(ie) = index_entries.get(path.as_bytes()) else {
1790 continue;
1791 };
1792
1793 let index_matches_tree = ie.oid == te.oid && ie.mode == te.mode;
1794
1795 if index_matches_tree && wt_mode == te.mode && stat_matches(ie, meta) {
1797 continue;
1798 }
1799
1800 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1801 let idx_ent = index_entries.get(path.as_bytes()).copied();
1802
1803 if ie.oid == te.oid && ie.mode != te.mode {
1805 result.push(DiffEntry {
1806 status: DiffStatus::Modified,
1807 old_path: Some(path.clone()),
1808 new_path: Some(path.clone()),
1809 old_mode: format_mode(te.mode),
1810 new_mode: format_mode(ie.mode),
1811 old_oid: te.oid,
1812 new_oid: te.oid,
1813 score: None,
1814 });
1815 continue;
1816 }
1817
1818 if index_matches_tree {
1821 let wt_oid = hash_worktree_file(
1822 odb,
1823 &file_path,
1824 meta,
1825 &conv,
1826 &file_attrs,
1827 path,
1828 idx_ent,
1829 )?;
1830 let mut eff_oid = wt_oid;
1831 if eff_oid != te.oid {
1832 if let Ok(raw) = fs::read(&file_path) {
1833 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1834 if raw_oid == te.oid {
1835 eff_oid = te.oid;
1836 }
1837 }
1838 }
1839 if eff_oid != te.oid {
1840 result.push(DiffEntry {
1841 status: DiffStatus::Modified,
1842 old_path: Some(path.clone()),
1843 new_path: Some(path.clone()),
1844 old_mode: format_mode(te.mode),
1845 new_mode: format_mode(wt_mode),
1846 old_oid: te.oid,
1847 new_oid: eff_oid,
1848 score: None,
1849 });
1850 } else if wt_mode != te.mode {
1851 result.push(DiffEntry {
1852 status: DiffStatus::Modified,
1853 old_path: Some(path.clone()),
1854 new_path: Some(path.clone()),
1855 old_mode: format_mode(te.mode),
1856 new_mode: format_mode(wt_mode),
1857 old_oid: te.oid,
1858 new_oid: te.oid,
1859 score: None,
1860 });
1861 }
1862 continue;
1863 }
1864
1865 let wt_oid =
1867 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, idx_ent)?;
1868 let mut eff_oid = wt_oid;
1869 if eff_oid != te.oid {
1870 if let Ok(raw) = fs::read(&file_path) {
1871 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1872 if raw_oid == te.oid {
1873 eff_oid = te.oid;
1874 }
1875 }
1876 }
1877 if eff_oid != te.oid || wt_mode != te.mode {
1878 result.push(DiffEntry {
1879 status: DiffStatus::Modified,
1880 old_path: Some(path.clone()),
1881 new_path: Some(path.clone()),
1882 old_mode: format_mode(te.mode),
1883 new_mode: format_mode(wt_mode),
1884 old_oid: te.oid,
1885 new_oid: eff_oid,
1886 score: None,
1887 });
1888 }
1889 }
1890 (Some(te), None) => {
1891 result.push(DiffEntry {
1893 status: DiffStatus::Deleted,
1894 old_path: Some(path.clone()),
1895 new_path: None,
1896 old_mode: format_mode(te.mode),
1897 new_mode: "000000".to_owned(),
1898 old_oid: te.oid,
1899 new_oid: zero_oid(),
1900 score: None,
1901 });
1902 }
1903 (None, Some(ref meta)) => {
1904 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1906 let wt_oid = hash_worktree_file(
1907 odb,
1908 &file_path,
1909 meta,
1910 &conv,
1911 &file_attrs,
1912 path,
1913 index_entries.get(path.as_bytes()).copied(),
1914 )?;
1915 let wt_mode = mode_from_metadata(meta);
1916 result.push(DiffEntry {
1917 status: DiffStatus::Added,
1918 old_path: None,
1919 new_path: Some(path.clone()),
1920 old_mode: "000000".to_owned(),
1921 new_mode: format_mode(wt_mode),
1922 old_oid: zero_oid(),
1923 new_oid: wt_oid,
1924 score: None,
1925 });
1926 }
1927 (None, None) => {
1928 }
1930 }
1931 }
1932
1933 result.sort_by(|a, b| a.path().cmp(b.path()));
1934 Ok(result)
1935}
1936
1937fn read_added_entry_bytes(
1940 odb: &Odb,
1941 entry: &DiffEntry,
1942 work_root: Option<&Path>,
1943) -> Option<Vec<u8>> {
1944 if entry.new_oid != zero_oid() {
1945 return odb.read(&entry.new_oid).ok().map(|obj| obj.data);
1946 }
1947 let path = entry.new_path.as_deref()?;
1948 let root = work_root?;
1949 fs::read(root.join(path)).ok()
1950}
1951
1952fn modified_as_copy_from_sources(
1953 odb: &Odb,
1954 work_root: Option<&Path>,
1955 e: &DiffEntry,
1956 threshold: u32,
1957 sources: &[(String, ObjectId, bool)],
1958 source_contents: &[Option<Vec<u8>>],
1959 source_tree_entries: &[(String, String, ObjectId)],
1960) -> Option<DiffEntry> {
1961 fn regular_file_mode(mode: &str) -> bool {
1962 mode == "100644" || mode == "100755"
1963 }
1964
1965 if e.status != DiffStatus::Modified || !regular_file_mode(&e.new_mode) {
1966 return None;
1967 }
1968 let new_data = read_added_entry_bytes(odb, e, work_root)?;
1969 let new_oid_eff = if e.new_oid != zero_oid() {
1970 e.new_oid
1971 } else {
1972 Odb::hash_object_data(ObjectKind::Blob, &new_data)
1973 };
1974
1975 let mut best: Option<(usize, u32)> = None;
1976 for (si, (src_path, src_oid, is_deleted)) in sources.iter().enumerate() {
1977 if *is_deleted {
1978 continue;
1979 }
1980 if e.new_path.as_deref() == Some(src_path.as_str()) {
1981 continue;
1982 }
1983 let src_mode_str = source_tree_entries
1984 .iter()
1985 .find(|(p, _, _)| p == src_path)
1986 .map(|(_, m, _)| m.as_str())
1987 .unwrap_or("100644");
1988 if !regular_file_mode(src_mode_str) {
1989 continue;
1990 }
1991
1992 let score = if *src_oid == new_oid_eff {
1993 100
1994 } else {
1995 match (&source_contents[si], Some(new_data.as_slice())) {
1996 (Some(old_data), Some(nd)) => compute_similarity(old_data, nd),
1997 _ => 0,
1998 }
1999 };
2000 if score >= threshold {
2001 let replace = match best {
2002 None => true,
2003 Some((_, s)) => score > s,
2004 };
2005 if replace {
2006 best = Some((si, score));
2007 }
2008 }
2009 }
2010
2011 let (si, score) = best?;
2012 let (src_path, src_oid, _) = &sources[si];
2013 let src_mode = source_tree_entries
2014 .iter()
2015 .find(|(p, _, _)| p == src_path)
2016 .map(|(_, m, _)| m.clone())
2017 .unwrap_or_else(|| e.old_mode.clone());
2018
2019 Some(DiffEntry {
2020 status: DiffStatus::Copied,
2021 old_path: Some(src_path.clone()),
2022 new_path: e.new_path.clone(),
2023 old_mode: src_mode,
2024 new_mode: e.new_mode.clone(),
2025 old_oid: *src_oid,
2026 new_oid: e.new_oid,
2027 score: Some(score),
2028 })
2029}
2030
2031pub fn detect_renames(
2043 odb: &Odb,
2044 work_root: Option<&Path>,
2045 entries: Vec<DiffEntry>,
2046 threshold: u32,
2047) -> Vec<DiffEntry> {
2048 let mut deleted: Vec<DiffEntry> = Vec::new();
2050 let mut added: Vec<DiffEntry> = Vec::new();
2051 let mut others: Vec<DiffEntry> = Vec::new();
2052
2053 for entry in entries {
2054 match entry.status {
2055 DiffStatus::Deleted => deleted.push(entry),
2056 DiffStatus::Added => added.push(entry),
2057 _ => others.push(entry),
2058 }
2059 }
2060
2061 if deleted.is_empty() || added.is_empty() {
2062 let mut result = others;
2064 result.extend(deleted);
2065 result.extend(added);
2066 result.sort_by(|a, b| a.path().cmp(b.path()));
2067 return result;
2068 }
2069
2070 let deleted_contents: Vec<Option<Vec<u8>>> = deleted
2072 .iter()
2073 .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
2074 .collect();
2075
2076 let added_contents: Vec<Option<Vec<u8>>> = added
2078 .iter()
2079 .map(|a| read_added_entry_bytes(odb, a, work_root))
2080 .collect();
2081
2082 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
2085
2086 fn is_regularish_mode(mode: &str) -> bool {
2087 mode == "100644" || mode == "100755"
2088 }
2089
2090 fn same_path_same_blob(del: &DiffEntry, add: &DiffEntry) -> bool {
2091 del.old_path == add.new_path && del.old_oid == add.new_oid && del.old_mode == add.new_mode
2092 }
2093
2094 for (di, del) in deleted.iter().enumerate() {
2095 for (ai, add) in added.iter().enumerate() {
2096 if del.old_oid == add.new_oid {
2098 scores.push((100, di, ai));
2099 continue;
2100 }
2101
2102 if !is_regularish_mode(&del.old_mode) || !is_regularish_mode(&add.new_mode) {
2105 continue;
2106 }
2107
2108 let score = match (&deleted_contents[di], &added_contents[ai]) {
2109 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
2110 _ => 0,
2111 };
2112
2113 if score >= threshold {
2114 scores.push((score, di, ai));
2115 }
2116 }
2117 }
2118
2119 scores.sort_by(|a, b| {
2123 let a_noop = same_path_same_blob(&deleted[a.1], &added[a.2]);
2124 let b_noop = same_path_same_blob(&deleted[b.1], &added[b.2]);
2125 let a_same = same_basename(&deleted[a.1], &added[a.2]);
2126 let b_same = same_basename(&deleted[b.1], &added[b.2]);
2127 a_noop
2128 .cmp(&b_noop)
2129 .then_with(|| b_same.cmp(&a_same))
2130 .then_with(|| b.0.cmp(&a.0))
2131 });
2132
2133 let mut used_deleted = vec![false; deleted.len()];
2134 let mut used_added = vec![false; added.len()];
2135 let mut renames: Vec<DiffEntry> = Vec::new();
2136
2137 for (score, di, ai) in &scores {
2138 if used_deleted[*di] || used_added[*ai] {
2139 continue;
2140 }
2141 used_deleted[*di] = true;
2142 used_added[*ai] = true;
2143
2144 let del = &deleted[*di];
2145 let add = &added[*ai];
2146
2147 if same_path_same_blob(del, add) {
2152 continue;
2153 }
2154
2155 renames.push(DiffEntry {
2156 status: DiffStatus::Renamed,
2157 old_path: del.old_path.clone(),
2158 new_path: add.new_path.clone(),
2159 old_mode: del.old_mode.clone(),
2160 new_mode: add.new_mode.clone(),
2161 old_oid: del.old_oid,
2162 new_oid: add.new_oid,
2163 score: Some(*score),
2164 });
2165 }
2166
2167 let mut result = others;
2169 result.extend(renames);
2170 for (i, entry) in deleted.into_iter().enumerate() {
2171 if !used_deleted[i] {
2172 result.push(entry);
2173 }
2174 }
2175 for (i, entry) in added.into_iter().enumerate() {
2176 if !used_added[i] {
2177 result.push(entry);
2178 }
2179 }
2180
2181 result.sort_by(|a, b| a.path().cmp(b.path()));
2182 result
2183}
2184
2185pub fn detect_copies(
2196 odb: &Odb,
2197 work_root: Option<&Path>,
2198 entries: Vec<DiffEntry>,
2199 threshold: u32,
2200 find_copies_harder: bool,
2201 source_tree_entries: &[(String, String, ObjectId)],
2202) -> Vec<DiffEntry> {
2203 use std::collections::{HashMap, HashSet};
2204
2205 let mut deleted: Vec<DiffEntry> = Vec::new();
2207 let mut added: Vec<DiffEntry> = Vec::new();
2208 let mut others: Vec<DiffEntry> = Vec::new();
2209
2210 for entry in entries {
2211 match entry.status {
2212 DiffStatus::Deleted => deleted.push(entry),
2213 DiffStatus::Added => added.push(entry),
2214 _ => others.push(entry),
2215 }
2216 }
2217
2218 let mut sources: Vec<(String, ObjectId, bool)> = Vec::new(); let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
2222
2223 for entry in &deleted {
2224 if let Some(ref path) = entry.old_path {
2225 deleted_source_idx.insert(path.clone(), sources.len());
2226 sources.push((path.clone(), entry.old_oid, true));
2227 }
2228 }
2229
2230 for entry in &others {
2233 if matches!(entry.status, DiffStatus::Modified | DiffStatus::TypeChanged) {
2234 if let Some(ref old_path) = entry.old_path {
2235 if !sources.iter().any(|(p, _, _)| p == old_path) {
2236 sources.push((old_path.clone(), entry.old_oid, false));
2237 }
2238 }
2239 }
2240 }
2241
2242 if find_copies_harder {
2244 for (path, _mode, oid) in source_tree_entries {
2245 if !sources.iter().any(|(p, _, _)| p == path) {
2246 sources.push((path.clone(), *oid, false));
2247 }
2248 }
2249 }
2250
2251 if sources.is_empty() {
2252 let mut result = others;
2253 result.extend(deleted);
2254 result.extend(added);
2255 result.sort_by(|a, b| a.path().cmp(b.path()));
2256 return result;
2257 }
2258
2259 let source_contents: Vec<Option<Vec<u8>>> = sources
2261 .iter()
2262 .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
2263 .collect();
2264
2265 let mut result_entries: Vec<DiffEntry> = Vec::new();
2266 let mut renamed_deleted: HashSet<usize> = HashSet::new();
2267 let mut used_added2 = vec![false; added.len()];
2268
2269 if !added.is_empty() {
2270 let added_contents: Vec<Option<Vec<u8>>> = added
2272 .iter()
2273 .map(|a| read_added_entry_bytes(odb, a, work_root))
2274 .collect();
2275
2276 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
2278 for (si, (src_path, src_oid, _)) in sources.iter().enumerate() {
2279 for (ai, add) in added.iter().enumerate() {
2280 if add.new_path.as_deref() == Some(src_path.as_str()) {
2283 continue;
2284 }
2285 let add_oid = if add.new_oid != zero_oid() {
2286 add.new_oid
2287 } else if let Some(ref data) = added_contents[ai] {
2288 Odb::hash_object_data(ObjectKind::Blob, data)
2289 } else {
2290 zero_oid()
2291 };
2292 if *src_oid == add_oid {
2293 scores.push((100, si, ai));
2294 continue;
2295 }
2296 let score = match (&source_contents[si], &added_contents[ai]) {
2297 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
2298 _ => 0,
2299 };
2300 if score >= threshold {
2301 scores.push((score, si, ai));
2302 }
2303 }
2304 }
2305
2306 scores.sort_by(|a, b| b.0.cmp(&a.0));
2308
2309 let mut used_added = vec![false; added.len()];
2311 let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
2312 for &(score, si, ai) in &scores {
2313 if used_added[ai] {
2314 continue;
2315 }
2316 used_added[ai] = true;
2317 source_to_added.entry(si).or_default().push((ai, score));
2318 }
2319
2320 for (&si, assignments_for_src) in &source_to_added {
2322 let (_, _, is_deleted) = &sources[si];
2323 if *is_deleted && !assignments_for_src.is_empty() {
2324 let rename_ai = assignments_for_src
2327 .iter()
2328 .max_by_key(|(ai, _score)| added[*ai].path().to_string())
2329 .map(|(ai, _)| *ai);
2330
2331 for &(ai, score) in assignments_for_src {
2332 let (ref src_path, _, _) = sources[si];
2333 let add = &added[ai];
2334 let src_mode = source_tree_entries
2335 .iter()
2336 .find(|(p, _, _)| p == src_path)
2337 .map(|(_, m, _)| m.clone())
2338 .unwrap_or_else(|| add.old_mode.clone());
2339
2340 let is_rename = Some(ai) == rename_ai;
2341 result_entries.push(DiffEntry {
2342 status: if is_rename {
2343 DiffStatus::Renamed
2344 } else {
2345 DiffStatus::Copied
2346 },
2347 old_path: Some(src_path.clone()),
2348 new_path: add.new_path.clone(),
2349 old_mode: src_mode,
2350 new_mode: add.new_mode.clone(),
2351 old_oid: sources[si].1,
2352 new_oid: add.new_oid,
2353 score: Some(score),
2354 });
2355 used_added2[ai] = true;
2356 }
2357 renamed_deleted.insert(si);
2358 } else {
2359 for &(ai, score) in assignments_for_src {
2361 let (ref src_path, _, _) = sources[si];
2362 let add = &added[ai];
2363 let src_mode = source_tree_entries
2364 .iter()
2365 .find(|(p, _, _)| p == src_path)
2366 .map(|(_, m, _)| m.clone())
2367 .unwrap_or_else(|| add.old_mode.clone());
2368
2369 result_entries.push(DiffEntry {
2370 status: DiffStatus::Copied,
2371 old_path: Some(src_path.clone()),
2372 new_path: add.new_path.clone(),
2373 old_mode: src_mode,
2374 new_mode: add.new_mode.clone(),
2375 old_oid: sources[si].1,
2376 new_oid: add.new_oid,
2377 score: Some(score),
2378 });
2379 used_added2[ai] = true;
2380 }
2381 }
2382 }
2383 }
2384
2385 for entry in deleted.into_iter() {
2387 if let Some(ref path) = entry.old_path {
2388 if let Some(&si) = deleted_source_idx.get(path) {
2389 if renamed_deleted.contains(&si) {
2390 continue;
2392 }
2393 }
2394 }
2395 result_entries.push(entry);
2396 }
2397
2398 let mut result = others;
2399 result.extend(result_entries);
2400 for (i, entry) in added.into_iter().enumerate() {
2402 if !used_added2[i] {
2403 result.push(entry);
2404 }
2405 }
2406
2407 let mut final_result = Vec::with_capacity(result.len());
2408 for e in result {
2409 if let Some(c) = modified_as_copy_from_sources(
2410 odb,
2411 work_root,
2412 &e,
2413 threshold,
2414 &sources,
2415 &source_contents,
2416 source_tree_entries,
2417 ) {
2418 final_result.push(c);
2419 } else {
2420 final_result.push(e);
2421 }
2422 }
2423
2424 final_result.sort_by(|a, b| a.path().cmp(b.path()));
2425 final_result
2426}
2427
2428pub fn status_apply_rename_copy_detection(
2438 odb: &Odb,
2439 unstaged_raw: Vec<DiffEntry>,
2440 threshold: u32,
2441 copies: bool,
2442 head_tree: Option<&ObjectId>,
2443) -> Result<Vec<DiffEntry>> {
2444 let after_renames = detect_renames(odb, None, unstaged_raw, threshold);
2445 if !copies {
2446 return Ok(after_renames);
2447 }
2448 let source_tree_entries: Vec<(String, String, ObjectId)> = match head_tree {
2449 Some(oid) => flatten_tree(odb, oid, "")?
2450 .into_iter()
2451 .map(|e| (e.path, format_mode(e.mode), e.oid))
2452 .collect(),
2453 None => Vec::new(),
2454 };
2455 Ok(detect_copies(
2456 odb,
2457 None,
2458 after_renames,
2459 threshold,
2460 false,
2461 &source_tree_entries,
2462 ))
2463}
2464
/// Renders a rename as a compact `prefix{old => new}suffix` string.
///
/// The common prefix is cut after the last `/` shared at the start of both
/// paths, and the common suffix starts at the earliest `/` shared at the end
/// of both paths. When neither exists the plain `old => new` form is used.
///
/// Examples: `a/b/c.txt` → `a/d/c.txt` gives `a/{b => d}/c.txt`;
/// `a/b/c` → `a/c` gives `a/{b => }/c`.
pub fn format_rename_path(old: &str, new: &str) -> String {
    let (ob, nb) = (old.as_bytes(), new.as_bytes());

    // Common prefix, trimmed back to just after its last '/'.
    let shared_head = ob.iter().zip(nb).take_while(|(a, b)| a == b).count();
    let pfx = ob[..shared_head]
        .iter()
        .rposition(|&c| c == b'/')
        .map_or(0, |slash| slash + 1);

    // Common suffix, trimmed forward to its first '/'.
    let shared_tail = ob
        .iter()
        .rev()
        .zip(nb.iter().rev())
        .take_while(|(a, b)| a == b)
        .count();
    let tail = &ob[ob.len() - shared_tail..];
    let mut sfx = tail
        .iter()
        .position(|&c| c == b'/')
        .map_or(0, |slash| shared_tail - slash);

    let mut sfx_at_old = ob.len() - sfx;
    let mut sfx_at_new = nb.len() - sfx;

    // While the suffix reaches into the prefix on both sides, shrink it to
    // start at its next '/'; give the suffix up when there is none.
    while pfx > sfx_at_old && pfx > sfx_at_new && sfx > 0 {
        let next_slash = ob[sfx_at_old..].iter().skip(1).position(|&c| c == b'/');
        match next_slash {
            Some(gap) => {
                sfx -= gap + 1;
                sfx_at_old = ob.len() - sfx;
                sfx_at_new = nb.len() - sfx;
            }
            None => {
                sfx_at_old = ob.len();
                sfx_at_new = nb.len();
                break;
            }
        }
    }

    let prefix = &old[..pfx];
    let suffix = &old[sfx_at_old..];
    if prefix.is_empty() && suffix.is_empty() {
        return format!("{old} => {new}");
    }

    // A side whose suffix overlaps the prefix has an empty middle.
    let old_mid = if pfx > sfx_at_old {
        ""
    } else {
        &old[pfx..sfx_at_old]
    };
    let new_mid = if pfx > sfx_at_new {
        ""
    } else {
        &new[pfx..sfx_at_new]
    };

    format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
}
2560
2561fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
2563 let old = del.old_path.as_deref().unwrap_or("");
2564 let new = add.new_path.as_deref().unwrap_or("");
2565 let old_base = old.rsplit('/').next().unwrap_or(old);
2566 let new_base = new.rsplit('/').next().unwrap_or(new);
2567 old_base == new_base && !old_base.is_empty()
2568}
2569
2570fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
2575 let old_norm = crate::crlf::crlf_to_lf(old);
2578 let new_norm = crate::crlf::crlf_to_lf(new);
2579
2580 let src_size = old_norm.len();
2581 let dst_size = new_norm.len();
2582
2583 if src_size == 0 && dst_size == 0 {
2584 return 100;
2585 }
2586 let total = src_size + dst_size;
2587 if total == 0 {
2588 return 100;
2589 }
2590
2591 use similar::{ChangeTag, TextDiff};
2593 let old_str = String::from_utf8_lossy(&old_norm);
2594 let new_str = String::from_utf8_lossy(&new_norm);
2595 let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
2596
2597 let mut shared_bytes = 0usize;
2598 for change in diff.iter_all_changes() {
2599 if change.tag() == ChangeTag::Equal {
2600 shared_bytes += change.value().len();
2602 }
2603 }
2604
2605 let max_size = src_size.max(dst_size);
2608
2609 ((shared_bytes * 100) / max_size).min(100) as u32
2610}
2611
/// Public entry point for the similarity score used by rename and copy
/// detection in this file.
///
/// Forwards `old` and `new` unchanged to `compute_similarity` and returns its
/// result.
#[must_use]
pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
    compute_similarity(old, new)
}
2619
2620pub fn format_raw(entry: &DiffEntry) -> String {
2626 let path = match entry.status {
2627 DiffStatus::Renamed | DiffStatus::Copied => {
2628 format!(
2629 "{}\t{}",
2630 entry.old_path.as_deref().unwrap_or(""),
2631 entry.new_path.as_deref().unwrap_or("")
2632 )
2633 }
2634 _ => entry.path().to_owned(),
2635 };
2636
2637 let status_str = match (entry.status, entry.score) {
2638 (DiffStatus::Renamed, Some(s)) => format!("R{:03}", s),
2639 (DiffStatus::Copied, Some(s)) => format!("C{:03}", s),
2640 _ => entry.status.letter().to_string(),
2641 };
2642
2643 format!(
2644 ":{} {} {} {} {}\t{}",
2645 entry.old_mode, entry.new_mode, entry.old_oid, entry.new_oid, status_str, path
2646 )
2647}
2648
2649pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
2651 let ellipsis = if std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes") {
2652 "..."
2653 } else {
2654 ""
2655 };
2656 let old_hex = format!("{}", entry.old_oid);
2657 let new_hex = format!("{}", entry.new_oid);
2658 let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
2659 let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
2660
2661 let path = match entry.status {
2663 DiffStatus::Renamed | DiffStatus::Copied => format!(
2664 "{}\t{}",
2665 entry.old_path.as_deref().unwrap_or(""),
2666 entry.new_path.as_deref().unwrap_or("")
2667 ),
2668 _ => entry.path().to_owned(),
2669 };
2670 let status_str = match (entry.status, entry.score) {
2671 (DiffStatus::Renamed, Some(s)) => format!("R{s:03}"),
2672 (DiffStatus::Copied, Some(s)) => format!("C{s:03}"),
2673 _ => entry.status.letter().to_string(),
2674 };
2675
2676 format!(
2677 ":{} {} {}{} {}{} {}\t{}",
2678 entry.old_mode,
2679 entry.new_mode,
2680 old_abbrev,
2681 ellipsis,
2682 new_abbrev,
2683 ellipsis,
2684 status_str,
2685 path
2686 )
2687}
2688
/// Builds a unified diff between `old_content` and `new_content`.
///
/// Convenience wrapper around `unified_diff_with_prefix` that fixes the
/// inter-hunk context to `0` and the source / destination path prefixes to
/// `"a/"` and `"b/"`. All other arguments are forwarded unchanged.
///
/// # Parameters
/// * `old_content`, `new_content` – the two texts to compare.
/// * `old_path`, `new_path` – paths forwarded for the diff header.
/// * `context_lines` – forwarded context line count.
/// * `indent_heuristic`, `quote_path_fully` – forwarded flags.
///
/// Returns the `String` produced by `unified_diff_with_prefix`.
pub fn unified_diff(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    indent_heuristic: bool,
    quote_path_fully: bool,
) -> String {
    unified_diff_with_prefix(
        old_content,
        new_content,
        old_path,
        new_path,
        context_lines,
        // inter_hunk_context
        0,
        // src_prefix / dst_prefix
        "a/",
        "b/",
        indent_heuristic,
        quote_path_fully,
    )
}
2725
/// Builds a unified diff with caller-chosen path prefixes and inter-hunk
/// context.
///
/// Forwards every argument to `unified_diff_with_prefix_and_funcname`,
/// passing `None` for the funcname matcher.
///
/// Returns the `String` produced by that function.
#[allow(clippy::too_many_arguments)]
pub fn unified_diff_with_prefix(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    inter_hunk_context: usize,
    src_prefix: &str,
    dst_prefix: &str,
    indent_heuristic: bool,
    quote_path_fully: bool,
) -> String {
    unified_diff_with_prefix_and_funcname(
        old_content,
        new_content,
        old_path,
        new_path,
        context_lines,
        inter_hunk_context,
        src_prefix,
        dst_prefix,
        // funcname_matcher
        None,
        indent_heuristic,
        quote_path_fully,
    )
}
2757
/// Builds a unified diff with caller-chosen prefixes and an optional
/// funcname matcher.
///
/// Forwards every argument to
/// `unified_diff_with_prefix_and_funcname_and_algorithm`, selecting
/// `similar::Algorithm::Myers` and passing `false` for both `function_context`
/// and `use_git_histogram`.
///
/// Returns the `String` produced by that function.
#[allow(clippy::too_many_arguments)]
pub fn unified_diff_with_prefix_and_funcname(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    inter_hunk_context: usize,
    src_prefix: &str,
    dst_prefix: &str,
    funcname_matcher: Option<&FuncnameMatcher>,
    indent_heuristic: bool,
    quote_path_fully: bool,
) -> String {
    unified_diff_with_prefix_and_funcname_and_algorithm(
        old_content,
        new_content,
        old_path,
        new_path,
        context_lines,
        inter_hunk_context,
        src_prefix,
        dst_prefix,
        funcname_matcher,
        // algorithm
        similar::Algorithm::Myers,
        // function_context
        false,
        // use_git_histogram
        false,
        indent_heuristic,
        quote_path_fully,
    )
}
2791
2792#[allow(clippy::too_many_arguments)]
2798pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
2799 old_content: &str,
2800 new_content: &str,
2801 old_path: &str,
2802 new_path: &str,
2803 context_lines: usize,
2804 inter_hunk_context: usize,
2805 src_prefix: &str,
2806 dst_prefix: &str,
2807 funcname_matcher: Option<&FuncnameMatcher>,
2808 algorithm: similar::Algorithm,
2809 function_context: bool,
2810 use_git_histogram: bool,
2811 indent_heuristic: bool,
2812 quote_path_fully: bool,
2813) -> String {
2814 if use_git_histogram {
2815 return unified_diff_histogram_with_prefix_and_funcname(
2816 old_content,
2817 new_content,
2818 old_path,
2819 new_path,
2820 context_lines,
2821 inter_hunk_context,
2822 src_prefix,
2823 dst_prefix,
2824 funcname_matcher,
2825 quote_path_fully,
2826 );
2827 }
2828
2829 if function_context {
2830 return unified_diff_with_function_context(
2831 old_content,
2832 new_content,
2833 old_path,
2834 new_path,
2835 context_lines,
2836 inter_hunk_context,
2837 src_prefix,
2838 dst_prefix,
2839 funcname_matcher,
2840 algorithm,
2841 indent_heuristic,
2842 quote_path_fully,
2843 );
2844 }
2845
2846 use crate::quote_path::format_diff_path_with_prefix;
2847 use similar::{udiff::UnifiedDiffHunk, TextDiff};
2848
2849 let diff = TextDiff::configure()
2850 .algorithm(algorithm)
2851 .diff_lines(old_content, new_content);
2852 let compacted_ops = diff_indent_heuristic::diff_lines_ops_compacted(
2853 old_content,
2854 new_content,
2855 algorithm,
2856 indent_heuristic,
2857 );
2858
2859 let mut output = String::new();
2860 if old_path == "/dev/null" {
2861 output.push_str("--- /dev/null\n");
2862 } else if src_prefix.is_empty() {
2863 output.push_str(&format!("--- {old_path}\n"));
2866 } else {
2867 output.push_str("--- ");
2868 output.push_str(&format_diff_path_with_prefix(
2869 src_prefix,
2870 old_path,
2871 quote_path_fully,
2872 ));
2873 output.push('\n');
2874 }
2875 if new_path == "/dev/null" {
2876 output.push_str("+++ /dev/null\n");
2877 } else if dst_prefix.is_empty() {
2878 output.push_str(&format!("+++ {new_path}\n"));
2879 } else {
2880 output.push_str("+++ ");
2881 output.push_str(&format_diff_path_with_prefix(
2882 dst_prefix,
2883 new_path,
2884 quote_path_fully,
2885 ));
2886 output.push('\n');
2887 }
2888
2889 let old_lines: Vec<&str> = old_content.lines().collect();
2890
2891 let max_common_gap = context_lines
2897 .saturating_mul(2)
2898 .saturating_add(inter_hunk_context);
2899 let op_groups = group_diff_ops_gap(compacted_ops, context_lines, max_common_gap);
2900
2901 for ops in op_groups {
2902 if ops.is_empty() {
2903 continue;
2904 }
2905 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
2906 let hunk_str = format!("{hunk}");
2907 if let Some(first_newline) = hunk_str.find('\n') {
2911 let header_line = &hunk_str[..first_newline];
2912 let rest = &hunk_str[first_newline..];
2913
2914 if let Some(func_ctx) =
2916 extract_function_context(header_line, &old_lines, funcname_matcher)
2917 {
2918 output.push_str(header_line);
2919 output.push(' ');
2920 output.push_str(&func_ctx);
2921 output.push_str(rest);
2922 } else {
2923 output.push_str(&hunk_str);
2924 }
2925 } else {
2926 output.push_str(&hunk_str);
2927 }
2928 }
2929
2930 output
2931}
2932
2933fn group_diff_ops_gap(
2939 mut ops: Vec<similar::DiffOp>,
2940 context: usize,
2941 max_common_gap: usize,
2942) -> Vec<Vec<similar::DiffOp>> {
2943 use similar::DiffOp;
2944 if ops.is_empty() {
2945 return vec![];
2946 }
2947
2948 let mut pending_group = Vec::new();
2949 let mut rv = Vec::new();
2950
2951 if let Some(DiffOp::Equal {
2952 old_index,
2953 new_index,
2954 len,
2955 }) = ops.first_mut()
2956 {
2957 let offset = (*len).saturating_sub(context);
2958 *old_index += offset;
2959 *new_index += offset;
2960 *len -= offset;
2961 }
2962
2963 if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
2964 *len -= (*len).saturating_sub(context);
2965 }
2966
2967 for op in ops.into_iter() {
2968 if let DiffOp::Equal {
2969 old_index,
2970 new_index,
2971 len,
2972 } = op
2973 {
2974 if len > max_common_gap {
2977 pending_group.push(DiffOp::Equal {
2978 old_index,
2979 new_index,
2980 len: context,
2981 });
2982 rv.push(pending_group);
2983 let offset = len.saturating_sub(context);
2984 pending_group = vec![DiffOp::Equal {
2985 old_index: old_index + offset,
2986 new_index: new_index + offset,
2987 len: len - offset,
2988 }];
2989 continue;
2990 }
2991 }
2992 pending_group.push(op);
2993 }
2994
2995 match &pending_group[..] {
2996 &[] | &[similar::DiffOp::Equal { .. }] => {}
2997 _ => rv.push(pending_group),
2998 }
2999
3000 rv
3001}
3002
3003fn unified_diff_with_function_context(
3005 old_content: &str,
3006 new_content: &str,
3007 old_path: &str,
3008 new_path: &str,
3009 context_lines: usize,
3010 inter_hunk_context: usize,
3011 src_prefix: &str,
3012 dst_prefix: &str,
3013 funcname_matcher: Option<&FuncnameMatcher>,
3014 algorithm: similar::Algorithm,
3015 indent_heuristic: bool,
3016 quote_path_fully: bool,
3017) -> String {
3018 use crate::quote_path::format_diff_path_with_prefix;
3019 use similar::{group_diff_ops, udiff::UnifiedDiffHunk, TextDiff};
3020
3021 let diff = TextDiff::configure()
3022 .algorithm(algorithm)
3023 .diff_lines(old_content, new_content);
3024
3025 let old_lines: Vec<&str> = old_content.lines().collect();
3026 let new_lines: Vec<&str> = new_content.lines().collect();
3027 let n_old = old_lines.len();
3028 let n_new = new_lines.len();
3029
3030 let group_radius = context_lines
3031 .saturating_mul(2)
3032 .saturating_add(inter_hunk_context);
3033 let all_ops = diff.ops().to_vec();
3034 let op_groups = group_diff_ops(all_ops.clone(), group_radius);
3035
3036 let mut ranges: Vec<(usize, usize, usize, usize)> = Vec::new();
3037
3038 for ops in op_groups {
3039 if ops.is_empty() {
3040 continue;
3041 }
3042 let i1_anchor = func_context_old_anchor(&ops, n_old);
3043 let i1_end = hunk_old_change_end_exclusive(&ops);
3044 let skip_preimage_pull =
3045 append_with_whole_function_added(&ops, n_old, n_new, &new_lines, funcname_matcher);
3046 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
3047 let hunk_str = format!("{hunk}");
3048 let header_line = hunk_str
3049 .lines()
3050 .next()
3051 .unwrap_or("")
3052 .trim_end_matches(['\r', '\n']);
3053 let Some((base_s1, base_e1, _base_s2, _base_e2)) =
3054 parse_unified_hunk_header_ranges(header_line)
3055 else {
3056 continue;
3057 };
3058
3059 let ctx = context_lines;
3060 let (s1, e1, s2, e2) = if skip_preimage_pull {
3061 let s = n_old.saturating_sub(ctx);
3062 let s2 = map_old_line_to_new(&all_ops, s, n_new).min(n_new);
3063 (s, n_old, s2, n_new)
3064 } else {
3065 let mut s1 = base_s1.saturating_sub(ctx);
3066 let mut s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
3067
3068 let base_pre_s1 = i1_anchor.saturating_sub(ctx);
3069 if base_pre_s1 < s1 {
3070 s1 = base_pre_s1;
3071 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
3072 }
3073
3074 let fs1 = expand_func_pre_start(s1, i1_anchor, n_old, &old_lines, funcname_matcher);
3075 if fs1 < s1 {
3076 s1 = fs1;
3077 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
3078 }
3079
3080 let mut e1 = (base_e1 + ctx).min(n_old);
3081 let mut e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
3082 let fe1 = expand_func_post_end(e1, i1_end, n_old, &old_lines, funcname_matcher);
3083 if fe1 > e1 {
3084 e1 = fe1;
3085 e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
3086 }
3087 (s1, e1, s2, e2)
3088 };
3089
3090 ranges.push((s1, e1, s2, e2));
3091 }
3092
3093 let mut output = String::new();
3094 if old_path == "/dev/null" {
3095 output.push_str("--- /dev/null\n");
3096 } else if src_prefix.is_empty() {
3097 output.push_str(&format!("--- {old_path}\n"));
3098 } else {
3099 output.push_str("--- ");
3100 output.push_str(&format_diff_path_with_prefix(
3101 src_prefix,
3102 old_path,
3103 quote_path_fully,
3104 ));
3105 output.push('\n');
3106 }
3107 if new_path == "/dev/null" {
3108 output.push_str("+++ /dev/null\n");
3109 } else if dst_prefix.is_empty() {
3110 output.push_str(&format!("+++ {new_path}\n"));
3111 } else {
3112 output.push_str("+++ ");
3113 output.push_str(&format_diff_path_with_prefix(
3114 dst_prefix,
3115 new_path,
3116 quote_path_fully,
3117 ));
3118 output.push('\n');
3119 }
3120
3121 for (s1, e1, s2, e2) in ranges {
3122 if s1 >= e1 && s2 >= e2 {
3123 continue;
3124 }
3125 let old_seg =
3126 line_slice_for_diff_with_eof_nl(&old_lines, s1, e1, old_content.ends_with('\n'));
3127 let new_seg =
3128 line_slice_for_diff_with_eof_nl(&new_lines, s2, e2, new_content.ends_with('\n'));
3129 let inner_ctx = old_seg.lines().count().max(new_seg.lines().count()).max(1);
3130 let piece = unified_diff_with_prefix_and_funcname_and_algorithm(
3131 &old_seg,
3132 &new_seg,
3133 old_path,
3134 new_path,
3135 inner_ctx,
3136 0,
3137 src_prefix,
3138 dst_prefix,
3139 funcname_matcher,
3140 algorithm,
3141 false,
3142 false,
3143 indent_heuristic,
3144 quote_path_fully,
3145 );
3146 let shifted = shift_unified_hunk_headers_to_full_file(&piece, s1, s2);
3147 let with_func =
3148 enrich_unified_hunk_headers_funcname(&shifted, &old_lines, funcname_matcher);
3149 for line in with_func.lines() {
3150 if line.starts_with("--- ") || line.starts_with("+++ ") {
3151 continue;
3152 }
3153 output.push_str(line);
3154 output.push('\n');
3155 }
3156 }
3157
3158 output
3159}
3160
3161fn shift_unified_hunk_headers_to_full_file(
3166 patch: &str,
3167 delta_old: usize,
3168 delta_new: usize,
3169) -> String {
3170 if delta_old == 0 && delta_new == 0 {
3171 return patch.to_owned();
3172 }
3173 let mut out = String::with_capacity(patch.len());
3174 for line in patch.lines() {
3175 if let Some(shifted) = shift_one_unified_hunk_header(line, delta_old, delta_new) {
3176 out.push_str(&shifted);
3177 } else {
3178 out.push_str(line);
3179 }
3180 out.push('\n');
3181 }
3182 out
3183}
3184
3185fn shift_one_unified_hunk_header(line: &str, delta_old: usize, delta_new: usize) -> Option<String> {
3186 let rest = line.strip_prefix("@@ ")?;
3187 let (old_chunk, after_plus) = rest.split_once(" +")?;
3188 let old_spec = old_chunk.strip_prefix('-')?;
3189 let (new_spec, suffix) = after_plus.split_once(" @@")?;
3190 let shifted_old = shift_unified_range_spec(old_spec, delta_old)?;
3191 let shifted_new = shift_unified_range_spec(new_spec, delta_new)?;
3192 Some(format!("@@ -{shifted_old} +{shifted_new} @@{suffix}"))
3193}
3194
/// Adds `delta` to the start of a hunk range spec (`start` or `start,count`).
///
/// The count, when present, is validated as a number and kept as is. Returns
/// `None` if either number fails to parse. The addition saturates.
fn shift_unified_range_spec(spec: &str, delta: usize) -> Option<String> {
    let trimmed = spec.trim();
    let shifted_start =
        |text: &str| text.parse::<usize>().ok().map(|start| start.saturating_add(delta));

    match trimmed.split_once(',') {
        Some((start_text, count_text)) => {
            let start = shifted_start(start_text)?;
            let count: usize = count_text.parse().ok()?;
            Some(format!("{start},{count}"))
        }
        None => shifted_start(trimmed).map(|start| start.to_string()),
    }
}
3206
3207fn enrich_unified_hunk_headers_funcname(
3209 patch: &str,
3210 full_old_lines: &[&str],
3211 funcname_matcher: Option<&FuncnameMatcher>,
3212) -> String {
3213 let mut out = String::with_capacity(patch.len());
3214 for line in patch.lines() {
3215 if let Some(fixed) = enrich_one_hunk_header_funcname(line, full_old_lines, funcname_matcher)
3216 {
3217 out.push_str(&fixed);
3218 } else {
3219 out.push_str(line);
3220 }
3221 out.push('\n');
3222 }
3223 out
3224}
3225
3226fn enrich_one_hunk_header_funcname(
3227 line: &str,
3228 full_old_lines: &[&str],
3229 funcname_matcher: Option<&FuncnameMatcher>,
3230) -> Option<String> {
3231 let after_at = line.strip_prefix("@@ ")?;
3232 let idx = after_at.find(" @@")?;
3233 let mid = after_at[..idx].trim();
3234 let tail = after_at[idx + 3..].trim_start();
3235 let header_for_parse = format!("@@ {mid} @@");
3236 let func = extract_function_context(&header_for_parse, full_old_lines, funcname_matcher);
3237 Some(if let Some(f) = func {
3238 format!("@@ {mid} @@ {f}")
3239 } else if !tail.is_empty() {
3240 format!("@@ {mid} @@ {tail}")
3241 } else {
3242 format!("@@ {mid} @@")
3243 })
3244}
3245
/// Rebuilds the text of `lines[start..end]` for feeding back into a line diff.
///
/// Lines are joined with `\n`. The last line is newline-terminated unless the
/// slice reaches the end of the file and the file itself has no trailing
/// newline (`full_file_ends_with_newline == false`).
///
/// `end` is clamped to `lines.len()`; an empty range yields an empty string.
///
/// The terminator is appended unconditionally when required. `lines` never
/// contain `\n`, so the joined text ends in `\n` only when the final line is
/// empty, and that blank line still needs its own terminator to survive a
/// later `str::lines()` split.
fn line_slice_for_diff_with_eof_nl(
    lines: &[&str],
    start: usize,
    end: usize,
    full_file_ends_with_newline: bool,
) -> String {
    let end = end.min(lines.len());
    if start >= end {
        return String::new();
    }

    let mut text = lines[start..end].join("\n");
    let reaches_eof = end == lines.len();
    if !reaches_eof || full_file_ends_with_newline {
        text.push('\n');
    }
    text
}
3267
/// Maps a 0-based line index in the old file to a 0-based index in the new
/// file by walking `ops` in order.
///
/// Every returned value is clamped to `n_new`.
///
/// NOTE(review): the logic assumes `ops` is the complete op list of one diff,
/// ordered by position — confirm against callers.
fn map_old_line_to_new(ops: &[similar::DiffOp], old_line: usize, n_new: usize) -> usize {
    use similar::DiffOp;
    // New-side position recorded by the last op that lies before `old_line`.
    let mut n = 0usize;
    for op in ops {
        match *op {
            DiffOp::Equal {
                old_index,
                new_index,
                len,
            } => {
                // Run ends at or before the target: remember its new-side end.
                if old_index + len <= old_line {
                    n = new_index + len;
                    continue;
                }
                // Target lies inside the run: same offset on the new side.
                if old_index < old_line {
                    let take = old_line - old_index;
                    return (new_index + take).min(n_new);
                }
                // Run starts at or after the target.
                return new_index.min(n_new);
            }
            DiffOp::Delete {
                old_index,
                old_len,
                new_index,
            } => {
                // Deletion ends at or before the target.
                if old_index + old_len <= old_line {
                    n = new_index;
                    continue;
                }
                // Target lies inside the deleted lines: map to the deletion point.
                if old_index < old_line {
                    return new_index.min(n_new);
                }
                // Deletion starts at or after the target: no return here, the
                // loop moves on to the next op.
            }
            DiffOp::Insert {
                old_index,
                new_index,
                new_len,
            } => {
                // Insertion sits before the target: skip past the new lines.
                if old_index < old_line {
                    n = new_index + new_len;
                    continue;
                }
                // Insertion sits exactly at the target: map to just after it.
                if old_index == old_line {
                    return (new_index + new_len).min(n_new);
                }
                // Insertion sits after the target.
                return new_index.min(n_new);
            }
            DiffOp::Replace {
                old_index,
                old_len,
                new_index,
                new_len,
            } => {
                // Replacement ends at or before the target.
                if old_index + old_len <= old_line {
                    n = new_index + new_len;
                    continue;
                }
                // Target lies inside the replaced lines: keep the same offset,
                // capped at the length of the replacement.
                if old_index < old_line {
                    let into_old = old_line - old_index;
                    let mapped = new_index + into_old.min(new_len);
                    return mapped.min(n_new);
                }
                // Replacement starts at or after the target.
                return new_index.min(n_new);
            }
        }
    }
    // Every op ended before the target (or there were none).
    n.min(n_new)
}
3340
/// Parses a `@@ -a,b +c,d @@` header into 0-based half-open line ranges
/// `(old_start, old_end, new_start, new_end)`.
///
/// A missing count means one line. Returns `None` if the header is malformed.
///
/// For an empty range (`count == 0`) the number in the header names the line
/// *before* the range, so the 0-based start equals that number rather than
/// that number minus one. `@@ -5,0 +6,2 @@` therefore yields the old range
/// `5..5`.
fn parse_unified_hunk_header_ranges(header: &str) -> Option<(usize, usize, usize, usize)> {
    /// Converts one `start[,count]` spec into a 0-based `(start, end)` pair.
    fn parse_side(spec: &str) -> Option<(usize, usize)> {
        let spec = spec.trim();
        let (start_one_based, count) = match spec.split_once(',') {
            Some((start, count)) => (start.parse::<usize>().ok()?, count.parse::<usize>().ok()?),
            None => (spec.parse::<usize>().ok()?, 1usize),
        };
        let start = if count == 0 {
            // Empty range: the header points at the preceding line.
            start_one_based
        } else {
            start_one_based.saturating_sub(1)
        };
        Some((start, start.saturating_add(count)))
    }

    let rest = header.strip_prefix("@@ ")?;
    let (old_tok, rest) = rest.split_once(" +")?;
    let old_tok = old_tok.strip_prefix('-')?;
    let (new_tok, _) = rest.split_once(" @@")?;

    let (old_start, old_end) = parse_side(old_tok)?;
    let (new_start, new_end) = parse_side(new_tok)?;
    Some((old_start, old_end, new_start, new_end))
}
3365
3366fn append_with_whole_function_added(
3369 ops: &[similar::DiffOp],
3370 n_old: usize,
3371 n_new: usize,
3372 new_lines: &[&str],
3373 matcher: Option<&FuncnameMatcher>,
3374) -> bool {
3375 use similar::DiffOp;
3376 if n_old == 0 {
3377 return false;
3378 }
3379 let mut only_ins_or_eq = true;
3380 let mut min_new_ins = usize::MAX;
3381 for op in ops {
3382 match *op {
3383 DiffOp::Equal { .. } => {}
3384 DiffOp::Insert {
3385 new_index, new_len, ..
3386 } => {
3387 min_new_ins = min_new_ins.min(new_index);
3388 if new_len == 0 {
3389 only_ins_or_eq = false;
3390 }
3391 }
3392 DiffOp::Delete { .. } | DiffOp::Replace { .. } => {
3393 only_ins_or_eq = false;
3394 }
3395 }
3396 }
3397 let mut insert_at_eof = false;
3398 for op in ops {
3399 if let DiffOp::Insert { old_index, .. } = *op {
3400 if old_index == n_old {
3401 insert_at_eof = true;
3402 break;
3403 }
3404 }
3405 }
3406 let append_at_eof = min_new_ins == n_old || insert_at_eof;
3407 if !only_ins_or_eq || !append_at_eof || min_new_ins == usize::MAX {
3408 return false;
3409 }
3410 let mut j = min_new_ins;
3415 while j < n_new {
3416 let line = new_lines[j];
3417 if line.trim().is_empty() {
3418 j += 1;
3419 continue;
3420 }
3421 if let Some(m) = matcher {
3422 if m.match_line(line).is_some() {
3423 return true;
3424 }
3425 } else if inserted_block_starts_with_c_like_function_definition(line) {
3426 return true;
3427 }
3428 j += 1;
3429 }
3430 false
3431}
3432
/// Heuristically decides whether `line` looks like the start of a C-style
/// function definition.
///
/// The text before the first `(` must consist of at least two
/// whitespace-separated tokens: the last one is the function name (ASCII
/// alphanumerics and `_`), and at least one earlier token must be a known
/// type keyword or modifier.
///
/// Pointer and reference sigils are ignored on either side of the name and at
/// the end of type tokens, so both `char *dup(` and `char* dup(` are accepted.
fn inserted_block_starts_with_c_like_function_definition(line: &str) -> bool {
    const TYPE_OR_MODIFIER: &[&str] = &[
        "static", "extern", "inline", "void", "int", "char", "short", "long", "float", "double",
        "unsigned", "signed", "struct", "enum", "union", "const", "volatile", "typedef",
    ];
    const SIGILS: [char; 2] = ['*', '&'];

    let trimmed = line.trim_start();
    let Some(open_paren) = trimmed.find('(') else {
        return false;
    };

    let tokens: Vec<&str> = trimmed[..open_paren].split_whitespace().collect();
    let Some((name_token, leading)) = tokens.split_last() else {
        return false;
    };
    if leading.is_empty() {
        // A lone `name(` is a call, not a definition.
        return false;
    }

    // `*name` / `&name`: the sigil belongs to the return type, not the name.
    let name = name_token
        .trim_start_matches(SIGILS)
        .trim_end_matches(SIGILS);
    if name.is_empty() || !name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return false;
    }

    leading
        .iter()
        .any(|tok| TYPE_OR_MODIFIER.contains(&tok.trim_end_matches(SIGILS)))
}
3476
3477fn hunk_old_change_end_exclusive(ops: &[similar::DiffOp]) -> usize {
3478 use similar::DiffOp;
3479 let mut max_o = 0usize;
3480 for op in ops {
3481 match *op {
3482 DiffOp::Delete {
3483 old_index, old_len, ..
3484 } => {
3485 max_o = max_o.max(old_index + old_len);
3486 }
3487 DiffOp::Replace {
3488 old_index, old_len, ..
3489 } => {
3490 max_o = max_o.max(old_index + old_len);
3491 }
3492 DiffOp::Insert { old_index, .. } => {
3493 max_o = max_o.max(old_index);
3496 }
3497 DiffOp::Equal { .. } => {}
3498 }
3499 }
3500 max_o
3501}
3502
3503fn func_context_old_anchor(ops: &[similar::DiffOp], n_old: usize) -> usize {
3504 use similar::DiffOp;
3505 let mut has_delete_or_replace = false;
3506 let mut min_del = usize::MAX;
3507 let mut min_ins_old = usize::MAX;
3508
3509 for op in ops {
3510 match *op {
3511 DiffOp::Delete {
3512 old_index, old_len, ..
3513 } => {
3514 has_delete_or_replace = true;
3515 min_del = min_del.min(old_index);
3516 min_del = min_del.min(old_index + old_len.saturating_sub(1));
3517 }
3518 DiffOp::Replace {
3519 old_index, old_len, ..
3520 } => {
3521 has_delete_or_replace = true;
3522 min_del = min_del.min(old_index);
3523 min_del = min_del.min(old_index + old_len.saturating_sub(1));
3524 }
3525 DiffOp::Insert { old_index, .. } => {
3526 min_ins_old = min_ins_old.min(old_index);
3527 }
3528 DiffOp::Equal { .. } => {}
3529 }
3530 }
3531
3532 let mut i1 = if has_delete_or_replace {
3533 min_del
3534 } else if min_ins_old != usize::MAX {
3535 min_ins_old
3536 } else {
3537 0
3538 };
3539
3540 let pure_insert = ops
3541 .iter()
3542 .all(|op| matches!(op, DiffOp::Insert { .. } | DiffOp::Equal { .. }))
3543 && ops.iter().any(|op| matches!(op, DiffOp::Insert { .. }));
3544
3545 if pure_insert && i1 >= n_old && n_old > 0 {
3546 i1 = n_old - 1;
3547 }
3548
3549 i1.min(n_old.saturating_sub(1))
3550}
3551
3552fn expand_func_pre_start(
3553 s1: usize,
3554 i1: usize,
3555 n_old: usize,
3556 old_lines: &[&str],
3557 matcher: Option<&FuncnameMatcher>,
3558) -> usize {
3559 if n_old == 0 {
3560 return s1;
3561 }
3562 let i1 = i1.min(n_old.saturating_sub(1));
3563 let mut fs1 = get_func_line_backward(old_lines, i1, matcher).unwrap_or(i1);
3564 while fs1 > 0
3565 && !is_line_empty_for_func_context(old_lines[fs1 - 1])
3566 && !is_func_line(old_lines[fs1 - 1], matcher)
3567 {
3568 fs1 -= 1;
3569 }
3570 s1.min(fs1)
3571}
3572
3573fn expand_func_post_end(
3574 e1: usize,
3575 i1_end: usize,
3576 n_old: usize,
3577 old_lines: &[&str],
3578 matcher: Option<&FuncnameMatcher>,
3579) -> usize {
3580 let from = i1_end.min(n_old);
3581 let fe1 = get_func_line_forward(old_lines, from, matcher).unwrap_or(n_old);
3582 let mut fe1_adj = fe1;
3583 while fe1_adj > 0 && is_line_empty_for_func_context(old_lines[fe1_adj - 1]) {
3584 fe1_adj -= 1;
3585 }
3586 e1.max(fe1_adj).min(n_old)
3587}
3588
/// Returns `true` when `line` is empty or made up solely of whitespace.
fn is_line_empty_for_func_context(line: &str) -> bool {
    line.trim().is_empty()
}
3592
3593fn is_func_line(line: &str, matcher: Option<&FuncnameMatcher>) -> bool {
3594 if let Some(m) = matcher {
3595 return m.match_line(line).is_some();
3596 }
3597 let t = line.trim_end_matches(['\n', '\r']);
3598 if t.is_empty() {
3599 return false;
3600 }
3601 let b = t.as_bytes()[0];
3602 b.is_ascii_alphabetic() || b == b'_' || b == b'$'
3603}
3604
3605fn get_func_line_backward(
3606 old_lines: &[&str],
3607 start: usize,
3608 matcher: Option<&FuncnameMatcher>,
3609) -> Option<usize> {
3610 let mut l = start.min(old_lines.len().saturating_sub(1));
3611 if old_lines.is_empty() {
3612 return None;
3613 }
3614 loop {
3615 if is_func_line(old_lines[l], matcher) {
3616 return Some(l);
3617 }
3618 if l == 0 {
3619 break;
3620 }
3621 l -= 1;
3622 }
3623 None
3624}
3625
3626fn get_func_line_forward(
3627 old_lines: &[&str],
3628 start: usize,
3629 matcher: Option<&FuncnameMatcher>,
3630) -> Option<usize> {
3631 let mut l = start;
3632 while l < old_lines.len() {
3633 if is_func_line(old_lines[l], matcher) {
3634 return Some(l);
3635 }
3636 l += 1;
3637 }
3638 None
3639}
3640
3641pub fn anchored_unified_diff(
3651 old_content: &str,
3652 new_content: &str,
3653 old_path: &str,
3654 new_path: &str,
3655 context_lines: usize,
3656 anchors: &[String],
3657 algorithm: similar::Algorithm,
3658 use_git_histogram: bool,
3659 indent_heuristic: bool,
3660 quote_path_fully: bool,
3661) -> String {
3662 use crate::quote_path::format_diff_path_with_prefix;
3663 use similar::TextDiff;
3664
3665 let old_lines: Vec<&str> = old_content.lines().collect();
3666 let new_lines: Vec<&str> = new_content.lines().collect();
3667
3668 let mut anchor_pairs: Vec<(usize, usize)> = Vec::new(); for anchor in anchors {
3672 let anchor_str = anchor.as_str();
3673
3674 let old_positions: Vec<usize> = old_lines
3676 .iter()
3677 .enumerate()
3678 .filter(|(_, l)| l.trim_end() == anchor_str)
3679 .map(|(i, _)| i)
3680 .collect();
3681
3682 let new_positions: Vec<usize> = new_lines
3684 .iter()
3685 .enumerate()
3686 .filter(|(_, l)| l.trim_end() == anchor_str)
3687 .map(|(i, _)| i)
3688 .collect();
3689
3690 if old_positions.len() == 1 && new_positions.len() == 1 {
3692 anchor_pairs.push((old_positions[0], new_positions[0]));
3693 }
3694 }
3695
3696 if anchor_pairs.is_empty() {
3698 return unified_diff_with_prefix_and_funcname_and_algorithm(
3699 old_content,
3700 new_content,
3701 old_path,
3702 new_path,
3703 context_lines,
3704 0,
3705 "a/",
3706 "b/",
3707 None,
3708 algorithm,
3709 false,
3710 use_git_histogram,
3711 indent_heuristic,
3712 quote_path_fully,
3713 );
3714 }
3715
3716 anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
3718
3719 let mut filtered: Vec<(usize, usize)> = Vec::new();
3722 for &pair in &anchor_pairs {
3723 if filtered.is_empty() || filtered.last().is_some_and(|last| pair.1 > last.1) {
3724 filtered.push(pair);
3725 }
3726 }
3727 let anchor_pairs = filtered;
3728
3729 struct LineDiffOp {
3738 tag: char, line: String,
3740 }
3741
3742 let append_segment_diff =
3743 |ops: &mut Vec<LineDiffOp>, old_seg_input: &str, new_seg_input: &str| {
3744 use similar::ChangeTag;
3745 let old_ls: Vec<&str> = old_seg_input.lines().collect();
3746 let new_ls: Vec<&str> = new_seg_input.lines().collect();
3747 if old_ls.is_empty() && new_ls.is_empty() {
3748 return;
3749 }
3750 let seg_diff = TextDiff::configure()
3751 .algorithm(algorithm)
3752 .diff_slices(&old_ls, &new_ls);
3753 let raw = seg_diff.ops().to_vec();
3754 let compacted = diff_indent_heuristic::apply_change_compact_to_ops(
3755 &raw,
3756 &old_ls,
3757 &new_ls,
3758 indent_heuristic,
3759 );
3760 for op in &compacted {
3761 for ch in op.iter_changes(&old_ls, &new_ls) {
3762 let t = match ch.tag() {
3763 ChangeTag::Equal => ' ',
3764 ChangeTag::Delete => '-',
3765 ChangeTag::Insert => '+',
3766 };
3767 ops.push(LineDiffOp {
3768 tag: t,
3769 line: ch.value().to_string(),
3770 });
3771 }
3772 }
3773 };
3774
3775 let mut ops: Vec<LineDiffOp> = Vec::new();
3776 let mut old_pos = 0usize;
3777 let mut new_pos = 0usize;
3778
3779 for &(old_anchor, new_anchor) in &anchor_pairs {
3780 let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
3782 let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
3783
3784 let old_seg_text = old_segment.join("\n");
3785 let new_seg_text = new_segment.join("\n");
3786
3787 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
3788 let old_seg_input = if old_seg_text.is_empty() {
3789 String::new()
3790 } else {
3791 format!("{}\n", old_seg_text)
3792 };
3793 let new_seg_input = if new_seg_text.is_empty() {
3794 String::new()
3795 } else {
3796 format!("{}\n", new_seg_text)
3797 };
3798 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
3799 }
3800
3801 ops.push(LineDiffOp {
3803 tag: ' ',
3804 line: old_lines[old_anchor].to_string(),
3805 });
3806
3807 old_pos = old_anchor + 1;
3808 new_pos = new_anchor + 1;
3809 }
3810
3811 let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
3813 let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
3814 let old_seg_text = old_segment.join("\n");
3815 let new_seg_text = new_segment.join("\n");
3816
3817 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
3818 let old_seg_input = if old_seg_text.is_empty() {
3819 String::new()
3820 } else {
3821 format!("{}\n", old_seg_text)
3822 };
3823 let new_seg_input = if new_seg_text.is_empty() {
3824 String::new()
3825 } else {
3826 format!("{}\n", new_seg_text)
3827 };
3828 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
3829 }
3830
3831 let mut output = String::new();
3833 if old_path == "/dev/null" {
3834 output.push_str("--- /dev/null\n");
3835 } else {
3836 output.push_str("--- ");
3837 output.push_str(&format_diff_path_with_prefix(
3838 "a/",
3839 old_path,
3840 quote_path_fully,
3841 ));
3842 output.push('\n');
3843 }
3844 if new_path == "/dev/null" {
3845 output.push_str("+++ /dev/null\n");
3846 } else {
3847 output.push_str("+++ ");
3848 output.push_str(&format_diff_path_with_prefix(
3849 "b/",
3850 new_path,
3851 quote_path_fully,
3852 ));
3853 output.push('\n');
3854 }
3855
3856 let total_ops = ops.len();
3858 if total_ops == 0 {
3859 return output;
3860 }
3861
3862 let mut hunks: Vec<(usize, usize)> = Vec::new(); let mut i = 0;
3865 while i < total_ops {
3866 if ops[i].tag != ' ' {
3867 let start = i.saturating_sub(context_lines);
3868 let mut end = i;
3869 while end < total_ops {
3871 if ops[end].tag != ' ' {
3872 end += 1;
3873 continue;
3874 }
3875 let mut next_change = end;
3877 while next_change < total_ops && ops[next_change].tag == ' ' {
3878 next_change += 1;
3879 }
3880 if next_change < total_ops && next_change - end <= context_lines * 2 {
3881 end = next_change + 1;
3882 } else {
3883 end = (end + context_lines).min(total_ops);
3884 break;
3885 }
3886 }
3887 if let Some(last) = hunks.last_mut() {
3889 if start <= last.1 {
3890 last.1 = end;
3891 } else {
3892 hunks.push((start, end));
3893 }
3894 } else {
3895 hunks.push((start, end));
3896 }
3897 i = end;
3898 } else {
3899 i += 1;
3900 }
3901 }
3902
3903 for (start, end) in hunks {
3905 let mut old_start = 1usize;
3907 let mut new_start = 1usize;
3908 for op in &ops[..start] {
3910 match op.tag {
3911 ' ' => {
3912 old_start += 1;
3913 new_start += 1;
3914 }
3915 '-' => {
3916 old_start += 1;
3917 }
3918 '+' => {
3919 new_start += 1;
3920 }
3921 _ => {}
3922 }
3923 }
3924 let mut old_count = 0usize;
3925 let mut new_count = 0usize;
3926 for op in &ops[start..end] {
3927 match op.tag {
3928 ' ' => {
3929 old_count += 1;
3930 new_count += 1;
3931 }
3932 '-' => {
3933 old_count += 1;
3934 }
3935 '+' => {
3936 new_count += 1;
3937 }
3938 _ => {}
3939 }
3940 }
3941
3942 output.push_str(&format!(
3943 "@@ -{},{} +{},{} @@\n",
3944 old_start, old_count, new_start, new_count
3945 ));
3946 for op in &ops[start..end] {
3947 output.push(op.tag);
3948 output.push_str(&op.line);
3949 output.push('\n');
3950 }
3951 }
3952
3953 output
3954}
3955
3956fn extract_function_context(
3962 header: &str,
3963 old_lines: &[&str],
3964 funcname_matcher: Option<&FuncnameMatcher>,
3965) -> Option<String> {
3966 let at_pos = header.find("-")?;
3968 let rest = &header[at_pos + 1..];
3969 let comma_or_space = rest.find([',', ' '])?;
3970 let start_str = &rest[..comma_or_space];
3971 let start_line: usize = start_str.parse().ok()?;
3972
3973 let old_token_end = rest.find([' ', '\t']).unwrap_or(rest.len());
3977 let old_token = &rest[..old_token_end];
3978 let old_count: usize = if let Some(comma) = old_token.find(',') {
3979 old_token[comma + 1..].parse().unwrap_or(1)
3980 } else {
3981 1
3982 };
3983
3984 if start_line == 0 {
3985 return None;
3986 }
3987
3988 let search_end = if old_count == 0 {
3995 start_line.min(old_lines.len())
3996 } else {
3997 if start_line <= 1 {
3998 return None;
3999 }
4000 (start_line - 1).min(old_lines.len())
4001 };
4002 let truncate = |text: &str| {
4003 if text.len() > 80 {
4004 let mut end = 80;
4005 while end > 0 && !text.is_char_boundary(end) {
4006 end -= 1;
4007 }
4008 text[..end].to_owned()
4009 } else {
4010 text.to_owned()
4011 }
4012 };
4013
4014 for i in (0..search_end).rev() {
4015 let line = old_lines[i];
4016 if line.is_empty() {
4017 continue;
4018 }
4019 if let Some(matcher) = funcname_matcher {
4020 if let Some(matched) = matcher.match_line(line) {
4021 return Some(truncate(&matched));
4022 }
4023 continue;
4024 }
4025
4026 let first = line.as_bytes()[0];
4027 if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
4028 return Some(truncate(line.trim_end_matches(char::is_whitespace)));
4029 }
4030 }
4031 None
4032}
4033
4034pub fn format_stat_line(
4038 path: &str,
4039 insertions: usize,
4040 deletions: usize,
4041 max_path_len: usize,
4042) -> String {
4043 format_stat_line_width(path, insertions, deletions, max_path_len, 0)
4044}
4045
/// Formats one diffstat line: ` <path> | <total> <bar>`.
///
/// `path` is left-aligned and padded to `max_path_len`. The total
/// (`insertions + deletions`) is right-aligned in `count_width` columns, or in
/// exactly as many columns as it needs when `count_width` is `0`. The bar
/// shows up to 50 `+` followed by up to 50 `-`; when there is no change at all
/// the bar and the space before it are omitted.
pub fn format_stat_line_width(
    path: &str,
    insertions: usize,
    deletions: usize,
    max_path_len: usize,
    count_width: usize,
) -> String {
    let total = insertions + deletions;
    let cw = match count_width {
        0 => total.to_string().len(),
        width => width,
    };

    let mut line = format!(" {path:<max_path_len$} | {total:>cw$}");
    if insertions > 0 || deletions > 0 {
        line.push(' ');
        line.push_str(&"+".repeat(insertions.min(50)));
        line.push_str(&"-".repeat(deletions.min(50)));
    }
    line
}
4081
/// Normalises one line for whitespace-insensitive comparison.
///
/// Every run of whitespace becomes a single space, and trailing spaces are
/// removed. A leading run is kept as one space.
#[must_use]
pub fn normalize_ignore_space_change_line(line: &str) -> String {
    let mut collapsed = String::with_capacity(line.len());
    let mut prev_was_space = false;

    for ch in line.chars() {
        let is_space = ch.is_whitespace();
        if !is_space {
            collapsed.push(ch);
        } else if !prev_was_space {
            collapsed.push(' ');
        }
        prev_was_space = is_space;
    }

    let kept = collapsed.trim_end_matches(' ').len();
    collapsed.truncate(kept);
    collapsed
}
4103
4104#[must_use]
4110pub fn normalize_ignore_space_change(content: &str) -> String {
4111 content
4112 .lines()
4113 .map(normalize_ignore_space_change_line)
4114 .collect::<Vec<_>>()
4115 .join("\n")
4116}
4117
/// Counts changed lines between `old_content` and `new_content`.
///
/// Forwards to `count_changes_with_algorithm` with the Myers algorithm from
/// the `similar` crate and the histogram path disabled. The returned pair is
/// `(insertions, deletions)`.
pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
    count_changes_with_algorithm(old_content, new_content, similar::Algorithm::Myers, false)
}
4124
4125#[must_use]
4130pub fn count_changes_with_algorithm(
4131 old_content: &str,
4132 new_content: &str,
4133 algorithm: similar::Algorithm,
4134 use_git_histogram: bool,
4135) -> (usize, usize) {
4136 if use_git_histogram {
4137 use imara_diff::{Algorithm, Diff, InternedInput};
4138 let input = InternedInput::new(old_content, new_content);
4139 let mut d = Diff::compute(Algorithm::Histogram, &input);
4140 d.postprocess_lines(&input);
4141 return (d.count_additions() as usize, d.count_removals() as usize);
4142 }
4143
4144 use similar::{ChangeTag, TextDiff};
4145
4146 let diff = TextDiff::configure()
4147 .algorithm(algorithm)
4148 .diff_lines(old_content, new_content);
4149 let mut ins = 0;
4150 let mut del = 0;
4151
4152 for change in diff.iter_all_changes() {
4153 match change.tag() {
4154 ChangeTag::Insert => ins += 1,
4155 ChangeTag::Delete => del += 1,
4156 ChangeTag::Equal => {}
4157 }
4158 }
4159
4160 (ins, del)
4161}
4162
/// Counts the lines in `data`.
///
/// Every `\n` terminates a line; a final run of bytes without a terminating
/// `\n` counts as one more line. Empty input has zero lines.
#[must_use]
pub fn count_git_lines(data: &[u8]) -> usize {
    let newline_count = data.iter().filter(|&&byte| byte == b'\n').count();
    match data.last() {
        None => 0,
        Some(&b'\n') => newline_count,
        // Unterminated final line.
        Some(_) => newline_count + 1,
    }
}
4187
/// Upper end of the score scale used in this module: scores range over
/// `0..=GIT_DIFF_MAX_SCORE` (`parse_diff_rename_score_token` maps 100% to it).
pub const GIT_DIFF_MAX_SCORE: u64 = 60_000;
/// Private alias of `GIT_DIFF_MAX_SCORE` used by the rewrite-score arithmetic.
const DIFF_MAX_SCORE: u64 = GIT_DIFF_MAX_SCORE;
/// Size threshold in bytes: `rewrite_merge_score` and
/// `should_break_rewrite_inner` give up when the larger of the two blobs is
/// smaller than this.
const DIFF_MINIMUM_BREAK_SIZE: usize = 400;
/// Break score passed by `should_break_rewrite_for_stat` (half of
/// `DIFF_MAX_SCORE`).
const DIFF_DEFAULT_BREAK_SCORE: u64 = 30_000;
/// Public name for `DIFF_DEFAULT_BREAK_SCORE`.
pub const GIT_DIFF_DEFAULT_BREAK_SCORE: u64 = DIFF_DEFAULT_BREAK_SCORE;
/// NOTE(review): not referenced in the visible part of this file; presumably
/// the merge-score threshold (36_000 of 60_000, i.e. 60%) applied to pairs
/// that were broken apart earlier -- confirm against callers.
pub const GIT_DIFF_DEFAULT_MERGE_SCORE_AFTER_BREAK: u64 = 36_000;
/// Modulus applied to every span hash computed by `hash_blob_spans`.
const DIFF_HASHBASE: u32 = 107_927;
4199
/// One slot of the span table: a span hash and the number of bytes counted
/// for it. A slot with `cnt == 0` is empty.
#[derive(Clone, Copy, Default)]
struct SpanSlot {
    hashval: u32,
    cnt: u32,
}

/// Open-addressed hash table (linear probing, power-of-two size) that
/// accumulates a byte count per span hash.
struct SpanHashTop {
    /// log2 of the number of slots in `data`.
    alloc_log2: u8,
    /// Number of new entries that may still be stored before the table is
    /// grown. Dropping below zero triggers `rehash`.
    free_slots: i32,
    /// Slot storage; always exactly `1 << alloc_log2` entries long.
    data: Vec<SpanSlot>,
}

impl SpanHashTop {
    /// Creates an empty table with `1 << initial_log2` slots.
    fn new(initial_log2: u8) -> Self {
        let cap = 1usize << initial_log2;
        Self {
            alloc_log2: initial_log2,
            free_slots: initial_free(initial_log2),
            data: vec![SpanSlot::default(); cap],
        }
    }

    /// Number of slots (occupied or not) in the table.
    fn len(&self) -> usize {
        1usize << self.alloc_log2
    }

    /// Adds `cnt` bytes to the entry for `hashval`, creating the entry when it
    /// does not exist yet.
    ///
    /// Creating an entry may grow the table. The new entry is stored *before*
    /// the table grows and is migrated by `rehash`, so it must not be inserted
    /// a second time afterwards -- doing so would count the span twice.
    fn add_span(&mut self, hashval: u32, cnt: u32) {
        let lim = self.len();
        let mut bucket = (hashval as usize) & (lim - 1);
        loop {
            let slot = &mut self.data[bucket];
            if slot.cnt == 0 {
                slot.hashval = hashval;
                slot.cnt = cnt;
                self.free_slots -= 1;
                if self.free_slots < 0 {
                    self.rehash();
                }
                return;
            }
            if slot.hashval == hashval {
                slot.cnt = slot.cnt.saturating_add(cnt);
                return;
            }
            // Linear probing with wrap-around.
            bucket += 1;
            if bucket >= lim {
                bucket = 0;
            }
        }
    }

    /// Doubles the table and re-inserts every occupied slot.
    fn rehash(&mut self) {
        let old = std::mem::take(&mut self.data);
        self.alloc_log2 = self.alloc_log2.saturating_add(1);
        self.free_slots = initial_free(self.alloc_log2);
        self.data = vec![SpanSlot::default(); self.len()];
        for slot in old.into_iter().filter(|slot| slot.cnt != 0) {
            self.add_span_after_rehash(slot.hashval, slot.cnt);
        }
    }

    /// Re-inserts an entry while the table is being rebuilt by `rehash`.
    ///
    /// Insertion into a freshly grown table follows exactly the same rules as
    /// a regular insertion, so this simply delegates to `add_span`.
    fn add_span_after_rehash(&mut self, hashval: u32, cnt: u32) {
        self.add_span(hashval, cnt);
    }

    /// Sorts the slots so that occupied entries come first in ascending
    /// `hashval` order and all empty slots follow.
    ///
    /// The comparator treats two empty slots as equal so that it is a proper
    /// total order, which `sort_by` requires.
    fn sort_by_hashval(&mut self) {
        use std::cmp::Ordering;

        let sz = self.len();
        self.data[..sz].sort_by(|a, b| match (a.cnt == 0, b.cnt == 0) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            (false, false) => a.hashval.cmp(&b.hashval),
        });
    }
}

/// Number of entries a table with `1 << sz_log2` slots may take before it is
/// grown: `slots * (sz_log2 - 3) / sz_log2`, never negative.
///
/// `sz_log2` must be non-zero (the formula divides by it).
fn initial_free(sz_log2: u8) -> i32 {
    let sz = sz_log2 as i32;
    ((1i32 << sz_log2) * (sz - 3) / sz).max(0)
}
4317
4318fn hash_blob_spans(buf: &[u8], is_text: bool) -> SpanHashTop {
4319 let mut hash = SpanHashTop::new(9);
4320 let mut n = 0u32;
4321 let mut accum1: u32 = 0;
4322 let mut accum2: u32 = 0;
4323 let mut i = 0usize;
4324 while i < buf.len() {
4325 let c = buf[i] as u32;
4326 let old_1 = accum1;
4327 i += 1;
4328
4329 if is_text && c == b'\r' as u32 && i < buf.len() && buf[i] == b'\n' {
4330 continue;
4331 }
4332
4333 accum1 = accum1.wrapping_shl(7) ^ accum2.wrapping_shr(25);
4334 accum2 = accum2.wrapping_shl(7) ^ old_1.wrapping_shr(25);
4335 accum1 = accum1.wrapping_add(c);
4336 n += 1;
4337 if n < 64 && c != b'\n' as u32 {
4338 continue;
4339 }
4340 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
4341 hash.add_span(hashval, n);
4342 n = 0;
4343 accum1 = 0;
4344 accum2 = 0;
4345 }
4346 if n > 0 {
4347 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
4348 hash.add_span(hashval, n);
4349 }
4350 hash.sort_by_hashval();
4351 hash
4352}
4353
4354#[must_use]
4359pub fn diffcore_count_changes(old: &[u8], new: &[u8]) -> (u64, u64) {
4360 let src_is_text = !crate::merge_file::is_binary(old);
4361 let dst_is_text = !crate::merge_file::is_binary(new);
4362 let src_count = hash_blob_spans(old, src_is_text);
4363 let dst_count = hash_blob_spans(new, dst_is_text);
4364 let mut sc: u64 = 0;
4365 let mut la: u64 = 0;
4366 let mut si = 0usize;
4367 let mut di = 0usize;
4368 let src_len = src_count.len();
4369 let dst_len = dst_count.len();
4370 loop {
4371 if si >= src_len || src_count.data[si].cnt == 0 {
4372 break;
4373 }
4374 let s_hash = src_count.data[si].hashval;
4375 let s_cnt = u64::from(src_count.data[si].cnt);
4376 while di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval < s_hash {
4377 la += u64::from(dst_count.data[di].cnt);
4378 di += 1;
4379 }
4380 let mut dst_cnt = 0u64;
4381 if di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval == s_hash {
4382 dst_cnt = u64::from(dst_count.data[di].cnt);
4383 di += 1;
4384 }
4385 if s_cnt < dst_cnt {
4386 la += dst_cnt - s_cnt;
4387 sc += s_cnt;
4388 } else {
4389 sc += dst_cnt;
4390 }
4391 si += 1;
4392 }
4393 while di < dst_len && dst_count.data[di].cnt != 0 {
4394 la += u64::from(dst_count.data[di].cnt);
4395 di += 1;
4396 }
4397 (sc, la)
4398}
4399
/// Decides whether the change from `old` to `new` counts as a complete
/// rewrite, using the default break score (`DIFF_DEFAULT_BREAK_SCORE`).
///
/// Forwards to `should_break_rewrite_inner`.
#[must_use]
pub fn should_break_rewrite_for_stat(old: &[u8], new: &[u8]) -> bool {
    should_break_rewrite_inner(old, new, DIFF_DEFAULT_BREAK_SCORE)
}
4406
/// Decides whether the change from `old` to `new` counts as a complete
/// rewrite, using a caller-supplied `break_score` on the
/// `0..=GIT_DIFF_MAX_SCORE` scale.
///
/// Forwards to `should_break_rewrite_inner`.
#[must_use]
pub fn should_break_rewrite_pair(old: &[u8], new: &[u8], break_score: u64) -> bool {
    should_break_rewrite_inner(old, new, break_score)
}
4414
4415pub fn parse_diff_rename_score_token(arg: &str) -> Option<u64> {
4418 let mut num: u64 = 0;
4419 let mut scale: u64 = 1;
4420 let mut dot = false;
4421 let mut saw_digit = false;
4422 for ch in arg.chars() {
4423 if !dot && ch == '.' {
4424 scale = 1;
4425 dot = true;
4426 continue;
4427 }
4428 if ch == '%' {
4429 scale = if dot { scale.saturating_mul(100) } else { 100 };
4430 break;
4431 }
4432 if ch.is_ascii_digit() {
4433 saw_digit = true;
4434 if scale < 100_000 {
4435 scale = scale.saturating_mul(10);
4436 num = num.saturating_mul(10) + u64::from(ch as u8 - b'0');
4437 }
4438 } else {
4439 break;
4440 }
4441 }
4442 if !saw_digit {
4443 return None;
4444 }
4445 Some(if num >= scale {
4446 GIT_DIFF_MAX_SCORE
4447 } else {
4448 GIT_DIFF_MAX_SCORE * num / scale
4449 })
4450}
4451
4452#[must_use]
4455pub fn rewrite_merge_score(old: &[u8], new: &[u8]) -> Option<u64> {
4456 if old.is_empty() {
4457 return None;
4458 }
4459 let max_size = old.len().max(new.len());
4460 if max_size < DIFF_MINIMUM_BREAK_SIZE {
4461 return None;
4462 }
4463 let (src_copied, _) = diffcore_count_changes(old, new);
4464 let src_copied = src_copied.min(old.len() as u64);
4465 let src_removed = (old.len() as u64).saturating_sub(src_copied);
4466 Some(src_removed * DIFF_MAX_SCORE / old.len() as u64)
4467}
4468
4469#[must_use]
4471pub fn rewrite_dissimilarity_index_percent(old: &[u8], new: &[u8]) -> Option<u32> {
4472 let score = rewrite_merge_score(old, new)?;
4473 Some((score * 100 / DIFF_MAX_SCORE).min(100) as u32)
4474}
4475
4476fn should_break_rewrite_inner(src: &[u8], dst: &[u8], break_score: u64) -> bool {
4477 if src.is_empty() {
4478 return false;
4479 }
4480 let max_size = src.len().max(dst.len());
4481 if max_size < DIFF_MINIMUM_BREAK_SIZE {
4482 return false;
4483 }
4484 let (src_copied, literal_added) = diffcore_count_changes(src, dst);
4485 let src_copied = src_copied.min(src.len() as u64);
4486 let mut literal_added = literal_added;
4487 let dst_len = dst.len() as u64;
4488 if src_copied < dst_len && literal_added + src_copied > dst_len {
4489 literal_added = dst_len.saturating_sub(src_copied);
4490 }
4491 let src_removed = (src.len() as u64).saturating_sub(src_copied);
4492 let merge_score = src_removed * DIFF_MAX_SCORE / src.len() as u64;
4493 if merge_score > break_score {
4494 return true;
4495 }
4496 let delta_size = src_removed.saturating_add(literal_added);
4497 if delta_size * DIFF_MAX_SCORE / (max_size as u64) < break_score {
4498 return false;
4499 }
4500 let s = src.len() as u64;
4501 if (s * break_score < src_removed * DIFF_MAX_SCORE)
4502 && (literal_added * 20 < src_removed)
4503 && (literal_added * 20 < src_copied)
4504 {
4505 return false;
4506 }
4507 true
4508}
4509
/// A non-tree entry collected by `flatten_tree`, addressed by its full path.
struct FlatEntry {
    /// Path from the root of the flattened tree, components joined with `/`.
    path: String,
    /// Mode copied from the tree entry.
    mode: u32,
    /// Object id copied from the tree entry.
    oid: ObjectId,
}
4518
4519fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
4520 let entries = read_tree(odb, tree_oid)?;
4521 let mut result = Vec::new();
4522
4523 for entry in entries {
4524 let name_str = String::from_utf8_lossy(&entry.name);
4525 let path = format_path(prefix, &name_str);
4526 if is_tree_mode(entry.mode) {
4527 let nested = flatten_tree(odb, &entry.oid, &path)?;
4528 result.extend(nested);
4529 } else {
4530 result.push(FlatEntry {
4531 path,
4532 mode: entry.mode,
4533 oid: entry.oid,
4534 });
4535 }
4536 }
4537
4538 Ok(result)
4539}
4540
4541pub fn head_path_states(
4543 odb: &Odb,
4544 head_tree: Option<&ObjectId>,
4545) -> Result<std::collections::BTreeMap<String, (u32, ObjectId)>> {
4546 let mut m = std::collections::BTreeMap::new();
4547 let Some(t) = head_tree else {
4548 return Ok(m);
4549 };
4550 for fe in flatten_tree(odb, t, "")? {
4551 m.insert(fe.path, (fe.mode, fe.oid));
4552 }
4553 Ok(m)
4554}
4555
/// Returns `true` when `mode` is exactly the tree (directory) mode `040000`.
fn is_tree_mode(mode: u32) -> bool {
    matches!(mode, 0o040000)
}
4560
/// Joins `prefix` and `name` with `/`; an empty `prefix` yields `name` alone.
fn format_path(prefix: &str, name: &str) -> String {
    match prefix {
        "" => name.to_owned(),
        dir => [dir, name].join("/"),
    }
}
4569
/// Renders `mode` in octal, zero-padded to at least six digits.
pub fn format_mode(mode: u32) -> String {
    format!("{:06o}", mode)
}
4574
/// Public entry point for reading the commit a submodule checkout at
/// `sub_dir` currently points to.
///
/// Forwards to the private `read_submodule_head`; returns `None` whenever that
/// lookup fails.
#[must_use]
pub fn read_submodule_head_for_checkout(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head(sub_dir)
}
4582
4583#[must_use]
4588pub fn submodule_commit_subject_line(c: &CommitData) -> String {
4589 let enc = c.encoding.as_deref().unwrap_or("UTF-8");
4590 let is_latin1 = enc.eq_ignore_ascii_case("ISO8859-1")
4591 || enc.eq_ignore_ascii_case("ISO-8859-1")
4592 || enc.eq_ignore_ascii_case("LATIN1")
4593 || enc.eq_ignore_ascii_case("ISO-8859-15");
4594 if let Some(raw) = c.raw_message.as_deref() {
4595 let line = raw.split(|b| *b == b'\n').next().unwrap_or(raw);
4596 if is_latin1 {
4597 return line
4598 .iter()
4599 .map(|&b| b as char)
4600 .collect::<String>()
4601 .trim()
4602 .to_owned();
4603 }
4604 return String::from_utf8_lossy(line).trim().to_string();
4605 }
4606 c.message.lines().next().unwrap_or("").trim().to_owned()
4607}
4608
/// Returns `true` when `sub_dir` is missing or is a directory without any
/// entries.
///
/// Any other failure to list the directory (for example, the path is not a
/// directory) yields `false`.
fn submodule_worktree_is_unpopulated_placeholder(sub_dir: &Path) -> bool {
    fs::read_dir(sub_dir)
        .map(|mut listing| listing.next().is_none())
        .unwrap_or_else(|err| err.kind() == std::io::ErrorKind::NotFound)
}
4618
/// Module-private alias for `read_submodule_head_oid`: returns the commit the
/// submodule at `sub_dir` currently points to, or `None` when it cannot be
/// determined.
fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head_oid(sub_dir)
}
4622
/// Locates the git directory referenced by `<sub_dir>/.git`.
///
/// * If `.git` is a file, the first line starting with `gitdir: ` names the
///   git directory; a relative target is joined onto `sub_dir`. A file that
///   cannot be read or has no such line yields `None`.
/// * If `.git` is a directory, that directory is returned.
/// * Otherwise `None` is returned.
#[must_use]
pub fn submodule_embedded_git_dir(sub_dir: &Path) -> Option<PathBuf> {
    let dot_git = sub_dir.join(".git");

    if dot_git.is_file() {
        let text = fs::read_to_string(&dot_git).ok()?;
        let target = text
            .lines()
            .find_map(|line| line.strip_prefix("gitdir: "))?
            .trim();
        let target = Path::new(target);
        let resolved = if target.is_absolute() {
            target.to_path_buf()
        } else {
            sub_dir.join(target)
        };
        return Some(resolved);
    }

    if dot_git.is_dir() {
        return Some(dot_git);
    }
    None
}
4644
/// Walks up from the parent of `sub_dir` to the nearest ancestor that has a
/// `.git` entry.
///
/// Returns `(ancestor directory, its git directory)`. When the ancestor's
/// `.git` is a file, the git directory is taken from its `gitdir: ` line
/// (relative targets are joined onto the ancestor); if that file cannot be
/// read or has no such line the search stops with `None`. `None` is also
/// returned when no ancestor has a `.git` entry. `sub_dir` itself is never
/// considered.
fn find_superproject_git(sub_dir: &Path) -> Option<(PathBuf, PathBuf)> {
    for ancestor in sub_dir.ancestors().skip(1) {
        let marker = ancestor.join(".git");
        if !marker.exists() {
            continue;
        }

        let git_dir = if marker.is_file() {
            let text = fs::read_to_string(&marker).ok()?;
            let target = text
                .lines()
                .find_map(|line| line.strip_prefix("gitdir: "))?
                .trim();
            if Path::new(target).is_absolute() {
                PathBuf::from(target)
            } else {
                ancestor.join(target)
            }
        } else {
            marker
        };
        return Some((ancestor.to_path_buf(), git_dir));
    }
    None
}
4670
4671pub fn read_submodule_head_oid(sub_dir: &Path) -> Option<ObjectId> {
4677 let mut git_dir = submodule_embedded_git_dir(sub_dir)?;
4680 if let Some((super_wt, super_git_dir)) = find_superproject_git(sub_dir) {
4681 let rel = sub_dir.strip_prefix(&super_wt).ok()?;
4682 let rel_str = rel.to_string_lossy().replace('\\', "/");
4683 let local_mod = super_git_dir
4684 .join("modules")
4685 .join(rel_str.trim_start_matches('/'));
4686 if local_mod.join("HEAD").exists() {
4687 let sg = super_git_dir.canonicalize().unwrap_or(super_git_dir);
4688 let cur = git_dir.canonicalize().unwrap_or_else(|_| git_dir.clone());
4689 if !cur.starts_with(&sg) {
4690 git_dir = local_mod;
4691 }
4692 }
4693 }
4694 let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
4695 let head_trimmed = head_content.trim();
4696 if head_trimmed.starts_with("ref: ") {
4697 match crate::refs::resolve_ref(&git_dir, "HEAD") {
4701 Ok(oid) => Some(oid),
4702 Err(_) => {
4703 let mut found = None;
4704 for branch in ["main", "master"] {
4705 let p = git_dir.join("refs/heads").join(branch);
4706 if let Ok(s) = fs::read_to_string(&p) {
4707 if let Ok(o) = ObjectId::from_hex(s.trim()) {
4708 found = Some(o);
4709 break;
4710 }
4711 }
4712 }
4713 found
4714 }
4715 }
4716 } else {
4717 ObjectId::from_hex(head_trimmed).ok()
4718 }
4719}
4720
4721fn submodule_has_dirty_worktree_for_super_diff(
4724 super_worktree: &Path,
4725 rel_path: &str,
4726 recorded_oid: &ObjectId,
4727) -> bool {
4728 let flags = submodule_porcelain_flags(super_worktree, rel_path, *recorded_oid);
4729 flags.modified || flags.untracked
4730}
4731
/// Status flags for one submodule as computed by `submodule_porcelain_flags`.
/// All flags default to `false`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SubmodulePorcelainFlags {
    /// The submodule's current `HEAD` commit differs from the recorded one.
    pub new_commits: bool,
    /// The submodule has staged or unstaged changes, or a nested submodule
    /// reports itself as modified.
    pub modified: bool,
    /// The submodule's work tree contains files that are not in its index.
    pub untracked: bool,
}
4742
4743pub fn submodule_porcelain_flags(
4749 super_worktree: &Path,
4750 rel_path: &str,
4751 recorded_oid: ObjectId,
4752) -> SubmodulePorcelainFlags {
4753 let sub_dir = super_worktree.join(rel_path);
4754 let Some(sub_git_dir) = submodule_embedded_git_dir(&sub_dir) else {
4755 return SubmodulePorcelainFlags::default();
4756 };
4757 let Some(sub_head) = read_submodule_head_oid(&sub_dir) else {
4758 return SubmodulePorcelainFlags::default();
4759 };
4760
4761 let new_commits = sub_head != recorded_oid;
4762
4763 let index_path = sub_git_dir.join("index");
4764 let sub_index = match crate::index::Index::load(&index_path) {
4765 Ok(ix) => ix,
4766 Err(_) => {
4767 return SubmodulePorcelainFlags {
4768 new_commits,
4769 ..Default::default()
4770 }
4771 }
4772 };
4773
4774 let tracked: std::collections::BTreeSet<String> = sub_index
4775 .entries
4776 .iter()
4777 .filter(|e| e.stage() == 0)
4778 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
4779 .collect();
4780 let untracked = submodule_dir_has_untracked_inner(&sub_dir, &sub_dir, &tracked, &sub_index);
4781
4782 let objects_dir = sub_git_dir.join("objects");
4783 let odb = Odb::new(&objects_dir);
4784
4785 let sub_head_tree = (|| -> Option<ObjectId> {
4786 let h = fs::read_to_string(sub_git_dir.join("HEAD")).ok()?;
4787 let h_str = h.trim();
4788 let commit_oid = if let Some(r) = h_str.strip_prefix("ref: ") {
4789 let oid_hex = fs::read_to_string(sub_git_dir.join(r)).ok()?;
4790 ObjectId::from_hex(oid_hex.trim()).ok()?
4791 } else {
4792 ObjectId::from_hex(h_str).ok()?
4793 };
4794 let obj = odb.read(&commit_oid).ok()?;
4795 let commit = parse_commit(&obj.data).ok()?;
4796 Some(commit.tree)
4797 })();
4798
4799 let staged_dirty = sub_head_tree
4800 .as_ref()
4801 .map(|t| diff_index_to_tree(&odb, &sub_index, Some(t), false).map(|v| !v.is_empty()))
4802 .unwrap_or(Ok(false));
4803 let staged_dirty = staged_dirty.unwrap_or(false);
4804
4805 let unstaged_dirty = diff_index_to_worktree(&odb, &sub_index, &sub_dir, false, true)
4806 .map(|v| !v.is_empty())
4807 .unwrap_or(false);
4808
4809 let mut modified = staged_dirty || unstaged_dirty;
4810
4811 for e in &sub_index.entries {
4816 if e.stage() != 0 || e.mode != 0o160000 {
4817 continue;
4818 }
4819 let child = String::from_utf8_lossy(&e.path).into_owned();
4820 let full_rel = if rel_path.is_empty() {
4821 child
4822 } else {
4823 format!("{rel_path}/{child}")
4824 };
4825 let nested = submodule_porcelain_flags(super_worktree, &full_rel, e.oid);
4826 modified |= nested.modified;
4827 }
4828
4829 SubmodulePorcelainFlags {
4830 new_commits,
4831 modified,
4832 untracked,
4833 }
4834}
4835
4836fn submodule_dir_has_untracked_inner(
4837 dir: &Path,
4838 root: &Path,
4839 tracked: &std::collections::BTreeSet<String>,
4840 owning_index: &Index,
4841) -> bool {
4842 let entries = match fs::read_dir(dir) {
4843 Ok(e) => e,
4844 Err(_) => return false,
4845 };
4846 let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
4847 sorted.sort_by_key(|e| e.file_name());
4848
4849 for entry in sorted {
4850 let name = entry.file_name().to_string_lossy().to_string();
4851 if name == ".git" {
4852 continue;
4853 }
4854 let path = entry.path();
4855 let rel = path
4856 .strip_prefix(root)
4857 .map(|p| p.to_string_lossy().to_string())
4858 .unwrap_or_else(|_| name.clone());
4859
4860 let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
4861 if is_dir {
4862 let is_gitlink = owning_index
4863 .get(rel.as_bytes(), 0)
4864 .is_some_and(|e| e.mode == 0o160000);
4865 if is_gitlink {
4866 let Some(nested_git) = submodule_embedded_git_dir(&path) else {
4867 continue;
4868 };
4869 let nested_index_path = nested_git.join("index");
4870 let Ok(nested_ix) = crate::index::Index::load(&nested_index_path) else {
4871 continue;
4872 };
4873 let nested_tracked: std::collections::BTreeSet<String> = nested_ix
4874 .entries
4875 .iter()
4876 .filter(|e| e.stage() == 0)
4877 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
4878 .collect();
4879 if submodule_dir_has_untracked_inner(&path, &path, &nested_tracked, &nested_ix) {
4880 return true;
4881 }
4882 } else if submodule_dir_has_untracked_inner(&path, root, tracked, owning_index) {
4883 return true;
4884 }
4885 } else if !tracked.contains(&rel) {
4886 return true;
4887 }
4888 }
4889 false
4890}