1use std::fs;
24use std::os::unix::fs::MetadataExt;
25use std::path::{Path, PathBuf};
26
27use crate::config::ConfigSet;
28use crate::diff_indent_heuristic;
29use crate::error::{Error, Result};
30use crate::index::{Index, IndexEntry};
31use crate::objects::{parse_commit, parse_tree, CommitData, ObjectId, ObjectKind, TreeEntry};
32use crate::odb::Odb;
33use crate::userdiff::FuncnameMatcher;
34
/// Splits a unified-diff body into one slice per hunk.
///
/// A hunk begins either at offset 0 (when the body starts with `@@`) or right
/// after a newline that is followed by `@@ `. Each returned slice runs from
/// one hunk start up to the next (or to the end of `body`). Any text before
/// the first hunk start is not returned.
fn imara_unified_hunk_slices(body: &str) -> Vec<&str> {
    let leading_hunk = body.starts_with("@@").then_some(0);
    // `+ 1` skips the newline so each slice begins on the `@@` itself.
    let later_hunks = body.match_indices("\n@@ ").map(|(newline_at, _)| newline_at + 1);

    let boundaries: Vec<usize> = leading_hunk
        .into_iter()
        .chain(later_hunks)
        .chain(std::iter::once(body.len()))
        .collect();

    boundaries
        .iter()
        .zip(boundaries.iter().skip(1))
        .map(|(&from, &to)| &body[from..to])
        .collect()
}
47
/// Derives the single context length handed to the imara unified-diff printer
/// from a context-line count and an inter-hunk context value.
///
/// The result is `ceil((2 * context_lines + inter_hunk_context) / 2)`, computed
/// with saturating arithmetic and clamped to `u32::MAX`.
fn imara_context_len_for_git(context_lines: usize, inter_hunk_context: usize) -> u32 {
    let doubled_context = context_lines.saturating_mul(2);
    let combined = doubled_context.saturating_add(inter_hunk_context);
    // Halve, rounding up, without risking overflow on `combined + 1`.
    let rounded_half = combined / 2 + combined % 2;
    u32::try_from(rounded_half).unwrap_or(u32::MAX)
}
57
58fn histogram_unified_body_raw(
59 old_content: &str,
60 new_content: &str,
61 context_lines: usize,
62 inter_hunk_context: usize,
63) -> String {
64 use imara_diff::{Algorithm, BasicLineDiffPrinter, Diff, InternedInput, UnifiedDiffConfig};
65
66 let input = InternedInput::new(old_content, new_content);
67 let mut diff = Diff::compute(Algorithm::Histogram, &input);
68 diff.postprocess_lines(&input);
69 let mut config = UnifiedDiffConfig::default();
70 config.context_len(imara_context_len_for_git(context_lines, inter_hunk_context));
71 let printer = BasicLineDiffPrinter(&input.interner);
72 diff.unified_diff(&printer, config, &input).to_string()
73}
74
/// Returns the histogram unified diff of two texts as hunks only.
///
/// This is a public pass-through to `histogram_unified_body_raw`: the result
/// contains the rendered hunks and no `---`/`+++` file header lines.
///
/// * `context_lines` / `inter_hunk_context` — combined into the context length
///   used by the printer.
#[must_use]
pub fn unified_diff_histogram_hunks_only(
    old_content: &str,
    new_content: &str,
    context_lines: usize,
    inter_hunk_context: usize,
) -> String {
    histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context)
}
87
88#[must_use]
90pub fn unified_diff_histogram_with_prefix_and_funcname(
91 old_content: &str,
92 new_content: &str,
93 old_path: &str,
94 new_path: &str,
95 context_lines: usize,
96 inter_hunk_context: usize,
97 src_prefix: &str,
98 dst_prefix: &str,
99 funcname_matcher: Option<&FuncnameMatcher>,
100 quote_path_fully: bool,
101) -> String {
102 use crate::quote_path::format_diff_path_with_prefix;
103
104 let body =
105 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context);
106
107 let mut output = String::new();
108 if old_path == "/dev/null" {
109 output.push_str("--- /dev/null\n");
110 } else if src_prefix.is_empty() {
111 output.push_str(&format!("--- {old_path}\n"));
112 } else {
113 output.push_str("--- ");
114 output.push_str(&format_diff_path_with_prefix(
115 src_prefix,
116 old_path,
117 quote_path_fully,
118 ));
119 output.push('\n');
120 }
121 if new_path == "/dev/null" {
122 output.push_str("+++ /dev/null\n");
123 } else if dst_prefix.is_empty() {
124 output.push_str(&format!("+++ {new_path}\n"));
125 } else {
126 output.push_str("+++ ");
127 output.push_str(&format_diff_path_with_prefix(
128 dst_prefix,
129 new_path,
130 quote_path_fully,
131 ));
132 output.push('\n');
133 }
134
135 let old_lines: Vec<&str> = old_content.lines().collect();
136 for hunk_str in imara_unified_hunk_slices(&body) {
137 if hunk_str.is_empty() {
138 continue;
139 }
140 if let Some(first_newline) = hunk_str.find('\n') {
141 let header_line = &hunk_str[..first_newline];
142 let rest = &hunk_str[first_newline..];
143 if let Some(func_ctx) =
144 extract_function_context(header_line, &old_lines, funcname_matcher)
145 {
146 output.push_str(header_line);
147 output.push(' ');
148 output.push_str(&func_ctx);
149 output.push_str(rest);
150 } else {
151 output.push_str(hunk_str);
152 }
153 } else {
154 output.push_str(hunk_str);
155 }
156 }
157
158 output
159}
160
161#[must_use]
163pub fn indent_heuristic_from_config(config: &ConfigSet) -> bool {
164 match config.get_bool("diff.indentHeuristic") {
165 Some(Ok(b)) => b,
166 Some(Err(_)) | None => true,
167 }
168}
169
170#[must_use]
172pub fn resolve_indent_heuristic(
173 config: &ConfigSet,
174 cli_indent_heuristic: bool,
175 cli_no_indent_heuristic: bool,
176) -> bool {
177 if cli_no_indent_heuristic {
178 false
179 } else if cli_indent_heuristic {
180 true
181 } else {
182 indent_heuristic_from_config(config)
183 }
184}
185
/// Scans `argv` for `--indent-heuristic` / `--no-indent-heuristic`.
///
/// Returns `(indent_heuristic, no_indent_heuristic)`. When both flags occur,
/// the one appearing last wins and the other is reported as `false`; when
/// neither occurs the result is `(false, false)`.
#[must_use]
pub fn parse_indent_heuristic_cli_flags(argv: &[String]) -> (bool, bool) {
    // Walking backwards, the first hit is the last occurrence on the command line.
    argv.iter()
        .rev()
        .find_map(|arg| match arg.as_str() {
            "--indent-heuristic" => Some((true, false)),
            "--no-indent-heuristic" => Some((false, true)),
            _ => None,
        })
        .unwrap_or((false, false))
}
206
/// Computes diff operations between two line slices.
///
/// Public forwarder to `diff_indent_heuristic::diff_slice_ops_compacted`; all
/// arguments are passed through unchanged and the returned operations are
/// whatever that function produces.
#[must_use]
pub fn diff_slice_ops_compacted(
    old_lines: &[&str],
    new_lines: &[&str],
    algorithm: similar::Algorithm,
    indent_heuristic: bool,
) -> Vec<similar::DiffOp> {
    diff_indent_heuristic::diff_slice_ops_compacted(
        old_lines,
        new_lines,
        algorithm,
        indent_heuristic,
    )
}
222
/// Maps each line of the new text to a line of the old text, where one exists.
///
/// The diff operations are obtained from
/// `diff_indent_heuristic::diff_lines_ops_compacted` and then turned into the
/// mapping by `diff_indent_heuristic::map_new_to_old_from_ops`, which also
/// receives `new_line_count`.
///
/// NOTE(review): presumably the result has one slot per new line, holding
/// `Some(old_index)` for unchanged lines and `None` otherwise — confirm against
/// `map_new_to_old_from_ops`.
#[must_use]
pub fn map_new_to_old_lines_compacted(
    old_joined: &str,
    new_joined: &str,
    algorithm: similar::Algorithm,
    indent_heuristic: bool,
    new_line_count: usize,
) -> Vec<Option<usize>> {
    let ops = diff_indent_heuristic::diff_lines_ops_compacted(
        old_joined,
        new_joined,
        algorithm,
        indent_heuristic,
    );
    diff_indent_heuristic::map_new_to_old_from_ops(&ops, new_line_count)
}
240
/// Kind of change recorded for a single path in a diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffStatus {
    /// Path exists only on the new side (`A`).
    Added,
    /// Path exists only on the old side (`D`).
    Deleted,
    /// Path exists on both sides with different content or mode (`M`).
    Modified,
    /// Path was renamed (`R`).
    Renamed,
    /// Path was copied (`C`).
    Copied,
    /// The file-type bits of the mode changed (`T`).
    TypeChanged,
    /// Path is unmerged in the index (`U`).
    Unmerged,
}

impl DiffStatus {
    /// Returns the single-character status code for this variant.
    #[must_use]
    pub fn letter(&self) -> char {
        match *self {
            DiffStatus::Added => 'A',
            DiffStatus::Deleted => 'D',
            DiffStatus::Modified => 'M',
            DiffStatus::Renamed => 'R',
            DiffStatus::Copied => 'C',
            DiffStatus::TypeChanged => 'T',
            DiffStatus::Unmerged => 'U',
        }
    }
}
275
/// One changed path produced by the diff functions in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffEntry {
    /// What kind of change this entry describes.
    pub status: DiffStatus,
    /// Path on the old side; `None` for entries that only exist on the new side.
    pub old_path: Option<String>,
    /// Path on the new side; `None` for entries that only exist on the old side.
    pub new_path: Option<String>,
    /// Old-side mode as text; the producers in this file use `"000000"` when
    /// the old side is absent.
    pub old_mode: String,
    /// New-side mode as text; the producers in this file use `"000000"` when
    /// the new side is absent.
    pub new_mode: String,
    /// Object id on the old side (the zero id when absent).
    pub old_oid: ObjectId,
    /// Object id on the new side (the zero id when absent or not computed).
    pub new_oid: ObjectId,
    /// Optional score. NOTE(review): presumably the rename/copy similarity
    /// score; every producer visible in this part of the file sets `None`.
    pub score: Option<u32>,
}
296
297impl DiffEntry {
298 #[must_use]
300 pub fn path(&self) -> &str {
301 self.new_path
302 .as_deref()
303 .or(self.old_path.as_deref())
304 .unwrap_or("")
305 }
306
307 #[must_use]
312 pub fn display_path(&self) -> String {
313 match self.status {
314 DiffStatus::Renamed | DiffStatus::Copied => {
315 let old = self.old_path.as_deref().unwrap_or("");
316 let new = self.new_path.as_deref().unwrap_or("");
317 if old.is_empty() || new.is_empty() {
318 self.path().to_owned()
319 } else {
320 format!("{old} -> {new}")
321 }
322 }
323 _ => self.path().to_owned(),
324 }
325 }
326}
327
/// Hexadecimal text form of the all-zero object id (40 `0` digits).
pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
330
331#[must_use]
333pub fn zero_oid() -> ObjectId {
334 ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
335 panic!("internal error: failed to create zero OID");
337 })
338}
339
340#[must_use]
342pub fn empty_blob_oid() -> ObjectId {
343 ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391").unwrap_or_else(|_| {
344 panic!("internal error: failed to create empty blob OID");
346 })
347}
348
/// Recursively compares two tree objects and returns the changed paths.
///
/// * `old_tree_oid` / `new_tree_oid` — `None` is treated as an empty tree.
/// * `prefix` — path prefix handed to `format_path` for every reported entry.
///
/// Calls `diff_trees_opts` with `show_trees = false`, so sub-trees are
/// descended into but no entry is emitted for the tree objects themselves.
///
/// # Errors
///
/// Propagates failures from reading or parsing tree objects.
pub fn diff_trees(
    odb: &Odb,
    old_tree_oid: Option<&ObjectId>,
    new_tree_oid: Option<&ObjectId>,
    prefix: &str,
) -> Result<Vec<DiffEntry>> {
    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false)
}
371
/// Like [`diff_trees`], but also reports the tree objects themselves.
///
/// Calls `diff_trees_opts` with `show_trees = true`: an added, deleted or
/// changed sub-tree yields its own entry in addition to the entries for the
/// paths beneath it.
///
/// # Errors
///
/// Propagates failures from reading or parsing tree objects.
pub fn diff_trees_show_tree_entries(
    odb: &Odb,
    old_tree_oid: Option<&ObjectId>,
    new_tree_oid: Option<&ObjectId>,
    prefix: &str,
) -> Result<Vec<DiffEntry>> {
    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true)
}
383
384fn diff_trees_opts(
385 odb: &Odb,
386 old_tree_oid: Option<&ObjectId>,
387 new_tree_oid: Option<&ObjectId>,
388 prefix: &str,
389 show_trees: bool,
390) -> Result<Vec<DiffEntry>> {
391 let old_entries = match old_tree_oid {
392 Some(oid) => read_tree(odb, oid)?,
393 None => Vec::new(),
394 };
395 let new_entries = match new_tree_oid {
396 Some(oid) => read_tree(odb, oid)?,
397 None => Vec::new(),
398 };
399
400 let mut result = Vec::new();
401 diff_tree_entries_opts(
402 odb,
403 &old_entries,
404 &new_entries,
405 prefix,
406 show_trees,
407 &mut result,
408 )?;
409 Ok(result)
410}
411
412fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
414 let obj = odb.read(oid)?;
415 if obj.kind != ObjectKind::Tree {
416 return Err(Error::CorruptObject(format!(
417 "expected tree, got {}",
418 obj.kind.as_str()
419 )));
420 }
421 parse_tree(&obj.data)
422}
423
/// Merge-walks two tree entry lists and appends the differences to `result`.
///
/// Both slices are walked in parallel with `tree_entry_cmp` deciding which
/// cursor advances; this assumes `old` and `new` are already ordered
/// consistently with that comparison (NOTE(review): presumably guaranteed by
/// `parse_tree` — confirm).
///
/// * An entry only in `old` is reported through `emit_deleted_opts`.
/// * An entry only in `new` is reported through `emit_added_opts`.
/// * Entries comparing equal are reported only when oid or mode differ.
///
/// `show_trees` controls whether changed tree objects get their own entry in
/// addition to the recursion into them.
///
/// # Errors
///
/// Propagates failures from the emit helpers and from recursive tree reads.
fn diff_tree_entries_opts(
    odb: &Odb,
    old: &[TreeEntry],
    new: &[TreeEntry],
    prefix: &str,
    show_trees: bool,
    result: &mut Vec<DiffEntry>,
) -> Result<()> {
    // Cursors into `old` and `new`.
    let mut oi = 0;
    let mut ni = 0;

    while oi < old.len() || ni < new.len() {
        match (old.get(oi), new.get(ni)) {
            (Some(o), Some(n)) => {
                // The comparison is told whether each side is a tree.
                let cmp = crate::objects::tree_entry_cmp(
                    &o.name,
                    is_tree_mode(o.mode),
                    &n.name,
                    is_tree_mode(n.mode),
                );
                match cmp {
                    std::cmp::Ordering::Less => {
                        // `o` has no counterpart on the new side.
                        emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                        oi += 1;
                    }
                    std::cmp::Ordering::Greater => {
                        // `n` has no counterpart on the old side.
                        emit_added_opts(odb, n, prefix, show_trees, result)?;
                        ni += 1;
                    }
                    std::cmp::Ordering::Equal => {
                        // Same position in both trees: only report real changes.
                        if o.oid != n.oid || o.mode != n.mode {
                            let name_str = String::from_utf8_lossy(&o.name);
                            let path = format_path(prefix, &name_str);
                            if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
                                if show_trees {
                                    // Report the directory itself before its contents.
                                    result.push(DiffEntry {
                                        status: DiffStatus::Modified,
                                        old_path: Some(path.clone()),
                                        new_path: Some(path.clone()),
                                        old_mode: format_mode(o.mode),
                                        new_mode: format_mode(n.mode),
                                        old_oid: o.oid,
                                        new_oid: n.oid,
                                        score: None,
                                    });
                                }
                                // Descend with the directory path as the new prefix.
                                let nested = diff_trees_opts(
                                    odb,
                                    Some(&o.oid),
                                    Some(&n.oid),
                                    &path,
                                    show_trees,
                                )?;
                                result.extend(nested);
                            } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
                                // Tree replaced by a non-tree: delete the old
                                // contents, then add the new entry.
                                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                                emit_added_opts(odb, n, prefix, show_trees, result)?;
                            } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
                                // Non-tree replaced by a tree: same delete-then-add shape.
                                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                                emit_added_opts(odb, n, prefix, show_trees, result)?;
                            } else {
                                // Both are non-trees. 0o170000 masks the file-type
                                // bits of the mode; a difference there is a type
                                // change rather than a plain modification.
                                let old_type = o.mode & 0o170000;
                                let new_type = n.mode & 0o170000;
                                result.push(DiffEntry {
                                    status: if old_type != new_type {
                                        DiffStatus::TypeChanged
                                    } else {
                                        DiffStatus::Modified
                                    },
                                    old_path: Some(path.clone()),
                                    new_path: Some(path),
                                    old_mode: format_mode(o.mode),
                                    new_mode: format_mode(n.mode),
                                    old_oid: o.oid,
                                    new_oid: n.oid,
                                    score: None,
                                });
                            }
                        }
                        oi += 1;
                        ni += 1;
                    }
                }
            }
            (Some(o), None) => {
                // New side exhausted: everything left in `old` was deleted.
                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
                oi += 1;
            }
            (None, Some(n)) => {
                // Old side exhausted: everything left in `new` was added.
                emit_added_opts(odb, n, prefix, show_trees, result)?;
                ni += 1;
            }
            // Not reachable while the loop condition holds; kept for exhaustiveness.
            (None, None) => break,
        }
    }

    Ok(())
}
533
534fn emit_deleted_opts(
535 odb: &Odb,
536 entry: &TreeEntry,
537 prefix: &str,
538 show_trees: bool,
539 result: &mut Vec<DiffEntry>,
540) -> Result<()> {
541 let name_str = String::from_utf8_lossy(&entry.name);
542 let path = format_path(prefix, &name_str);
543 if is_tree_mode(entry.mode) {
544 if show_trees {
545 result.push(DiffEntry {
546 status: DiffStatus::Deleted,
547 old_path: Some(path.clone()),
548 new_path: None,
549 old_mode: format_mode(entry.mode),
550 new_mode: "000000".to_owned(),
551 old_oid: entry.oid,
552 new_oid: zero_oid(),
553 score: None,
554 });
555 }
556 let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
558 result.extend(nested);
559 } else {
560 result.push(DiffEntry {
561 status: DiffStatus::Deleted,
562 old_path: Some(path.clone()),
563 new_path: None,
564 old_mode: format_mode(entry.mode),
565 new_mode: "000000".to_owned(),
566 old_oid: entry.oid,
567 new_oid: zero_oid(),
568 score: None,
569 });
570 }
571 Ok(())
572}
573
574fn emit_added_opts(
575 odb: &Odb,
576 entry: &TreeEntry,
577 prefix: &str,
578 show_trees: bool,
579 result: &mut Vec<DiffEntry>,
580) -> Result<()> {
581 let name_str = String::from_utf8_lossy(&entry.name);
582 let path = format_path(prefix, &name_str);
583 if is_tree_mode(entry.mode) {
584 if show_trees {
585 result.push(DiffEntry {
586 status: DiffStatus::Added,
587 old_path: None,
588 new_path: Some(path.clone()),
589 old_mode: "000000".to_owned(),
590 new_mode: format_mode(entry.mode),
591 old_oid: zero_oid(),
592 new_oid: entry.oid,
593 score: None,
594 });
595 }
596 let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
598 result.extend(nested);
599 } else {
600 result.push(DiffEntry {
601 status: DiffStatus::Added,
602 old_path: None,
603 new_path: Some(path),
604 old_mode: "000000".to_owned(),
605 new_mode: format_mode(entry.mode),
606 old_oid: zero_oid(),
607 new_oid: entry.oid,
608 score: None,
609 });
610 }
611 Ok(())
612}
613
/// Compares the index against a tree and returns the staged changes.
///
/// * `tree_oid` — tree to compare against; `None` is treated as an empty tree.
/// * `ignore_submodules` — when set, entries whose mode is `0o160000`
///   (gitlinks) are left out on both sides.
///
/// Stage-0 entries are reported as `Added` or `Modified`; tree paths with no
/// index counterpart are reported as `Deleted`; a path that has only
/// higher-stage entries is reported once as `Unmerged`. The result is sorted
/// by [`DiffEntry::path`].
///
/// NOTE(review): a change in file type is reported as `Modified` here, whereas
/// the tree-to-tree diff in this file reports `TypeChanged` — confirm this
/// difference is intended.
///
/// # Errors
///
/// Propagates failures from flattening the tree.
pub fn diff_index_to_tree(
    odb: &Odb,
    index: &Index,
    tree_oid: Option<&ObjectId>,
    ignore_submodules: bool,
) -> Result<Vec<DiffEntry>> {
    let tree_entries = match tree_oid {
        Some(oid) => flatten_tree(odb, oid, "")?,
        None => Vec::new(),
    };

    // Path -> tree entry. Entries are removed as index paths are matched, so
    // whatever remains at the end exists only in the tree.
    let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
        std::collections::BTreeMap::new();
    for entry in &tree_entries {
        tree_map.insert(&entry.path, entry);
    }

    let mut result = Vec::new();
    let mut stage0_paths = std::collections::BTreeSet::new();
    // Path -> (rank, mode) of the preferred higher-stage entry for that path.
    let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
        std::collections::BTreeMap::new();

    for ie in &index.entries {
        let path = String::from_utf8_lossy(&ie.path).to_string();
        // Intent-to-add entries are not reported, and they do not consume the
        // tree entry either.
        if ie.stage() == 0 && ie.intent_to_add() {
            continue;
        }
        if ie.stage() != 0 {
            // Lower rank wins: stage 2 first, then stage 3, then stage 1.
            let rank = match ie.stage() {
                2 => 0u8,
                3 => 1u8,
                1 => 2u8,
                _ => 3u8,
            };
            match unmerged_modes.get(&path) {
                // Keep the already recorded entry when it ranks at least as well.
                Some((existing_rank, _)) if *existing_rank <= rank => {}
                _ => {
                    unmerged_modes.insert(path, (rank, ie.mode));
                }
            }
            continue;
        }
        if ignore_submodules && ie.mode == 0o160000 {
            // Drop the tree side too so the gitlink is not reported as deleted.
            let _ = tree_map.remove(path.as_str());
            stage0_paths.insert(path.clone());
            continue;
        }
        stage0_paths.insert(path.clone());
        match tree_map.remove(path.as_str()) {
            Some(te) => {
                // Present on both sides: report only real differences.
                if te.oid != ie.oid || te.mode != ie.mode {
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path.clone()),
                        new_path: Some(path),
                        old_mode: format_mode(te.mode),
                        new_mode: format_mode(ie.mode),
                        old_oid: te.oid,
                        new_oid: ie.oid,
                        score: None,
                    });
                }
            }
            None => {
                // In the index but not in the tree.
                result.push(DiffEntry {
                    status: DiffStatus::Added,
                    old_path: None,
                    new_path: Some(path),
                    old_mode: "000000".to_owned(),
                    new_mode: format_mode(ie.mode),
                    old_oid: zero_oid(),
                    new_oid: ie.oid,
                    score: None,
                });
            }
        }
    }

    for (path, (_, mode)) in &unmerged_modes {
        // A stage-0 entry for the same path takes precedence.
        if stage0_paths.contains(path) {
            continue;
        }
        // Removing the tree entry prevents an additional `Deleted` record.
        tree_map.remove(path.as_str());
        result.push(DiffEntry {
            status: DiffStatus::Unmerged,
            old_path: Some(path.clone()),
            new_path: Some(path.clone()),
            old_mode: "000000".to_owned(),
            new_mode: format_mode(*mode),
            old_oid: zero_oid(),
            new_oid: zero_oid(),
            score: None,
        });
    }

    // Everything still in `tree_map` has no index counterpart.
    for (path, te) in tree_map {
        if ignore_submodules && te.mode == 0o160000 {
            continue;
        }
        result.push(DiffEntry {
            status: DiffStatus::Deleted,
            old_path: Some(path.to_owned()),
            new_path: None,
            old_mode: format_mode(te.mode),
            new_mode: "000000".to_owned(),
            old_oid: te.oid,
            new_oid: zero_oid(),
            score: None,
        });
    }

    result.sort_by(|a, b| a.path().cmp(b.path()));
    Ok(result)
}
755
/// Compares the index against the working tree and returns unstaged changes.
///
/// Convenience wrapper around [`diff_index_to_worktree_with_options`]: the two
/// boolean flags are copied into a [`DiffIndexToWorktreeOptions`] and every
/// other option keeps its default value.
///
/// # Errors
///
/// Propagates failures from [`diff_index_to_worktree_with_options`].
pub fn diff_index_to_worktree(
    odb: &Odb,
    index: &Index,
    work_tree: &Path,
    ignore_submodule_untracked: bool,
    simplify_gitlinks: bool,
) -> Result<Vec<DiffEntry>> {
    diff_index_to_worktree_with_options(
        odb,
        index,
        work_tree,
        DiffIndexToWorktreeOptions {
            ignore_submodule_untracked,
            simplify_gitlinks,
            ..DiffIndexToWorktreeOptions::default()
        },
    )
}
798
/// Options for [`diff_index_to_worktree_with_options`].
#[derive(Debug, Clone, Copy, Default)]
pub struct DiffIndexToWorktreeOptions {
    /// `(seconds, nanoseconds)` timestamp used for racy-entry detection.
    /// `None`, or a seconds value of 0, disables the check.
    /// NOTE(review): presumably the mtime of the index file itself — confirm
    /// with callers.
    pub index_mtime: Option<(u32, u32)>,
    /// When set, untracked content inside a submodule does not make the
    /// submodule count as changed.
    pub ignore_submodule_untracked: bool,
    /// When set, a gitlink is reported only when the submodule's HEAD differs
    /// from the index; its working-tree state is not inspected.
    pub simplify_gitlinks: bool,
}
813
/// Compares every index entry against the working tree under `work_tree`.
///
/// Configuration is loaded from `<work_tree>/.git` (falling back to an empty
/// config on failure) together with the gitattributes, so that working-tree
/// content is hashed after the same conversion used when staging.
///
/// Per stage-0 entry:
/// * entries flagged skip-worktree or assume-unchanged are ignored;
/// * gitlinks (mode `0o160000`) are compared via the submodule's HEAD and,
///   unless `simplify_gitlinks` is set, its dirty state;
/// * intent-to-add entries are reported as `Added` (or `Deleted` when the
///   file is missing);
/// * a path with a symlink in one of its parent components, a path that is
///   now a directory, or a missing path is reported as `Deleted`;
/// * otherwise the file is reported as `Modified` when its mode or converted
///   content differs from the index.
///
/// Paths that only have higher-stage entries yield an `Unmerged` record,
/// followed by a `Modified` record when the working-tree file differs from
/// the preferred stage entry. Results are returned in processing order; they
/// are not sorted here.
///
/// # Errors
///
/// Returns `Error::Io` for stat failures other than "not found"/errno 20, and
/// propagates hashing failures.
pub fn diff_index_to_worktree_with_options(
    odb: &Odb,
    index: &Index,
    work_tree: &Path,
    options: DiffIndexToWorktreeOptions,
) -> Result<Vec<DiffEntry>> {
    use crate::config::ConfigSet;
    use crate::crlf;

    let ignore_submodule_untracked = options.ignore_submodule_untracked;
    let simplify_gitlinks = options.simplify_gitlinks;

    let git_dir = work_tree.join(".git");
    // A config that fails to load is treated as empty rather than as an error.
    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
    let conv = crlf::ConversionConfig::from_config(&config);
    let attrs = crlf::load_gitattributes(work_tree);

    let mut result = Vec::new();
    // Path -> (rank, entry) of the preferred higher-stage entry for that path.
    let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
        std::collections::BTreeMap::new();

    for ie in &index.entries {
        if ie.stage() != 0 {
            let path = String::from_utf8_lossy(&ie.path).to_string();
            // Lower rank wins: stage 2 first, then stage 3, then stage 1.
            let rank = match ie.stage() {
                2 => 0u8,
                3 => 1u8,
                1 => 2u8,
                _ => 3u8,
            };
            match unmerged_base.get(&path) {
                // Keep the already recorded entry when it ranks at least as well.
                Some((existing_rank, _)) if *existing_rank <= rank => {}
                _ => {
                    unmerged_base.insert(path, (rank, ie));
                }
            }
            continue;
        }
        if ie.skip_worktree() || ie.assume_unchanged() {
            continue;
        }
        // A path that is not valid UTF-8 degrades to the empty string here.
        let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
        let is_intent_to_add = ie.intent_to_add();

        // Gitlink (submodule) entry.
        if ie.mode == 0o160000 {
            let sub_dir = work_tree.join(path_str_ref);
            let sub_head_oid = read_submodule_head_oid(&sub_dir);
            // Without a readable HEAD, an unpopulated placeholder directory
            // counts as matching the index.
            let ref_matches = match sub_head_oid {
                Some(oid) => oid == ie.oid,
                None => submodule_worktree_is_unpopulated_placeholder(&sub_dir),
            };
            if simplify_gitlinks {
                // Only the HEAD comparison matters in this mode.
                if !ref_matches {
                    let path_owned = path_str_ref.to_owned();
                    let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(ie.mode),
                        old_oid: ie.oid,
                        new_oid,
                        score: None,
                    });
                }
                continue;
            }
            let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
            if ignore_submodule_untracked {
                flags.untracked = false;
            }
            let inner_dirty = flags.modified || flags.untracked;
            if !ref_matches || inner_dirty {
                let path_owned = path_str_ref.to_owned();
                // When HEAD matches and only the submodule's contents are
                // dirty, the new side carries the zero id.
                let new_oid = if !ref_matches {
                    sub_head_oid.unwrap_or_else(zero_oid)
                } else {
                    zero_oid()
                };
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path_owned.clone()),
                    new_path: Some(path_owned),
                    old_mode: format_mode(ie.mode),
                    new_mode: format_mode(ie.mode),
                    old_oid: ie.oid,
                    new_oid,
                    score: None,
                });
            }
            continue;
        }

        let file_path = work_tree.join(path_str_ref);

        if is_intent_to_add {
            match fs::symlink_metadata(&file_path) {
                Ok(meta) => {
                    let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                    // No index entry is passed: there is no prior blob to consult.
                    let worktree_oid = hash_worktree_file(
                        odb,
                        &file_path,
                        &meta,
                        &conv,
                        &file_attrs,
                        path_str_ref,
                        None,
                    )?;
                    let worktree_mode = mode_from_metadata(&meta);
                    result.push(DiffEntry {
                        status: DiffStatus::Added,
                        old_path: None,
                        new_path: Some(path_str_ref.to_owned()),
                        old_mode: "000000".to_owned(),
                        new_mode: format_mode(worktree_mode),
                        old_oid: zero_oid(),
                        new_oid: worktree_oid,
                        score: None,
                    });
                }
                // errno 20 is ENOTDIR on Linux/macOS: a parent component is
                // not a directory, which is treated like a missing file.
                Err(e)
                    if e.kind() == std::io::ErrorKind::NotFound
                        || e.raw_os_error() == Some(20) =>
                {
                    result.push(DiffEntry {
                        status: DiffStatus::Deleted,
                        old_path: Some(path_str_ref.to_owned()),
                        new_path: None,
                        old_mode: format_mode(ie.mode),
                        new_mode: "000000".to_owned(),
                        old_oid: ie.oid,
                        new_oid: zero_oid(),
                        score: None,
                    });
                }
                Err(e) => return Err(Error::Io(e)),
            }
            continue;
        }

        // A symlink in a parent component means the tracked file is no longer
        // reachable at its own path, so it is reported as deleted.
        if has_symlink_in_path(work_tree, path_str_ref) {
            result.push(DiffEntry {
                status: DiffStatus::Deleted,
                old_path: Some(path_str_ref.to_owned()),
                new_path: None,
                old_mode: format_mode(ie.mode),
                new_mode: "000000".to_owned(),
                old_oid: ie.oid,
                new_oid: zero_oid(),
                score: None,
            });
            continue;
        }

        match fs::symlink_metadata(&file_path) {
            Ok(meta) if meta.is_dir() => {
                // A directory now sits where the index has a file.
                // NOTE(review): `new_mode` is the empty string here, while every
                // other `Deleted` entry in this function uses "000000" — confirm
                // whether consumers rely on the distinction.
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    new_mode: String::new(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            Ok(meta) => {
                let worktree_mode = mode_from_metadata(&meta);
                let stat_same = stat_matches(ie, &meta);
                if stat_same && worktree_mode != ie.mode {
                    // Stat data matches, so the content is taken as unchanged
                    // and only the mode change is reported (same oid both sides).
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: ie.oid,
                        score: None,
                    });
                    continue;
                }

                // Fast path: identical stat data and mode, and the entry is
                // not racy, so the file is considered unchanged without hashing.
                if stat_same && worktree_mode == ie.mode && !entry_is_racy(ie, options.index_mtime) {
                    continue;
                }

                let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                let worktree_oid = hash_worktree_file(
                    odb,
                    &file_path,
                    &meta,
                    &conv,
                    &file_attrs,
                    path_str_ref,
                    Some(ie),
                )?;

                let mut eff_oid = worktree_oid;
                // If the converted content differs, also try the raw bytes:
                // a raw match with the index blob counts as unchanged.
                if eff_oid != ie.oid {
                    if let Ok(raw) = fs::read(&file_path) {
                        let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
                        if raw_oid == ie.oid {
                            eff_oid = ie.oid;
                        }
                    }
                }

                if eff_oid != ie.oid || worktree_mode != ie.mode {
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: eff_oid,
                        score: None,
                    });
                }
            }
            // Missing file, or errno 20 (ENOTDIR on Linux/macOS).
            Err(e) if e.kind() == std::io::ErrorKind::NotFound
                || e.raw_os_error() == Some(20) => {
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    new_mode: "000000".to_owned(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            Err(e) => return Err(Error::Io(e)),
        }
    }

    // Paths that have higher-stage entries. Unlike `diff_index_to_tree`, no
    // check against stage-0 paths is made here.
    for (path, (_, base_entry)) in unmerged_base {
        let file_path = work_tree.join(&path);
        let wt_meta = match fs::symlink_metadata(&file_path) {
            Ok(meta) => Some(meta),
            Err(e)
                if e.kind() == std::io::ErrorKind::NotFound
                    || e.raw_os_error() == Some(20) =>
            {
                None
            }
            Err(e) => return Err(Error::Io(e)),
        };

        // Mode of the working-tree file, or "000000" when it is missing.
        let new_mode = wt_meta.as_ref().map_or_else(
            || "000000".to_owned(),
            |meta| format_mode(mode_from_metadata(meta)),
        );
        result.push(DiffEntry {
            status: DiffStatus::Unmerged,
            old_path: Some(path.clone()),
            new_path: Some(path.clone()),
            old_mode: "000000".to_owned(),
            new_mode,
            old_oid: zero_oid(),
            new_oid: zero_oid(),
            score: None,
        });

        // Additionally report how the working-tree file differs from the
        // preferred stage entry, when the file exists.
        if let Some(meta) = wt_meta {
            let file_attrs = crlf::get_file_attrs(&attrs, &path, false, &config);
            let wt_oid = hash_worktree_file(
                odb,
                &file_path,
                &meta,
                &conv,
                &file_attrs,
                &path,
                Some(base_entry),
            )?;
            let wt_mode = mode_from_metadata(&meta);
            if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path.clone()),
                    new_path: Some(path),
                    old_mode: format_mode(base_entry.mode),
                    new_mode: format_mode(wt_mode),
                    old_oid: base_entry.oid,
                    new_oid: wt_oid,
                    score: None,
                });
            }
        }
    }

    Ok(result)
}
1151
1152fn entry_is_racy(ie: &IndexEntry, index_mtime: Option<(u32, u32)>) -> bool {
1153 let Some((index_mtime_sec, index_mtime_nsec)) = index_mtime else {
1154 return false;
1155 };
1156 if index_mtime_sec == 0 {
1157 return false;
1158 }
1159 index_mtime_sec < ie.mtime_sec
1160 || (index_mtime_sec == ie.mtime_sec && index_mtime_nsec <= ie.mtime_nsec)
1161}
1162
1163pub fn worktree_differs_from_index_entry(
1171 odb: &Odb,
1172 work_tree: &Path,
1173 ie: &IndexEntry,
1174 ignore_submodule_untracked: bool,
1175) -> Result<bool> {
1176 use crate::config::ConfigSet;
1177 use crate::crlf;
1178
1179 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
1180 let file_path = work_tree.join(path_str_ref);
1181
1182 if ie.mode == 0o160000 {
1183 let sub_head_oid = read_submodule_head(&file_path);
1184 let ref_matches = match sub_head_oid {
1185 Some(oid) => oid == ie.oid,
1186 None => submodule_worktree_is_unpopulated_placeholder(&file_path),
1187 };
1188 let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
1189 if ignore_submodule_untracked {
1190 flags.untracked = false;
1191 }
1192 return Ok(!ref_matches || flags.modified || flags.untracked);
1193 }
1194
1195 let meta = match fs::symlink_metadata(&file_path) {
1196 Ok(m) => m,
1197 Err(e)
1198 if e.kind() == std::io::ErrorKind::NotFound
1199 || e.raw_os_error() == Some(20) =>
1200 {
1201 return Ok(true);
1202 }
1203 Err(e) => return Err(Error::Io(e)),
1204 };
1205
1206 if meta.is_dir() {
1207 return Ok(true);
1208 }
1209
1210 let worktree_mode = mode_from_metadata(&meta);
1211 if worktree_mode != ie.mode {
1212 return Ok(true);
1213 }
1214
1215 let git_dir = work_tree.join(".git");
1216 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1217 let conv = crlf::ConversionConfig::from_config(&config);
1218 let attrs = crlf::load_gitattributes(work_tree);
1219 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
1220 let worktree_oid = hash_worktree_file(
1221 odb,
1222 &file_path,
1223 &meta,
1224 &conv,
1225 &file_attrs,
1226 path_str_ref,
1227 Some(ie),
1228 )?;
1229
1230 let mut eff_oid = worktree_oid;
1231 if eff_oid != ie.oid {
1232 if let Ok(raw) = fs::read(&file_path) {
1233 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1234 if raw_oid == ie.oid {
1235 eff_oid = ie.oid;
1236 }
1237 }
1238 }
1239
1240 Ok(eff_oid != ie.oid)
1241}
1242
1243pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
1244 if meta.len() as u32 != ie.size {
1246 return false;
1247 }
1248 if meta.mtime() as u32 != ie.mtime_sec {
1250 return false;
1251 }
1252 if meta.mtime_nsec() as u32 != ie.mtime_nsec {
1253 return false;
1254 }
1255 if meta.ctime() as u32 != ie.ctime_sec {
1257 return false;
1258 }
1259 if meta.ctime_nsec() as u32 != ie.ctime_nsec {
1260 return false;
1261 }
1262 if meta.ino() as u32 != ie.ino {
1264 return false;
1265 }
1266 if meta.dev() as u32 != ie.dev {
1267 return false;
1268 }
1269 true
1270}
1271
/// Returns `true` when any parent component of `rel_path` (every `/`-separated
/// component except the last) is a symbolic link inside `work_tree`.
///
/// Components that cannot be stat'ed are skipped, and the final component is
/// never inspected.
fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
    // No '/' means there is no parent component to inspect.
    let Some((parent_dirs, _leaf)) = rel_path.rsplit_once('/') else {
        return false;
    };

    let mut probe = work_tree.to_path_buf();
    parent_dirs.split('/').any(|segment| {
        probe.push(segment);
        fs::symlink_metadata(&probe).is_ok_and(|meta| meta.file_type().is_symlink())
    })
}
1287
1288pub fn hash_worktree_file(
1289 odb: &Odb,
1290 path: &Path,
1291 meta: &fs::Metadata,
1292 conv: &crate::crlf::ConversionConfig,
1293 file_attrs: &crate::crlf::FileAttrs,
1294 rel_path: &str,
1295 index_entry: Option<&IndexEntry>,
1296) -> Result<ObjectId> {
1297 let prior_blob: Option<Vec<u8>> = index_entry
1298 .filter(|e| e.oid != zero_oid())
1299 .and_then(|e| odb.read(&e.oid).ok().map(|o| o.data));
1300 let data = if meta.file_type().is_symlink() {
1301 let target = fs::read_link(path)?;
1303 target.to_string_lossy().into_owned().into_bytes()
1304 } else if meta.is_dir() {
1305 Vec::new()
1308 } else {
1309 let raw = fs::read(path)?;
1310 let opts = crate::crlf::ConvertToGitOpts {
1313 index_blob: prior_blob.as_deref(),
1314 renormalize: false,
1315 check_safecrlf: false,
1316 };
1317 crate::crlf::convert_to_git_with_opts(&raw, rel_path, conv, file_attrs, opts).unwrap_or(raw)
1318 };
1319
1320 Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
1321}
1322
/// Maps file metadata to one of three mode values.
///
/// * symlink → `0o120000`
/// * any execute bit set (`0o111`) → `0o100755`
/// * otherwise → `0o100644`
///
/// Only the symlink check and the execute bits are consulted, so a directory
/// is classified by its execute bits like any other non-symlink.
pub fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
    let is_link = meta.file_type().is_symlink();
    let exec_bits = meta.mode() & 0o111;
    match (is_link, exec_bits) {
        (true, _) => 0o120000,
        (false, 0) => 0o100644,
        (false, _) => 0o100755,
    }
}
1333
1334pub fn diff_tree_to_worktree(
1351 odb: &Odb,
1352 tree_oid: Option<&ObjectId>,
1353 work_tree: &Path,
1354 index: &Index,
1355) -> Result<Vec<DiffEntry>> {
1356 use crate::config::ConfigSet;
1357 use crate::crlf;
1358
1359 let git_dir = work_tree.join(".git");
1360 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1361 let conv = crlf::ConversionConfig::from_config(&config);
1362 let attrs = crlf::load_gitattributes(work_tree);
1363
1364 let tree_flat = match tree_oid {
1366 Some(oid) => flatten_tree(odb, oid, "")?,
1367 None => Vec::new(),
1368 };
1369 let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
1370 tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
1371
1372 let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
1374 std::collections::BTreeMap::new();
1375 let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1376 let mut stage0_paths: std::collections::BTreeSet<Vec<u8>> = std::collections::BTreeSet::new();
1377 for ie in &index.entries {
1378 if ie.stage() != 0 {
1379 continue;
1380 }
1381 let path = String::from_utf8_lossy(&ie.path).to_string();
1382 index_entries.insert(&ie.path, ie);
1383 index_paths.insert(path);
1384 stage0_paths.insert(ie.path.clone());
1385 }
1386
1387 let mut unmerged_only_paths: std::collections::BTreeSet<String> =
1390 std::collections::BTreeSet::new();
1391 for ie in &index.entries {
1392 if !(1..=3).contains(&ie.stage()) {
1393 continue;
1394 }
1395 if stage0_paths.contains(&ie.path) {
1396 continue;
1397 }
1398 unmerged_only_paths.insert(String::from_utf8_lossy(&ie.path).into_owned());
1399 }
1400
1401 let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1403 all_paths.extend(tree_map.keys().cloned());
1404 all_paths.extend(index_paths.iter().cloned());
1405 all_paths.extend(unmerged_only_paths.iter().cloned());
1406
1407 let mut result = Vec::new();
1408
1409 for path in &all_paths {
1410 if index_entries
1411 .get(path.as_bytes())
1412 .is_some_and(|ie| ie.skip_worktree())
1413 {
1414 continue;
1417 }
1418
1419 let tree_entry = tree_map.get(path.as_str());
1420
1421 let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
1423 || index_entries
1424 .get(path.as_bytes())
1425 .is_some_and(|ie| ie.mode == 0o160000);
1426 if is_gitlink {
1427 if let Some(te) = tree_entry {
1428 let sub_dir = work_tree.join(path);
1429 let sub_head = read_submodule_head_oid(&sub_dir);
1430 let index_oid = index_entries
1431 .get(path.as_bytes())
1432 .filter(|ie| ie.mode == 0o160000)
1433 .map(|ie| ie.oid);
1434 let index_matches_tree = index_oid.is_some_and(|oid| oid == te.oid);
1435 let head_differs = sub_head.as_ref() != Some(&te.oid);
1436 let dirty_while_aligned = index_matches_tree
1437 && !head_differs
1438 && submodule_has_dirty_worktree_for_super_diff(work_tree, path, &te.oid);
1439 if head_differs || dirty_while_aligned {
1440 let new_oid = if head_differs { zero_oid() } else { te.oid };
1444 result.push(DiffEntry {
1445 status: DiffStatus::Modified,
1446 old_path: Some(path.clone()),
1447 new_path: Some(path.clone()),
1448 old_mode: format_mode(te.mode),
1449 new_mode: format_mode(te.mode),
1450 old_oid: te.oid,
1451 new_oid,
1452 score: None,
1453 });
1454 }
1455 }
1456 continue;
1457 }
1458
1459 let file_path = work_tree.join(path);
1460
1461 let wt_meta = match fs::symlink_metadata(&file_path) {
1462 Ok(m) => Some(m),
1463 Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
1464 Err(e) => return Err(Error::Io(e)),
1465 };
1466
1467 if unmerged_only_paths.contains(path) {
1468 if let (Some(te), Some(meta)) = (tree_entry, wt_meta.as_ref()) {
1469 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1470 let wt_oid =
1471 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, None)?;
1472 let wt_mode = mode_from_metadata(meta);
1473 if wt_oid != te.oid || wt_mode != te.mode {
1474 result.push(DiffEntry {
1475 status: DiffStatus::Modified,
1476 old_path: Some(path.clone()),
1477 new_path: Some(path.clone()),
1478 old_mode: format_mode(te.mode),
1479 new_mode: format_mode(wt_mode),
1480 old_oid: te.oid,
1481 new_oid: wt_oid,
1482 score: None,
1483 });
1484 }
1485 }
1486 continue;
1487 }
1488
1489 match (tree_entry, wt_meta) {
1490 (Some(te), Some(ref meta)) => {
1491 let wt_mode = mode_from_metadata(meta);
1492 let Some(ie) = index_entries.get(path.as_bytes()) else {
1493 continue;
1494 };
1495
1496 let index_matches_tree = ie.oid == te.oid && ie.mode == te.mode;
1497
1498 if index_matches_tree && wt_mode == te.mode && stat_matches(ie, meta) {
1500 continue;
1501 }
1502
1503 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1504 let idx_ent = index_entries.get(path.as_bytes()).copied();
1505
1506 if ie.oid == te.oid && ie.mode != te.mode {
1508 result.push(DiffEntry {
1509 status: DiffStatus::Modified,
1510 old_path: Some(path.clone()),
1511 new_path: Some(path.clone()),
1512 old_mode: format_mode(te.mode),
1513 new_mode: format_mode(ie.mode),
1514 old_oid: te.oid,
1515 new_oid: te.oid,
1516 score: None,
1517 });
1518 continue;
1519 }
1520
1521 if index_matches_tree {
1524 let wt_oid = hash_worktree_file(
1525 odb,
1526 &file_path,
1527 meta,
1528 &conv,
1529 &file_attrs,
1530 path,
1531 idx_ent,
1532 )?;
1533 let mut eff_oid = wt_oid;
1534 if eff_oid != te.oid {
1535 if let Ok(raw) = fs::read(&file_path) {
1536 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1537 if raw_oid == te.oid {
1538 eff_oid = te.oid;
1539 }
1540 }
1541 }
1542 if eff_oid != te.oid {
1543 result.push(DiffEntry {
1544 status: DiffStatus::Modified,
1545 old_path: Some(path.clone()),
1546 new_path: Some(path.clone()),
1547 old_mode: format_mode(te.mode),
1548 new_mode: format_mode(wt_mode),
1549 old_oid: te.oid,
1550 new_oid: eff_oid,
1551 score: None,
1552 });
1553 } else if wt_mode != te.mode {
1554 result.push(DiffEntry {
1555 status: DiffStatus::Modified,
1556 old_path: Some(path.clone()),
1557 new_path: Some(path.clone()),
1558 old_mode: format_mode(te.mode),
1559 new_mode: format_mode(wt_mode),
1560 old_oid: te.oid,
1561 new_oid: te.oid,
1562 score: None,
1563 });
1564 }
1565 continue;
1566 }
1567
1568 let wt_oid =
1570 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, idx_ent)?;
1571 let mut eff_oid = wt_oid;
1572 if eff_oid != te.oid {
1573 if let Ok(raw) = fs::read(&file_path) {
1574 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1575 if raw_oid == te.oid {
1576 eff_oid = te.oid;
1577 }
1578 }
1579 }
1580 if eff_oid != te.oid || wt_mode != te.mode {
1581 result.push(DiffEntry {
1582 status: DiffStatus::Modified,
1583 old_path: Some(path.clone()),
1584 new_path: Some(path.clone()),
1585 old_mode: format_mode(te.mode),
1586 new_mode: format_mode(wt_mode),
1587 old_oid: te.oid,
1588 new_oid: eff_oid,
1589 score: None,
1590 });
1591 }
1592 }
1593 (Some(te), None) => {
1594 result.push(DiffEntry {
1596 status: DiffStatus::Deleted,
1597 old_path: Some(path.clone()),
1598 new_path: None,
1599 old_mode: format_mode(te.mode),
1600 new_mode: "000000".to_owned(),
1601 old_oid: te.oid,
1602 new_oid: zero_oid(),
1603 score: None,
1604 });
1605 }
1606 (None, Some(ref meta)) => {
1607 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1609 let wt_oid = hash_worktree_file(
1610 odb,
1611 &file_path,
1612 meta,
1613 &conv,
1614 &file_attrs,
1615 path,
1616 index_entries.get(path.as_bytes()).copied(),
1617 )?;
1618 let wt_mode = mode_from_metadata(meta);
1619 result.push(DiffEntry {
1620 status: DiffStatus::Added,
1621 old_path: None,
1622 new_path: Some(path.clone()),
1623 old_mode: "000000".to_owned(),
1624 new_mode: format_mode(wt_mode),
1625 old_oid: zero_oid(),
1626 new_oid: wt_oid,
1627 score: None,
1628 });
1629 }
1630 (None, None) => {
1631 }
1633 }
1634 }
1635
1636 result.sort_by(|a, b| a.path().cmp(b.path()));
1637 Ok(result)
1638}
1639
1640fn read_added_entry_bytes(
1643 odb: &Odb,
1644 entry: &DiffEntry,
1645 work_root: Option<&Path>,
1646) -> Option<Vec<u8>> {
1647 if entry.new_oid != zero_oid() {
1648 return odb.read(&entry.new_oid).ok().map(|obj| obj.data);
1649 }
1650 let path = entry.new_path.as_deref()?;
1651 let root = work_root?;
1652 fs::read(root.join(path)).ok()
1653}
1654
1655fn modified_as_copy_from_sources(
1656 odb: &Odb,
1657 work_root: Option<&Path>,
1658 e: &DiffEntry,
1659 threshold: u32,
1660 sources: &[(String, ObjectId, bool)],
1661 source_contents: &[Option<Vec<u8>>],
1662 source_tree_entries: &[(String, String, ObjectId)],
1663) -> Option<DiffEntry> {
1664 fn regular_file_mode(mode: &str) -> bool {
1665 mode == "100644" || mode == "100755"
1666 }
1667
1668 if e.status != DiffStatus::Modified || !regular_file_mode(&e.new_mode) {
1669 return None;
1670 }
1671 let new_data = read_added_entry_bytes(odb, e, work_root)?;
1672 let new_oid_eff = if e.new_oid != zero_oid() {
1673 e.new_oid
1674 } else {
1675 Odb::hash_object_data(ObjectKind::Blob, &new_data)
1676 };
1677
1678 let mut best: Option<(usize, u32)> = None;
1679 for (si, (src_path, src_oid, is_deleted)) in sources.iter().enumerate() {
1680 if *is_deleted {
1681 continue;
1682 }
1683 if e.new_path.as_deref() == Some(src_path.as_str()) {
1684 continue;
1685 }
1686 let src_mode_str = source_tree_entries
1687 .iter()
1688 .find(|(p, _, _)| p == src_path)
1689 .map(|(_, m, _)| m.as_str())
1690 .unwrap_or("100644");
1691 if !regular_file_mode(src_mode_str) {
1692 continue;
1693 }
1694
1695 let score = if *src_oid == new_oid_eff {
1696 100
1697 } else {
1698 match (&source_contents[si], Some(new_data.as_slice())) {
1699 (Some(old_data), Some(nd)) => compute_similarity(old_data, nd),
1700 _ => 0,
1701 }
1702 };
1703 if score >= threshold {
1704 let replace = match best {
1705 None => true,
1706 Some((_, s)) => score > s,
1707 };
1708 if replace {
1709 best = Some((si, score));
1710 }
1711 }
1712 }
1713
1714 let (si, score) = best?;
1715 let (src_path, src_oid, _) = &sources[si];
1716 let src_mode = source_tree_entries
1717 .iter()
1718 .find(|(p, _, _)| p == src_path)
1719 .map(|(_, m, _)| m.clone())
1720 .unwrap_or_else(|| e.old_mode.clone());
1721
1722 Some(DiffEntry {
1723 status: DiffStatus::Copied,
1724 old_path: Some(src_path.clone()),
1725 new_path: e.new_path.clone(),
1726 old_mode: src_mode,
1727 new_mode: e.new_mode.clone(),
1728 old_oid: *src_oid,
1729 new_oid: e.new_oid,
1730 score: Some(score),
1731 })
1732}
1733
1734pub fn detect_renames(
1746 odb: &Odb,
1747 work_root: Option<&Path>,
1748 entries: Vec<DiffEntry>,
1749 threshold: u32,
1750) -> Vec<DiffEntry> {
1751 let mut deleted: Vec<DiffEntry> = Vec::new();
1753 let mut added: Vec<DiffEntry> = Vec::new();
1754 let mut others: Vec<DiffEntry> = Vec::new();
1755
1756 for entry in entries {
1757 match entry.status {
1758 DiffStatus::Deleted => deleted.push(entry),
1759 DiffStatus::Added => added.push(entry),
1760 _ => others.push(entry),
1761 }
1762 }
1763
1764 if deleted.is_empty() || added.is_empty() {
1765 let mut result = others;
1767 result.extend(deleted);
1768 result.extend(added);
1769 result.sort_by(|a, b| a.path().cmp(b.path()));
1770 return result;
1771 }
1772
1773 let deleted_contents: Vec<Option<Vec<u8>>> = deleted
1775 .iter()
1776 .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
1777 .collect();
1778
1779 let added_contents: Vec<Option<Vec<u8>>> = added
1781 .iter()
1782 .map(|a| read_added_entry_bytes(odb, a, work_root))
1783 .collect();
1784
1785 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1788
1789 fn is_regularish_mode(mode: &str) -> bool {
1790 mode == "100644" || mode == "100755"
1791 }
1792
1793 for (di, del) in deleted.iter().enumerate() {
1794 for (ai, add) in added.iter().enumerate() {
1795 if del.old_oid == add.new_oid {
1797 scores.push((100, di, ai));
1798 continue;
1799 }
1800
1801 if !is_regularish_mode(&del.old_mode) || !is_regularish_mode(&add.new_mode) {
1804 continue;
1805 }
1806
1807 let score = match (&deleted_contents[di], &added_contents[ai]) {
1808 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
1809 _ => 0,
1810 };
1811
1812 if score >= threshold {
1813 scores.push((score, di, ai));
1814 }
1815 }
1816 }
1817
1818 scores.sort_by(|a, b| {
1821 let a_same = same_basename(&deleted[a.1], &added[a.2]);
1822 let b_same = same_basename(&deleted[b.1], &added[b.2]);
1823 b_same.cmp(&a_same).then_with(|| b.0.cmp(&a.0))
1824 });
1825
1826 let mut used_deleted = vec![false; deleted.len()];
1827 let mut used_added = vec![false; added.len()];
1828 let mut renames: Vec<DiffEntry> = Vec::new();
1829
1830 for (score, di, ai) in &scores {
1831 if used_deleted[*di] || used_added[*ai] {
1832 continue;
1833 }
1834 used_deleted[*di] = true;
1835 used_added[*ai] = true;
1836
1837 let del = &deleted[*di];
1838 let add = &added[*ai];
1839
1840 renames.push(DiffEntry {
1841 status: DiffStatus::Renamed,
1842 old_path: del.old_path.clone(),
1843 new_path: add.new_path.clone(),
1844 old_mode: del.old_mode.clone(),
1845 new_mode: add.new_mode.clone(),
1846 old_oid: del.old_oid,
1847 new_oid: add.new_oid,
1848 score: Some(*score),
1849 });
1850 }
1851
1852 let mut result = others;
1854 result.extend(renames);
1855 for (i, entry) in deleted.into_iter().enumerate() {
1856 if !used_deleted[i] {
1857 result.push(entry);
1858 }
1859 }
1860 for (i, entry) in added.into_iter().enumerate() {
1861 if !used_added[i] {
1862 result.push(entry);
1863 }
1864 }
1865
1866 result.sort_by(|a, b| a.path().cmp(b.path()));
1867 result
1868}
1869
1870pub fn detect_copies(
1881 odb: &Odb,
1882 work_root: Option<&Path>,
1883 entries: Vec<DiffEntry>,
1884 threshold: u32,
1885 find_copies_harder: bool,
1886 source_tree_entries: &[(String, String, ObjectId)],
1887) -> Vec<DiffEntry> {
1888 use std::collections::{HashMap, HashSet};
1889
1890 let mut deleted: Vec<DiffEntry> = Vec::new();
1892 let mut added: Vec<DiffEntry> = Vec::new();
1893 let mut others: Vec<DiffEntry> = Vec::new();
1894
1895 for entry in entries {
1896 match entry.status {
1897 DiffStatus::Deleted => deleted.push(entry),
1898 DiffStatus::Added => added.push(entry),
1899 _ => others.push(entry),
1900 }
1901 }
1902
1903 let mut sources: Vec<(String, ObjectId, bool)> = Vec::new(); let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
1907
1908 for entry in &deleted {
1909 if let Some(ref path) = entry.old_path {
1910 deleted_source_idx.insert(path.clone(), sources.len());
1911 sources.push((path.clone(), entry.old_oid, true));
1912 }
1913 }
1914
1915 for entry in &others {
1918 if matches!(entry.status, DiffStatus::Modified | DiffStatus::TypeChanged) {
1919 if let Some(ref old_path) = entry.old_path {
1920 if !sources.iter().any(|(p, _, _)| p == old_path) {
1921 sources.push((old_path.clone(), entry.old_oid, false));
1922 }
1923 }
1924 }
1925 }
1926
1927 if find_copies_harder {
1929 for (path, _mode, oid) in source_tree_entries {
1930 if !sources.iter().any(|(p, _, _)| p == path) {
1931 sources.push((path.clone(), *oid, false));
1932 }
1933 }
1934 }
1935
1936 if sources.is_empty() {
1937 let mut result = others;
1938 result.extend(deleted);
1939 result.extend(added);
1940 result.sort_by(|a, b| a.path().cmp(b.path()));
1941 return result;
1942 }
1943
1944 let source_contents: Vec<Option<Vec<u8>>> = sources
1946 .iter()
1947 .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
1948 .collect();
1949
1950 let mut result_entries: Vec<DiffEntry> = Vec::new();
1951 let mut renamed_deleted: HashSet<usize> = HashSet::new();
1952 let mut used_added2 = vec![false; added.len()];
1953
1954 if !added.is_empty() {
1955 let added_contents: Vec<Option<Vec<u8>>> = added
1957 .iter()
1958 .map(|a| read_added_entry_bytes(odb, a, work_root))
1959 .collect();
1960
1961 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1963 for (si, (src_path, src_oid, _)) in sources.iter().enumerate() {
1964 for (ai, add) in added.iter().enumerate() {
1965 if add.new_path.as_deref() == Some(src_path.as_str()) {
1968 continue;
1969 }
1970 let add_oid = if add.new_oid != zero_oid() {
1971 add.new_oid
1972 } else if let Some(ref data) = added_contents[ai] {
1973 Odb::hash_object_data(ObjectKind::Blob, data)
1974 } else {
1975 zero_oid()
1976 };
1977 if *src_oid == add_oid {
1978 scores.push((100, si, ai));
1979 continue;
1980 }
1981 let score = match (&source_contents[si], &added_contents[ai]) {
1982 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
1983 _ => 0,
1984 };
1985 if score >= threshold {
1986 scores.push((score, si, ai));
1987 }
1988 }
1989 }
1990
1991 scores.sort_by(|a, b| b.0.cmp(&a.0));
1993
1994 let mut used_added = vec![false; added.len()];
1996 let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
1997 for &(score, si, ai) in &scores {
1998 if used_added[ai] {
1999 continue;
2000 }
2001 used_added[ai] = true;
2002 source_to_added.entry(si).or_default().push((ai, score));
2003 }
2004
2005 for (&si, assignments_for_src) in &source_to_added {
2007 let (_, _, is_deleted) = &sources[si];
2008 if *is_deleted && !assignments_for_src.is_empty() {
2009 let rename_ai = assignments_for_src
2012 .iter()
2013 .max_by_key(|(ai, _score)| added[*ai].path().to_string())
2014 .map(|(ai, _)| *ai);
2015
2016 for &(ai, score) in assignments_for_src {
2017 let (ref src_path, _, _) = sources[si];
2018 let add = &added[ai];
2019 let src_mode = source_tree_entries
2020 .iter()
2021 .find(|(p, _, _)| p == src_path)
2022 .map(|(_, m, _)| m.clone())
2023 .unwrap_or_else(|| add.old_mode.clone());
2024
2025 let is_rename = Some(ai) == rename_ai;
2026 result_entries.push(DiffEntry {
2027 status: if is_rename {
2028 DiffStatus::Renamed
2029 } else {
2030 DiffStatus::Copied
2031 },
2032 old_path: Some(src_path.clone()),
2033 new_path: add.new_path.clone(),
2034 old_mode: src_mode,
2035 new_mode: add.new_mode.clone(),
2036 old_oid: sources[si].1,
2037 new_oid: add.new_oid,
2038 score: Some(score),
2039 });
2040 used_added2[ai] = true;
2041 }
2042 renamed_deleted.insert(si);
2043 } else {
2044 for &(ai, score) in assignments_for_src {
2046 let (ref src_path, _, _) = sources[si];
2047 let add = &added[ai];
2048 let src_mode = source_tree_entries
2049 .iter()
2050 .find(|(p, _, _)| p == src_path)
2051 .map(|(_, m, _)| m.clone())
2052 .unwrap_or_else(|| add.old_mode.clone());
2053
2054 result_entries.push(DiffEntry {
2055 status: DiffStatus::Copied,
2056 old_path: Some(src_path.clone()),
2057 new_path: add.new_path.clone(),
2058 old_mode: src_mode,
2059 new_mode: add.new_mode.clone(),
2060 old_oid: sources[si].1,
2061 new_oid: add.new_oid,
2062 score: Some(score),
2063 });
2064 used_added2[ai] = true;
2065 }
2066 }
2067 }
2068 }
2069
2070 for entry in deleted.into_iter() {
2072 if let Some(ref path) = entry.old_path {
2073 if let Some(&si) = deleted_source_idx.get(path) {
2074 if renamed_deleted.contains(&si) {
2075 continue;
2077 }
2078 }
2079 }
2080 result_entries.push(entry);
2081 }
2082
2083 let mut result = others;
2084 result.extend(result_entries);
2085 for (i, entry) in added.into_iter().enumerate() {
2087 if !used_added2[i] {
2088 result.push(entry);
2089 }
2090 }
2091
2092 let mut final_result = Vec::with_capacity(result.len());
2093 for e in result {
2094 if let Some(c) = modified_as_copy_from_sources(
2095 odb,
2096 work_root,
2097 &e,
2098 threshold,
2099 &sources,
2100 &source_contents,
2101 source_tree_entries,
2102 ) {
2103 final_result.push(c);
2104 } else {
2105 final_result.push(e);
2106 }
2107 }
2108
2109 final_result.sort_by(|a, b| a.path().cmp(b.path()));
2110 final_result
2111}
2112
2113pub fn status_apply_rename_copy_detection(
2123 odb: &Odb,
2124 unstaged_raw: Vec<DiffEntry>,
2125 threshold: u32,
2126 copies: bool,
2127 head_tree: Option<&ObjectId>,
2128) -> Result<Vec<DiffEntry>> {
2129 let after_renames = detect_renames(odb, None, unstaged_raw, threshold);
2130 if !copies {
2131 return Ok(after_renames);
2132 }
2133 let source_tree_entries: Vec<(String, String, ObjectId)> = match head_tree {
2134 Some(oid) => flatten_tree(odb, oid, "")?
2135 .into_iter()
2136 .map(|e| (e.path, format_mode(e.mode), e.oid))
2137 .collect(),
2138 None => Vec::new(),
2139 };
2140 Ok(detect_copies(
2141 odb,
2142 None,
2143 after_renames,
2144 threshold,
2145 false,
2146 &source_tree_entries,
2147 ))
2148}
2149
/// Renders a rename as a compact `prefix{old => new}suffix` string.
///
/// The shared prefix is the longest common leading run that ends in `/`;
/// the shared suffix is the longest common trailing run that starts with
/// `/`. When neither exists the result is simply `old => new`.
///
/// The suffix may overlap the prefix on one side (yielding an empty middle
/// on that side); when it overlaps on both sides it is shortened to the next
/// `/` boundary, or dropped if there is none.
pub fn format_rename_path(old: &str, new: &str) -> String {
    let ob = old.as_bytes();
    let nb = new.as_bytes();

    // Length of the common prefix up to and including its last '/'.
    let pfx = ob
        .iter()
        .zip(nb)
        .enumerate()
        .take_while(|(_, (o, n))| o == n)
        .filter(|(_, (o, _))| **o == b'/')
        .map(|(i, _)| i + 1)
        .last()
        .unwrap_or(0);

    // Length of the common suffix starting at its earliest '/'.
    let mut sfx = ob
        .iter()
        .rev()
        .zip(nb.iter().rev())
        .enumerate()
        .take_while(|(_, (o, n))| o == n)
        .filter(|(_, (o, _))| **o == b'/')
        .map(|(i, _)| i + 1)
        .last()
        .unwrap_or(0);

    let mut sfx_at_old = ob.len() - sfx;
    let mut sfx_at_new = nb.len() - sfx;

    // Shrink the suffix while it reaches into the prefix on both sides.
    while sfx > 0 && pfx > sfx_at_old && pfx > sfx_at_new {
        // Next '/' after the one the suffix currently starts with.
        let next_slash = ob[sfx_at_old..]
            .iter()
            .skip(1)
            .position(|&b| b == b'/')
            .map(|p| p + 1);
        match next_slash {
            Some(offset) => {
                sfx -= offset;
                sfx_at_old = ob.len() - sfx;
                sfx_at_new = nb.len() - sfx;
            }
            None => {
                // No shorter suffix exists: give it up entirely.
                sfx_at_old = ob.len();
                sfx_at_new = nb.len();
                break;
            }
        }
    }

    let prefix = &old[..pfx];
    let suffix = &old[sfx_at_old..];
    if prefix.is_empty() && suffix.is_empty() {
        return format!("{old} => {new}");
    }

    // A side whose suffix still overlaps the prefix has an empty middle.
    let old_mid = if sfx_at_old < pfx {
        ""
    } else {
        &old[pfx..sfx_at_old]
    };
    let new_mid = if sfx_at_new < pfx {
        ""
    } else {
        &new[pfx..sfx_at_new]
    };

    format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
}
2245
2246fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
2248 let old = del.old_path.as_deref().unwrap_or("");
2249 let new = add.new_path.as_deref().unwrap_or("");
2250 let old_base = old.rsplit('/').next().unwrap_or(old);
2251 let new_base = new.rsplit('/').next().unwrap_or(new);
2252 old_base == new_base && !old_base.is_empty()
2253}
2254
2255fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
2260 let old_norm = crate::crlf::crlf_to_lf(old);
2263 let new_norm = crate::crlf::crlf_to_lf(new);
2264
2265 let src_size = old_norm.len();
2266 let dst_size = new_norm.len();
2267
2268 if src_size == 0 && dst_size == 0 {
2269 return 100;
2270 }
2271 let total = src_size + dst_size;
2272 if total == 0 {
2273 return 100;
2274 }
2275
2276 use similar::{ChangeTag, TextDiff};
2278 let old_str = String::from_utf8_lossy(&old_norm);
2279 let new_str = String::from_utf8_lossy(&new_norm);
2280 let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
2281
2282 let mut shared_bytes = 0usize;
2283 for change in diff.iter_all_changes() {
2284 if change.tag() == ChangeTag::Equal {
2285 shared_bytes += change.value().len();
2287 }
2288 }
2289
2290 let max_size = src_size.max(dst_size);
2293
2294 ((shared_bytes * 100) / max_size).min(100) as u32
2295}
2296
2297#[must_use]
2301pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
2302 compute_similarity(old, new)
2303}
2304
2305pub fn format_raw(entry: &DiffEntry) -> String {
2311 let path = match entry.status {
2312 DiffStatus::Renamed | DiffStatus::Copied => {
2313 format!(
2314 "{}\t{}",
2315 entry.old_path.as_deref().unwrap_or(""),
2316 entry.new_path.as_deref().unwrap_or("")
2317 )
2318 }
2319 _ => entry.path().to_owned(),
2320 };
2321
2322 let status_str = match (entry.status, entry.score) {
2323 (DiffStatus::Renamed, Some(s)) => format!("R{:03}", s),
2324 (DiffStatus::Copied, Some(s)) => format!("C{:03}", s),
2325 _ => entry.status.letter().to_string(),
2326 };
2327
2328 format!(
2329 ":{} {} {} {} {}\t{}",
2330 entry.old_mode, entry.new_mode, entry.old_oid, entry.new_oid, status_str, path
2331 )
2332}
2333
2334pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
2336 let ellipsis = if std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes") {
2337 "..."
2338 } else {
2339 ""
2340 };
2341 let old_hex = format!("{}", entry.old_oid);
2342 let new_hex = format!("{}", entry.new_oid);
2343 let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
2344 let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
2345
2346 let path = entry.path();
2347
2348 format!(
2349 ":{} {} {}{} {}{} {}\t{}",
2350 entry.old_mode,
2351 entry.new_mode,
2352 old_abbrev,
2353 ellipsis,
2354 new_abbrev,
2355 ellipsis,
2356 entry.status.letter(),
2357 path
2358 )
2359}
2360
2361pub fn unified_diff(
2376 old_content: &str,
2377 new_content: &str,
2378 old_path: &str,
2379 new_path: &str,
2380 context_lines: usize,
2381 indent_heuristic: bool,
2382 quote_path_fully: bool,
2383) -> String {
2384 unified_diff_with_prefix(
2385 old_content,
2386 new_content,
2387 old_path,
2388 new_path,
2389 context_lines,
2390 0,
2391 "a/",
2392 "b/",
2393 indent_heuristic,
2394 quote_path_fully,
2395 )
2396}
2397
2398#[allow(clippy::too_many_arguments)] pub fn unified_diff_with_prefix(
2404 old_content: &str,
2405 new_content: &str,
2406 old_path: &str,
2407 new_path: &str,
2408 context_lines: usize,
2409 inter_hunk_context: usize,
2410 src_prefix: &str,
2411 dst_prefix: &str,
2412 indent_heuristic: bool,
2413 quote_path_fully: bool,
2414) -> String {
2415 unified_diff_with_prefix_and_funcname(
2416 old_content,
2417 new_content,
2418 old_path,
2419 new_path,
2420 context_lines,
2421 inter_hunk_context,
2422 src_prefix,
2423 dst_prefix,
2424 None,
2425 indent_heuristic,
2426 quote_path_fully,
2427 )
2428}
2429
2430#[allow(clippy::too_many_arguments)]
2433pub fn unified_diff_with_prefix_and_funcname(
2434 old_content: &str,
2435 new_content: &str,
2436 old_path: &str,
2437 new_path: &str,
2438 context_lines: usize,
2439 inter_hunk_context: usize,
2440 src_prefix: &str,
2441 dst_prefix: &str,
2442 funcname_matcher: Option<&FuncnameMatcher>,
2443 indent_heuristic: bool,
2444 quote_path_fully: bool,
2445) -> String {
2446 unified_diff_with_prefix_and_funcname_and_algorithm(
2447 old_content,
2448 new_content,
2449 old_path,
2450 new_path,
2451 context_lines,
2452 inter_hunk_context,
2453 src_prefix,
2454 dst_prefix,
2455 funcname_matcher,
2456 similar::Algorithm::Myers,
2457 false,
2458 false,
2459 indent_heuristic,
2460 quote_path_fully,
2461 )
2462}
2463
2464#[allow(clippy::too_many_arguments)]
2470pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
2471 old_content: &str,
2472 new_content: &str,
2473 old_path: &str,
2474 new_path: &str,
2475 context_lines: usize,
2476 inter_hunk_context: usize,
2477 src_prefix: &str,
2478 dst_prefix: &str,
2479 funcname_matcher: Option<&FuncnameMatcher>,
2480 algorithm: similar::Algorithm,
2481 function_context: bool,
2482 use_git_histogram: bool,
2483 indent_heuristic: bool,
2484 quote_path_fully: bool,
2485) -> String {
2486 if use_git_histogram {
2487 return unified_diff_histogram_with_prefix_and_funcname(
2488 old_content,
2489 new_content,
2490 old_path,
2491 new_path,
2492 context_lines,
2493 inter_hunk_context,
2494 src_prefix,
2495 dst_prefix,
2496 funcname_matcher,
2497 quote_path_fully,
2498 );
2499 }
2500
2501 if function_context {
2502 return unified_diff_with_function_context(
2503 old_content,
2504 new_content,
2505 old_path,
2506 new_path,
2507 context_lines,
2508 inter_hunk_context,
2509 src_prefix,
2510 dst_prefix,
2511 funcname_matcher,
2512 algorithm,
2513 indent_heuristic,
2514 quote_path_fully,
2515 );
2516 }
2517
2518 use crate::quote_path::format_diff_path_with_prefix;
2519 use similar::{group_diff_ops, udiff::UnifiedDiffHunk, TextDiff};
2520
2521 let diff = TextDiff::configure()
2522 .algorithm(algorithm)
2523 .diff_lines(old_content, new_content);
2524 let compacted_ops = diff_indent_heuristic::diff_lines_ops_compacted(
2525 old_content,
2526 new_content,
2527 algorithm,
2528 indent_heuristic,
2529 );
2530
2531 let mut output = String::new();
2532 if old_path == "/dev/null" {
2533 output.push_str("--- /dev/null\n");
2534 } else if src_prefix.is_empty() {
2535 output.push_str(&format!("--- {old_path}\n"));
2538 } else {
2539 output.push_str("--- ");
2540 output.push_str(&format_diff_path_with_prefix(
2541 src_prefix,
2542 old_path,
2543 quote_path_fully,
2544 ));
2545 output.push('\n');
2546 }
2547 if new_path == "/dev/null" {
2548 output.push_str("+++ /dev/null\n");
2549 } else if dst_prefix.is_empty() {
2550 output.push_str(&format!("+++ {new_path}\n"));
2551 } else {
2552 output.push_str("+++ ");
2553 output.push_str(&format_diff_path_with_prefix(
2554 dst_prefix,
2555 new_path,
2556 quote_path_fully,
2557 ));
2558 output.push('\n');
2559 }
2560
2561 let old_lines: Vec<&str> = old_content.lines().collect();
2562
2563 let max_common_gap = context_lines
2568 .saturating_mul(2)
2569 .saturating_add(inter_hunk_context);
2570 let group_radius = max_common_gap.div_ceil(2);
2571 let op_groups = group_diff_ops(compacted_ops, group_radius);
2572
2573 for ops in op_groups {
2574 if ops.is_empty() {
2575 continue;
2576 }
2577 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
2578 let hunk_str = format!("{hunk}");
2579 if let Some(first_newline) = hunk_str.find('\n') {
2583 let header_line = &hunk_str[..first_newline];
2584 let rest = &hunk_str[first_newline..];
2585
2586 if let Some(func_ctx) =
2588 extract_function_context(header_line, &old_lines, funcname_matcher)
2589 {
2590 output.push_str(header_line);
2591 output.push(' ');
2592 output.push_str(&func_ctx);
2593 output.push_str(rest);
2594 } else {
2595 output.push_str(&hunk_str);
2596 }
2597 } else {
2598 output.push_str(&hunk_str);
2599 }
2600 }
2601
2602 output
2603}
2604
/// Produces a unified diff whose hunks are widened using function
/// boundaries found by the `expand_func_*` helpers.
///
/// The work happens in two passes:
/// 1. The full-file diff is grouped into hunks, and for each hunk a line
///    range `(s1, e1, s2, e2)` in the old and new files is computed,
///    widened by `context_lines` and by `expand_func_pre_start` /
///    `expand_func_post_end`.
/// 2. Each range is cut out of both files, diffed on its own through
///    `unified_diff_with_prefix_and_funcname_and_algorithm`, and the
///    resulting hunk headers are shifted back to full-file line numbers.
///
/// The `---` / `+++` file headers are written once, before the hunks.
fn unified_diff_with_function_context(
    old_content: &str,
    new_content: &str,
    old_path: &str,
    new_path: &str,
    context_lines: usize,
    inter_hunk_context: usize,
    src_prefix: &str,
    dst_prefix: &str,
    funcname_matcher: Option<&FuncnameMatcher>,
    algorithm: similar::Algorithm,
    indent_heuristic: bool,
    quote_path_fully: bool,
) -> String {
    use crate::quote_path::format_diff_path_with_prefix;
    use similar::{group_diff_ops, udiff::UnifiedDiffHunk, TextDiff};

    let diff = TextDiff::configure()
        .algorithm(algorithm)
        .diff_lines(old_content, new_content);

    let old_lines: Vec<&str> = old_content.lines().collect();
    let new_lines: Vec<&str> = new_content.lines().collect();
    let n_old = old_lines.len();
    let n_new = new_lines.len();

    // NOTE(review): unlike unified_diff_with_prefix_and_funcname_and_algorithm,
    // this radius is not halved with `div_ceil(2)` — confirm the wider
    // grouping is intentional here.
    let group_radius = context_lines
        .saturating_mul(2)
        .saturating_add(inter_hunk_context);
    let all_ops = diff.ops().to_vec();
    let op_groups = group_diff_ops(all_ops.clone(), group_radius);

    // One (old_start, old_end, new_start, new_end) tuple per hunk; used below
    // as slice bounds into `old_lines` / `new_lines`.
    let mut ranges: Vec<(usize, usize, usize, usize)> = Vec::new();

    for ops in op_groups {
        if ops.is_empty() {
            continue;
        }
        // These three are computed before `ops` is moved into the hunk.
        let i1_anchor = func_context_old_anchor(&ops, n_old);
        let i1_end = hunk_old_change_end_exclusive(&ops);
        let skip_preimage_pull =
            append_with_whole_function_added(&ops, n_old, n_new, &new_lines, funcname_matcher);
        // Render the hunk only to read the line ranges back from its header.
        let hunk = UnifiedDiffHunk::new(ops, &diff, true);
        let hunk_str = format!("{hunk}");
        let header_line = hunk_str
            .lines()
            .next()
            .unwrap_or("")
            .trim_end_matches(['\r', '\n']);
        // Hunks whose header cannot be parsed are dropped from the output.
        let Some((base_s1, base_e1, _base_s2, _base_e2)) =
            parse_unified_hunk_header_ranges(header_line)
        else {
            continue;
        };

        let ctx = context_lines;
        let (s1, e1, s2, e2) = if skip_preimage_pull {
            // Use only the last `ctx` old lines and run to the end of both
            // files.
            let s = n_old.saturating_sub(ctx);
            let s2 = map_old_line_to_new(&all_ops, s, n_new).min(n_new);
            (s, n_old, s2, n_new)
        } else {
            // Start: the earliest of (hunk start - ctx), (anchor - ctx) and
            // the start reported by expand_func_pre_start. The new-side start
            // is re-derived from the old-side start after every adjustment.
            let mut s1 = base_s1.saturating_sub(ctx);
            let mut s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);

            let base_pre_s1 = i1_anchor.saturating_sub(ctx);
            if base_pre_s1 < s1 {
                s1 = base_pre_s1;
                s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
            }

            let fs1 = expand_func_pre_start(s1, i1_anchor, n_old, &old_lines, funcname_matcher);
            if fs1 < s1 {
                s1 = fs1;
                s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
            }

            // End: the later of (hunk end + ctx, capped at the file length)
            // and the end reported by expand_func_post_end.
            let mut e1 = (base_e1 + ctx).min(n_old);
            let mut e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
            let fe1 = expand_func_post_end(e1, i1_end, n_old, &old_lines, funcname_matcher);
            if fe1 > e1 {
                e1 = fe1;
                e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
            }
            (s1, e1, s2, e2)
        };

        ranges.push((s1, e1, s2, e2));
    }

    // File headers: "/dev/null" verbatim, bare path when the prefix is empty,
    // otherwise the prefixed (and possibly quoted) path.
    let mut output = String::new();
    if old_path == "/dev/null" {
        output.push_str("--- /dev/null\n");
    } else if src_prefix.is_empty() {
        output.push_str(&format!("--- {old_path}\n"));
    } else {
        output.push_str("--- ");
        output.push_str(&format_diff_path_with_prefix(
            src_prefix,
            old_path,
            quote_path_fully,
        ));
        output.push('\n');
    }
    if new_path == "/dev/null" {
        output.push_str("+++ /dev/null\n");
    } else if dst_prefix.is_empty() {
        output.push_str(&format!("+++ {new_path}\n"));
    } else {
        output.push_str("+++ ");
        output.push_str(&format_diff_path_with_prefix(
            dst_prefix,
            new_path,
            quote_path_fully,
        ));
        output.push('\n');
    }

    for (s1, e1, s2, e2) in ranges {
        // Skip ranges that are empty on both sides.
        if s1 >= e1 && s2 >= e2 {
            continue;
        }
        let old_seg =
            line_slice_for_diff_with_eof_nl(&old_lines, s1, e1, old_content.ends_with('\n'));
        let new_seg =
            line_slice_for_diff_with_eof_nl(&new_lines, s2, e2, new_content.ends_with('\n'));
        // Context at least as long as the longer segment (minimum 1).
        let inner_ctx = old_seg.lines().count().max(new_seg.lines().count()).max(1);
        // Re-diff just this segment; function-context and histogram modes are
        // off, so this call does not recurse back into this function.
        let piece = unified_diff_with_prefix_and_funcname_and_algorithm(
            &old_seg,
            &new_seg,
            old_path,
            new_path,
            inner_ctx,
            0,
            src_prefix,
            dst_prefix,
            funcname_matcher,
            algorithm,
            false,
            false,
            indent_heuristic,
            quote_path_fully,
        );
        // Segment-relative hunk headers become full-file line numbers.
        let shifted = shift_unified_hunk_headers_to_full_file(&piece, s1, s2);
        let with_func =
            enrich_unified_hunk_headers_funcname(&shifted, &old_lines, funcname_matcher);
        for line in with_func.lines() {
            // The per-segment diff repeats the file headers; they were already
            // written once above.
            if line.starts_with("--- ") || line.starts_with("+++ ") {
                continue;
            }
            output.push_str(line);
            output.push('\n');
        }
    }

    output
}
2762
2763fn shift_unified_hunk_headers_to_full_file(
2768 patch: &str,
2769 delta_old: usize,
2770 delta_new: usize,
2771) -> String {
2772 if delta_old == 0 && delta_new == 0 {
2773 return patch.to_owned();
2774 }
2775 let mut out = String::with_capacity(patch.len());
2776 for line in patch.lines() {
2777 if let Some(shifted) = shift_one_unified_hunk_header(line, delta_old, delta_new) {
2778 out.push_str(&shifted);
2779 } else {
2780 out.push_str(line);
2781 }
2782 out.push('\n');
2783 }
2784 out
2785}
2786
2787fn shift_one_unified_hunk_header(line: &str, delta_old: usize, delta_new: usize) -> Option<String> {
2788 let rest = line.strip_prefix("@@ ")?;
2789 let (old_chunk, after_plus) = rest.split_once(" +")?;
2790 let old_spec = old_chunk.strip_prefix('-')?;
2791 let (new_spec, suffix) = after_plus.split_once(" @@")?;
2792 let shifted_old = shift_unified_range_spec(old_spec, delta_old)?;
2793 let shifted_new = shift_unified_range_spec(new_spec, delta_new)?;
2794 Some(format!("@@ -{shifted_old} +{shifted_new} @@{suffix}"))
2795}
2796
/// Adds `delta` to the start of a unified-diff range spec (`start` or `start,count`).
///
/// Surrounding whitespace is ignored and the count, when present, is kept unchanged.
/// The addition saturates. Returns `None` if any numeric field fails to parse.
fn shift_unified_range_spec(spec: &str, delta: usize) -> Option<String> {
    let trimmed = spec.trim();
    match trimmed.split_once(',') {
        Some((start_text, count_text)) => {
            let first = start_text.parse::<usize>().ok()?;
            let len = count_text.parse::<usize>().ok()?;
            Some(format!("{},{}", first.saturating_add(delta), len))
        }
        None => trimmed
            .parse::<usize>()
            .ok()
            .map(|first| first.saturating_add(delta).to_string()),
    }
}
2808
2809fn enrich_unified_hunk_headers_funcname(
2811 patch: &str,
2812 full_old_lines: &[&str],
2813 funcname_matcher: Option<&FuncnameMatcher>,
2814) -> String {
2815 let mut out = String::with_capacity(patch.len());
2816 for line in patch.lines() {
2817 if let Some(fixed) = enrich_one_hunk_header_funcname(line, full_old_lines, funcname_matcher)
2818 {
2819 out.push_str(&fixed);
2820 } else {
2821 out.push_str(line);
2822 }
2823 out.push('\n');
2824 }
2825 out
2826}
2827
2828fn enrich_one_hunk_header_funcname(
2829 line: &str,
2830 full_old_lines: &[&str],
2831 funcname_matcher: Option<&FuncnameMatcher>,
2832) -> Option<String> {
2833 let after_at = line.strip_prefix("@@ ")?;
2834 let idx = after_at.find(" @@")?;
2835 let mid = after_at[..idx].trim();
2836 let tail = after_at[idx + 3..].trim_start();
2837 let header_for_parse = format!("@@ {mid} @@");
2838 let func = extract_function_context(&header_for_parse, full_old_lines, funcname_matcher);
2839 Some(if let Some(f) = func {
2840 format!("@@ {mid} @@ {f}")
2841 } else if !tail.is_empty() {
2842 format!("@@ {mid} @@ {tail}")
2843 } else {
2844 format!("@@ {mid} @@")
2845 })
2846}
2847
/// Joins `lines[start..end]` back into text suitable for diffing.
///
/// `lines` is expected to hold terminator-free lines (as produced by `str::lines`).
/// Every line in the slice is terminated with `\n`, except that the final line of the
/// file keeps its missing terminator when `full_file_ends_with_newline` is `false`.
///
/// `end` is clamped to `lines.len()`; an empty range yields an empty string.
fn line_slice_for_diff_with_eof_nl(
    lines: &[&str],
    start: usize,
    end: usize,
    full_file_ends_with_newline: bool,
) -> String {
    let end = end.min(lines.len());
    if start >= end {
        return String::new();
    }
    let mut text = lines[start..end].join("\n");
    let reaches_eof = end == lines.len();
    // The terminator is appended unconditionally: when the slice ends in a blank line,
    // the joined text already ends in '\n' (the separator), and skipping the push would
    // silently drop that blank line from the segment.
    if !reaches_eof || full_file_ends_with_newline {
        text.push('\n');
    }
    text
}
2869
/// Maps the old-side position `old_line` to a new-side position using the edit script `ops`.
///
/// The ops are scanned in order. Ops that end at or before `old_line` only advance the
/// running new-side position; the first op that reaches `old_line` decides the result.
/// Every returned value is clamped to `n_new`.
fn map_old_line_to_new(ops: &[similar::DiffOp], old_line: usize, n_new: usize) -> usize {
    use similar::DiffOp;
    // New-side position just past the last op that lies entirely before `old_line`.
    let mut n = 0usize;
    for op in ops {
        match *op {
            DiffOp::Equal {
                old_index,
                new_index,
                len,
            } => {
                // Whole run is before the target: remember where it ends on the new side.
                if old_index + len <= old_line {
                    n = new_index + len;
                    continue;
                }
                // Target falls inside the run: equal lines map one-to-one.
                if old_index < old_line {
                    let take = old_line - old_index;
                    return (new_index + take).min(n_new);
                }
                // Run starts at or after the target.
                return new_index.min(n_new);
            }
            DiffOp::Delete {
                old_index,
                old_len,
                new_index,
            } => {
                // Deleted lines consume nothing on the new side.
                if old_index + old_len <= old_line {
                    n = new_index;
                    continue;
                }
                if old_index < old_line {
                    return new_index.min(n_new);
                }
                // A deletion starting at or after the target falls through to the next op.
            }
            DiffOp::Insert {
                old_index,
                new_index,
                new_len,
            } => {
                if old_index < old_line {
                    n = new_index + new_len;
                    continue;
                }
                // Insertion exactly at the target: the mapped position is after the inserted lines.
                if old_index == old_line {
                    return (new_index + new_len).min(n_new);
                }
                return new_index.min(n_new);
            }
            DiffOp::Replace {
                old_index,
                old_len,
                new_index,
                new_len,
            } => {
                if old_index + old_len <= old_line {
                    n = new_index + new_len;
                    continue;
                }
                // Target inside the replaced range: advance by the same offset, capped at the
                // length of the replacement.
                if old_index < old_line {
                    let into_old = old_line - old_index;
                    let mapped = new_index + into_old.min(new_len);
                    return mapped.min(n_new);
                }
                return new_index.min(n_new);
            }
        }
    }
    // No op reached the target: use the position after the last op seen.
    n.min(n_new)
}
2942
/// Parses `@@ -a[,b] +c[,d] @@…` into half-open, 0-based `(old_start, old_end, new_start, new_end)`.
///
/// A missing count is treated as `1`. The 1-based start is converted with a saturating
/// subtraction, so a start of `0` stays `0`. Returns `None` for anything that does not
/// match the header shape or contains a non-numeric field.
fn parse_unified_hunk_header_ranges(header: &str) -> Option<(usize, usize, usize, usize)> {
    /// Converts one `start[,count]` spec into a 0-based half-open span.
    fn zero_based_span(side: &str) -> Option<(usize, usize)> {
        let side = side.trim();
        let (first_line, len) = match side.split_once(',') {
            Some((start, count)) => (start.parse::<usize>().ok()?, count.parse::<usize>().ok()?),
            None => (side.parse::<usize>().ok()?, 1usize),
        };
        let begin = first_line.saturating_sub(1);
        Some((begin, begin.saturating_add(len)))
    }

    let (old_part, remainder) = header.strip_prefix("@@ ")?.split_once(" +")?;
    let old_part = old_part.strip_prefix('-')?;
    let (new_part, _) = remainder.split_once(" @@")?;

    let (old_begin, old_end) = zero_based_span(old_part)?;
    let (new_begin, new_end) = zero_based_span(new_part)?;
    Some((old_begin, old_end, new_begin, new_end))
}
2967
2968fn append_with_whole_function_added(
2971 ops: &[similar::DiffOp],
2972 n_old: usize,
2973 n_new: usize,
2974 new_lines: &[&str],
2975 matcher: Option<&FuncnameMatcher>,
2976) -> bool {
2977 use similar::DiffOp;
2978 if n_old == 0 {
2979 return false;
2980 }
2981 let mut only_ins_or_eq = true;
2982 let mut min_new_ins = usize::MAX;
2983 for op in ops {
2984 match *op {
2985 DiffOp::Equal { .. } => {}
2986 DiffOp::Insert {
2987 new_index, new_len, ..
2988 } => {
2989 min_new_ins = min_new_ins.min(new_index);
2990 if new_len == 0 {
2991 only_ins_or_eq = false;
2992 }
2993 }
2994 DiffOp::Delete { .. } | DiffOp::Replace { .. } => {
2995 only_ins_or_eq = false;
2996 }
2997 }
2998 }
2999 let mut insert_at_eof = false;
3000 for op in ops {
3001 if let DiffOp::Insert { old_index, .. } = *op {
3002 if old_index == n_old {
3003 insert_at_eof = true;
3004 break;
3005 }
3006 }
3007 }
3008 let append_at_eof = min_new_ins == n_old || insert_at_eof;
3009 if !only_ins_or_eq || !append_at_eof || min_new_ins == usize::MAX {
3010 return false;
3011 }
3012 let mut j = min_new_ins;
3017 while j < n_new {
3018 let line = new_lines[j];
3019 if line.trim().is_empty() {
3020 j += 1;
3021 continue;
3022 }
3023 if let Some(m) = matcher {
3024 if m.match_line(line).is_some() {
3025 return true;
3026 }
3027 } else if inserted_block_starts_with_c_like_function_definition(line) {
3028 return true;
3029 }
3030 j += 1;
3031 }
3032 false
3033}
3034
/// Heuristically decides whether `line` looks like the start of a C-like function definition.
///
/// The text before the first `(` must consist of at least two whitespace-separated tokens:
/// the last one is the function name (an identifier, optionally decorated with `*`/`&`
/// sigils such as `*name` or `**name`), and at least one earlier token must be a
/// well-known C type keyword or storage/qualifier keyword.
fn inserted_block_starts_with_c_like_function_definition(line: &str) -> bool {
    let trimmed = line.trim_start();
    let Some(open_paren) = trimmed.find('(') else {
        return false;
    };
    let tokens: Vec<&str> = trimmed[..open_paren].split_whitespace().collect();
    let Some((&declarator, leading)) = tokens.split_last() else {
        return false;
    };
    if leading.is_empty() {
        // A lone `name(` is a call, not a definition.
        return false;
    }

    // Pointer/reference sigils bind to the declarator and therefore precede the name
    // (`char *dup(…)`); trailing ones are stripped as well for backward compatibility.
    let name = declarator
        .trim_start_matches(['*', '&'])
        .trim_end_matches(['*', '&']);
    if name.is_empty() || !name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return false;
    }

    leading.iter().any(|tok| {
        matches!(
            *tok,
            "static"
                | "extern"
                | "inline"
                | "void"
                | "int"
                | "char"
                | "short"
                | "long"
                | "float"
                | "double"
                | "unsigned"
                | "signed"
                | "struct"
                | "enum"
                | "union"
                | "const"
                | "volatile"
                | "typedef"
        )
    })
}
3078
3079fn hunk_old_change_end_exclusive(ops: &[similar::DiffOp]) -> usize {
3080 use similar::DiffOp;
3081 let mut max_o = 0usize;
3082 for op in ops {
3083 match *op {
3084 DiffOp::Delete {
3085 old_index, old_len, ..
3086 } => {
3087 max_o = max_o.max(old_index + old_len);
3088 }
3089 DiffOp::Replace {
3090 old_index, old_len, ..
3091 } => {
3092 max_o = max_o.max(old_index + old_len);
3093 }
3094 DiffOp::Insert { old_index, .. } => {
3095 max_o = max_o.max(old_index);
3098 }
3099 DiffOp::Equal { .. } => {}
3100 }
3101 }
3102 max_o
3103}
3104
3105fn func_context_old_anchor(ops: &[similar::DiffOp], n_old: usize) -> usize {
3106 use similar::DiffOp;
3107 let mut has_delete_or_replace = false;
3108 let mut min_del = usize::MAX;
3109 let mut min_ins_old = usize::MAX;
3110
3111 for op in ops {
3112 match *op {
3113 DiffOp::Delete {
3114 old_index, old_len, ..
3115 } => {
3116 has_delete_or_replace = true;
3117 min_del = min_del.min(old_index);
3118 min_del = min_del.min(old_index + old_len.saturating_sub(1));
3119 }
3120 DiffOp::Replace {
3121 old_index, old_len, ..
3122 } => {
3123 has_delete_or_replace = true;
3124 min_del = min_del.min(old_index);
3125 min_del = min_del.min(old_index + old_len.saturating_sub(1));
3126 }
3127 DiffOp::Insert { old_index, .. } => {
3128 min_ins_old = min_ins_old.min(old_index);
3129 }
3130 DiffOp::Equal { .. } => {}
3131 }
3132 }
3133
3134 let mut i1 = if has_delete_or_replace {
3135 min_del
3136 } else if min_ins_old != usize::MAX {
3137 min_ins_old
3138 } else {
3139 0
3140 };
3141
3142 let pure_insert = ops
3143 .iter()
3144 .all(|op| matches!(op, DiffOp::Insert { .. } | DiffOp::Equal { .. }))
3145 && ops.iter().any(|op| matches!(op, DiffOp::Insert { .. }));
3146
3147 if pure_insert && i1 >= n_old && n_old > 0 {
3148 i1 = n_old - 1;
3149 }
3150
3151 i1.min(n_old.saturating_sub(1))
3152}
3153
3154fn expand_func_pre_start(
3155 s1: usize,
3156 i1: usize,
3157 n_old: usize,
3158 old_lines: &[&str],
3159 matcher: Option<&FuncnameMatcher>,
3160) -> usize {
3161 if n_old == 0 {
3162 return s1;
3163 }
3164 let i1 = i1.min(n_old.saturating_sub(1));
3165 let mut fs1 = get_func_line_backward(old_lines, i1, matcher).unwrap_or(i1);
3166 while fs1 > 0
3167 && !is_line_empty_for_func_context(old_lines[fs1 - 1])
3168 && !is_func_line(old_lines[fs1 - 1], matcher)
3169 {
3170 fs1 -= 1;
3171 }
3172 s1.min(fs1)
3173}
3174
3175fn expand_func_post_end(
3176 e1: usize,
3177 i1_end: usize,
3178 n_old: usize,
3179 old_lines: &[&str],
3180 matcher: Option<&FuncnameMatcher>,
3181) -> usize {
3182 let from = i1_end.min(n_old);
3183 let fe1 = get_func_line_forward(old_lines, from, matcher).unwrap_or(n_old);
3184 let mut fe1_adj = fe1;
3185 while fe1_adj > 0 && is_line_empty_for_func_context(old_lines[fe1_adj - 1]) {
3186 fe1_adj -= 1;
3187 }
3188 e1.max(fe1_adj).min(n_old)
3189}
3190
/// Returns `true` when `line` is empty or consists solely of whitespace characters.
fn is_line_empty_for_func_context(line: &str) -> bool {
    line.trim().is_empty()
}
3194
3195fn is_func_line(line: &str, matcher: Option<&FuncnameMatcher>) -> bool {
3196 if let Some(m) = matcher {
3197 return m.match_line(line).is_some();
3198 }
3199 let t = line.trim_end_matches(['\n', '\r']);
3200 if t.is_empty() {
3201 return false;
3202 }
3203 let b = t.as_bytes()[0];
3204 b.is_ascii_alphabetic() || b == b'_' || b == b'$'
3205}
3206
3207fn get_func_line_backward(
3208 old_lines: &[&str],
3209 start: usize,
3210 matcher: Option<&FuncnameMatcher>,
3211) -> Option<usize> {
3212 let mut l = start.min(old_lines.len().saturating_sub(1));
3213 if old_lines.is_empty() {
3214 return None;
3215 }
3216 loop {
3217 if is_func_line(old_lines[l], matcher) {
3218 return Some(l);
3219 }
3220 if l == 0 {
3221 break;
3222 }
3223 l -= 1;
3224 }
3225 None
3226}
3227
3228fn get_func_line_forward(
3229 old_lines: &[&str],
3230 start: usize,
3231 matcher: Option<&FuncnameMatcher>,
3232) -> Option<usize> {
3233 let mut l = start;
3234 while l < old_lines.len() {
3235 if is_func_line(old_lines[l], matcher) {
3236 return Some(l);
3237 }
3238 l += 1;
3239 }
3240 None
3241}
3242
3243pub fn anchored_unified_diff(
3253 old_content: &str,
3254 new_content: &str,
3255 old_path: &str,
3256 new_path: &str,
3257 context_lines: usize,
3258 anchors: &[String],
3259 algorithm: similar::Algorithm,
3260 use_git_histogram: bool,
3261 indent_heuristic: bool,
3262 quote_path_fully: bool,
3263) -> String {
3264 use crate::quote_path::format_diff_path_with_prefix;
3265 use similar::TextDiff;
3266
3267 let old_lines: Vec<&str> = old_content.lines().collect();
3268 let new_lines: Vec<&str> = new_content.lines().collect();
3269
3270 let mut anchor_pairs: Vec<(usize, usize)> = Vec::new(); for anchor in anchors {
3274 let anchor_str = anchor.as_str();
3275
3276 let old_positions: Vec<usize> = old_lines
3278 .iter()
3279 .enumerate()
3280 .filter(|(_, l)| l.trim_end() == anchor_str)
3281 .map(|(i, _)| i)
3282 .collect();
3283
3284 let new_positions: Vec<usize> = new_lines
3286 .iter()
3287 .enumerate()
3288 .filter(|(_, l)| l.trim_end() == anchor_str)
3289 .map(|(i, _)| i)
3290 .collect();
3291
3292 if old_positions.len() == 1 && new_positions.len() == 1 {
3294 anchor_pairs.push((old_positions[0], new_positions[0]));
3295 }
3296 }
3297
3298 if anchor_pairs.is_empty() {
3300 return unified_diff_with_prefix_and_funcname_and_algorithm(
3301 old_content,
3302 new_content,
3303 old_path,
3304 new_path,
3305 context_lines,
3306 0,
3307 "a/",
3308 "b/",
3309 None,
3310 algorithm,
3311 false,
3312 use_git_histogram,
3313 indent_heuristic,
3314 quote_path_fully,
3315 );
3316 }
3317
3318 anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
3320
3321 let mut filtered: Vec<(usize, usize)> = Vec::new();
3324 for &pair in &anchor_pairs {
3325 if filtered.is_empty() || filtered.last().is_some_and(|last| pair.1 > last.1) {
3326 filtered.push(pair);
3327 }
3328 }
3329 let anchor_pairs = filtered;
3330
3331 struct LineDiffOp {
3340 tag: char, line: String,
3342 }
3343
3344 let append_segment_diff =
3345 |ops: &mut Vec<LineDiffOp>, old_seg_input: &str, new_seg_input: &str| {
3346 use similar::ChangeTag;
3347 let old_ls: Vec<&str> = old_seg_input.lines().collect();
3348 let new_ls: Vec<&str> = new_seg_input.lines().collect();
3349 if old_ls.is_empty() && new_ls.is_empty() {
3350 return;
3351 }
3352 let seg_diff = TextDiff::configure()
3353 .algorithm(algorithm)
3354 .diff_slices(&old_ls, &new_ls);
3355 let raw = seg_diff.ops().to_vec();
3356 let compacted = diff_indent_heuristic::apply_change_compact_to_ops(
3357 &raw,
3358 &old_ls,
3359 &new_ls,
3360 indent_heuristic,
3361 );
3362 for op in &compacted {
3363 for ch in op.iter_changes(&old_ls, &new_ls) {
3364 let t = match ch.tag() {
3365 ChangeTag::Equal => ' ',
3366 ChangeTag::Delete => '-',
3367 ChangeTag::Insert => '+',
3368 };
3369 ops.push(LineDiffOp {
3370 tag: t,
3371 line: ch.value().to_string(),
3372 });
3373 }
3374 }
3375 };
3376
3377 let mut ops: Vec<LineDiffOp> = Vec::new();
3378 let mut old_pos = 0usize;
3379 let mut new_pos = 0usize;
3380
3381 for &(old_anchor, new_anchor) in &anchor_pairs {
3382 let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
3384 let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
3385
3386 let old_seg_text = old_segment.join("\n");
3387 let new_seg_text = new_segment.join("\n");
3388
3389 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
3390 let old_seg_input = if old_seg_text.is_empty() {
3391 String::new()
3392 } else {
3393 format!("{}\n", old_seg_text)
3394 };
3395 let new_seg_input = if new_seg_text.is_empty() {
3396 String::new()
3397 } else {
3398 format!("{}\n", new_seg_text)
3399 };
3400 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
3401 }
3402
3403 ops.push(LineDiffOp {
3405 tag: ' ',
3406 line: old_lines[old_anchor].to_string(),
3407 });
3408
3409 old_pos = old_anchor + 1;
3410 new_pos = new_anchor + 1;
3411 }
3412
3413 let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
3415 let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
3416 let old_seg_text = old_segment.join("\n");
3417 let new_seg_text = new_segment.join("\n");
3418
3419 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
3420 let old_seg_input = if old_seg_text.is_empty() {
3421 String::new()
3422 } else {
3423 format!("{}\n", old_seg_text)
3424 };
3425 let new_seg_input = if new_seg_text.is_empty() {
3426 String::new()
3427 } else {
3428 format!("{}\n", new_seg_text)
3429 };
3430 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
3431 }
3432
3433 let mut output = String::new();
3435 if old_path == "/dev/null" {
3436 output.push_str("--- /dev/null\n");
3437 } else {
3438 output.push_str("--- ");
3439 output.push_str(&format_diff_path_with_prefix(
3440 "a/",
3441 old_path,
3442 quote_path_fully,
3443 ));
3444 output.push('\n');
3445 }
3446 if new_path == "/dev/null" {
3447 output.push_str("+++ /dev/null\n");
3448 } else {
3449 output.push_str("+++ ");
3450 output.push_str(&format_diff_path_with_prefix(
3451 "b/",
3452 new_path,
3453 quote_path_fully,
3454 ));
3455 output.push('\n');
3456 }
3457
3458 let total_ops = ops.len();
3460 if total_ops == 0 {
3461 return output;
3462 }
3463
3464 let mut hunks: Vec<(usize, usize)> = Vec::new(); let mut i = 0;
3467 while i < total_ops {
3468 if ops[i].tag != ' ' {
3469 let start = i.saturating_sub(context_lines);
3470 let mut end = i;
3471 while end < total_ops {
3473 if ops[end].tag != ' ' {
3474 end += 1;
3475 continue;
3476 }
3477 let mut next_change = end;
3479 while next_change < total_ops && ops[next_change].tag == ' ' {
3480 next_change += 1;
3481 }
3482 if next_change < total_ops && next_change - end <= context_lines * 2 {
3483 end = next_change + 1;
3484 } else {
3485 end = (end + context_lines).min(total_ops);
3486 break;
3487 }
3488 }
3489 if let Some(last) = hunks.last_mut() {
3491 if start <= last.1 {
3492 last.1 = end;
3493 } else {
3494 hunks.push((start, end));
3495 }
3496 } else {
3497 hunks.push((start, end));
3498 }
3499 i = end;
3500 } else {
3501 i += 1;
3502 }
3503 }
3504
3505 for (start, end) in hunks {
3507 let mut old_start = 1usize;
3509 let mut new_start = 1usize;
3510 for op in &ops[..start] {
3512 match op.tag {
3513 ' ' => {
3514 old_start += 1;
3515 new_start += 1;
3516 }
3517 '-' => {
3518 old_start += 1;
3519 }
3520 '+' => {
3521 new_start += 1;
3522 }
3523 _ => {}
3524 }
3525 }
3526 let mut old_count = 0usize;
3527 let mut new_count = 0usize;
3528 for op in &ops[start..end] {
3529 match op.tag {
3530 ' ' => {
3531 old_count += 1;
3532 new_count += 1;
3533 }
3534 '-' => {
3535 old_count += 1;
3536 }
3537 '+' => {
3538 new_count += 1;
3539 }
3540 _ => {}
3541 }
3542 }
3543
3544 output.push_str(&format!(
3545 "@@ -{},{} +{},{} @@\n",
3546 old_start, old_count, new_start, new_count
3547 ));
3548 for op in &ops[start..end] {
3549 output.push(op.tag);
3550 output.push_str(&op.line);
3551 output.push('\n');
3552 }
3553 }
3554
3555 output
3556}
3557
3558fn extract_function_context(
3564 header: &str,
3565 old_lines: &[&str],
3566 funcname_matcher: Option<&FuncnameMatcher>,
3567) -> Option<String> {
3568 let at_pos = header.find("-")?;
3570 let rest = &header[at_pos + 1..];
3571 let comma_or_space = rest.find([',', ' '])?;
3572 let start_str = &rest[..comma_or_space];
3573 let start_line: usize = start_str.parse().ok()?;
3574
3575 if start_line <= 1 {
3576 return None;
3577 }
3578
3579 let search_end = (start_line - 1).min(old_lines.len());
3584 let truncate = |text: &str| {
3585 if text.len() > 80 {
3586 let mut end = 80;
3587 while end > 0 && !text.is_char_boundary(end) {
3588 end -= 1;
3589 }
3590 text[..end].to_owned()
3591 } else {
3592 text.to_owned()
3593 }
3594 };
3595
3596 for i in (0..search_end).rev() {
3597 let line = old_lines[i];
3598 if line.is_empty() {
3599 continue;
3600 }
3601 if let Some(matcher) = funcname_matcher {
3602 if let Some(matched) = matcher.match_line(line) {
3603 return Some(truncate(&matched));
3604 }
3605 continue;
3606 }
3607
3608 let first = line.as_bytes()[0];
3609 if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
3610 return Some(truncate(line.trim_end_matches(char::is_whitespace)));
3611 }
3612 }
3613 None
3614}
3615
3616pub fn format_stat_line(
3620 path: &str,
3621 insertions: usize,
3622 deletions: usize,
3623 max_path_len: usize,
3624) -> String {
3625 format_stat_line_width(path, insertions, deletions, max_path_len, 0)
3626}
3627
/// Formats one diffstat row: ` <path> | <total> <bar>`.
///
/// - `path` is left-aligned and padded to `max_path_len`.
/// - The total (`insertions + deletions`) is right-aligned to `count_width`, or to its own
///   digit count when `count_width` is `0`.
/// - The bar holds one `+` per insertion and one `-` per deletion, each capped at 50; it is
///   omitted (together with its separating space) when there are no changes.
pub fn format_stat_line_width(
    path: &str,
    insertions: usize,
    deletions: usize,
    max_path_len: usize,
    count_width: usize,
) -> String {
    let total = insertions + deletions;
    let count_cols = match count_width {
        0 => total.to_string().len(),
        explicit => explicit,
    };

    let mut row = format!(
        " {:<width$} | {:>cw$}",
        path,
        total,
        width = max_path_len,
        cw = count_cols
    );

    let mut bar = "+".repeat(insertions.min(50));
    bar.push_str(&"-".repeat(deletions.min(50)));
    if !bar.is_empty() {
        row.push(' ');
        row.push_str(&bar);
    }
    row
}
3663
/// Normalises the whitespace of a single line for whitespace-insensitive comparison.
///
/// Every run of whitespace characters collapses into one ASCII space and trailing
/// whitespace is removed. Leading whitespace is kept as a single space, unless the line
/// contains nothing else.
#[must_use]
pub fn normalize_ignore_space_change_line(line: &str) -> String {
    let words = line.split_whitespace().collect::<Vec<_>>().join(" ");
    let has_leading_space = line.chars().next().is_some_and(char::is_whitespace);
    if has_leading_space && !words.is_empty() {
        format!(" {words}")
    } else {
        words
    }
}
3685
3686#[must_use]
3692pub fn normalize_ignore_space_change(content: &str) -> String {
3693 content
3694 .lines()
3695 .map(normalize_ignore_space_change_line)
3696 .collect::<Vec<_>>()
3697 .join("\n")
3698}
3699
3700pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
3704 count_changes_with_algorithm(old_content, new_content, similar::Algorithm::Myers, false)
3705}
3706
3707#[must_use]
3712pub fn count_changes_with_algorithm(
3713 old_content: &str,
3714 new_content: &str,
3715 algorithm: similar::Algorithm,
3716 use_git_histogram: bool,
3717) -> (usize, usize) {
3718 if use_git_histogram {
3719 use imara_diff::{Algorithm, Diff, InternedInput};
3720 let input = InternedInput::new(old_content, new_content);
3721 let mut d = Diff::compute(Algorithm::Histogram, &input);
3722 d.postprocess_lines(&input);
3723 return (d.count_additions() as usize, d.count_removals() as usize);
3724 }
3725
3726 use similar::{ChangeTag, TextDiff};
3727
3728 let diff = TextDiff::configure()
3729 .algorithm(algorithm)
3730 .diff_lines(old_content, new_content);
3731 let mut ins = 0;
3732 let mut del = 0;
3733
3734 for change in diff.iter_all_changes() {
3735 match change.tag() {
3736 ChangeTag::Insert => ins += 1,
3737 ChangeTag::Delete => del += 1,
3738 ChangeTag::Equal => {}
3739 }
3740 }
3741
3742 (ins, del)
3743}
3744
/// Counts the lines in `data`.
///
/// Each `\n` ends a line; a final fragment without a terminating `\n` counts as one more
/// line. Empty input has zero lines.
#[must_use]
pub fn count_git_lines(data: &[u8]) -> usize {
    let newline_count = data.iter().filter(|&&byte| byte == b'\n').count();
    match data.last() {
        None => 0,
        Some(b'\n') => newline_count,
        Some(_) => newline_count + 1,
    }
}
3769
/// Full-scale value of a similarity/dissimilarity score (a score of 100%).
pub const GIT_DIFF_MAX_SCORE: u64 = 60_000;
/// Module-internal alias of [`GIT_DIFF_MAX_SCORE`].
const DIFF_MAX_SCORE: u64 = GIT_DIFF_MAX_SCORE;
/// Rewrite scoring is skipped when the larger of the two blobs is smaller than this many bytes.
const DIFF_MINIMUM_BREAK_SIZE: usize = 400;
/// Break score used when the caller does not supply one (half of the maximum score).
const DIFF_DEFAULT_BREAK_SCORE: u64 = 30_000;
/// Public re-export of the default break score.
pub const GIT_DIFF_DEFAULT_BREAK_SCORE: u64 = DIFF_DEFAULT_BREAK_SCORE;
/// NOTE(review): presumably the default merge score applied to pairs that were broken
/// apart; not used in the visible part of this file — confirm against callers.
pub const GIT_DIFF_DEFAULT_MERGE_SCORE_AFTER_BREAK: u64 = 36_000;
/// Modulus applied to span hashes before they are stored in the span table.
const DIFF_HASHBASE: u32 = 107_927;
3781
/// One slot of a [`SpanHashTop`] table.
#[derive(Clone, Copy, Default)]
struct SpanSlot {
    /// Hash value identifying the span.
    hashval: u32,
    /// Bytes accumulated for this hash; `0` marks an unused slot.
    cnt: u32,
}

/// Open-addressing hash table (linear probing) that accumulates byte counts per span hash.
struct SpanHashTop {
    /// Base-2 logarithm of the table capacity.
    alloc_log2: u8,
    /// Number of slots that may still be claimed before the table grows.
    free_slots: i32,
    /// Slot storage; its length is always `1 << alloc_log2`.
    data: Vec<SpanSlot>,
}

impl SpanHashTop {
    /// Creates an empty table with `1 << initial_log2` slots.
    fn new(initial_log2: u8) -> Self {
        let cap = 1usize << initial_log2;
        Self {
            alloc_log2: initial_log2,
            free_slots: initial_free(initial_log2),
            data: vec![SpanSlot::default(); cap],
        }
    }

    /// Current capacity in slots.
    fn len(&self) -> usize {
        1usize << self.alloc_log2
    }

    /// Adds `cnt` bytes to the entry for `hashval`, creating the entry if needed.
    ///
    /// When claiming a new slot exhausts the free budget, the table is doubled. The span
    /// that triggered the growth is already stored at that point and is carried over by
    /// the rehash, so it must not be inserted a second time.
    fn add_span(&mut self, hashval: u32, cnt: u32) {
        if self.place(hashval, cnt) {
            self.free_slots -= 1;
            if self.free_slots < 0 {
                self.rehash();
            }
        }
    }

    /// Doubles the capacity and re-inserts every occupied slot.
    fn rehash(&mut self) {
        let old = std::mem::take(&mut self.data);
        self.alloc_log2 = self.alloc_log2.saturating_add(1);
        self.free_slots = initial_free(self.alloc_log2);
        self.data = vec![SpanSlot::default(); self.len()];
        for slot in old.into_iter().filter(|slot| slot.cnt != 0) {
            self.add_span_after_rehash(slot.hashval, slot.cnt);
        }
    }

    /// Inserts an entry carried over from the previous table; never triggers growth.
    fn add_span_after_rehash(&mut self, hashval: u32, cnt: u32) {
        if self.place(hashval, cnt) {
            self.free_slots -= 1;
        }
    }

    /// Probes for `hashval` and stores or accumulates `cnt`.
    ///
    /// Returns `true` when a previously unused slot was claimed. The free-slot budget
    /// keeps the table from ever filling up, so the probe always terminates.
    fn place(&mut self, hashval: u32, cnt: u32) -> bool {
        let lim = self.len();
        let mut bucket = (hashval as usize) & (lim - 1);
        loop {
            let slot = &mut self.data[bucket];
            if slot.cnt == 0 {
                slot.hashval = hashval;
                slot.cnt = cnt;
                return true;
            }
            if slot.hashval == hashval {
                slot.cnt = slot.cnt.saturating_add(cnt);
                return false;
            }
            bucket += 1;
            if bucket >= lim {
                bucket = 0;
            }
        }
    }

    /// Sorts occupied slots by ascending hash and moves unused slots to the end.
    ///
    /// The sort key is a proper total order (two unused slots compare equal), which the
    /// standard sort routines require.
    fn sort_by_hashval(&mut self) {
        let sz = self.len();
        self.data[..sz].sort_unstable_by_key(|slot| (slot.cnt == 0, slot.hashval));
    }
}

/// Number of slots that may be filled in a table of `1 << sz_log2` slots before it grows.
///
/// Computed as `(1 << sz_log2) * (sz_log2 - 3) / sz_log2`, never negative.
fn initial_free(sz_log2: u8) -> i32 {
    if sz_log2 == 0 {
        // Avoids dividing by zero for a single-slot table.
        return 0;
    }
    let sz = i32::from(sz_log2);
    ((1i32 << sz_log2) * (sz - 3) / sz).max(0)
}
3899
3900fn hash_blob_spans(buf: &[u8], is_text: bool) -> SpanHashTop {
3901 let mut hash = SpanHashTop::new(9);
3902 let mut n = 0u32;
3903 let mut accum1: u32 = 0;
3904 let mut accum2: u32 = 0;
3905 let mut i = 0usize;
3906 while i < buf.len() {
3907 let c = buf[i] as u32;
3908 let old_1 = accum1;
3909 i += 1;
3910
3911 if is_text && c == b'\r' as u32 && i < buf.len() && buf[i] == b'\n' {
3912 continue;
3913 }
3914
3915 accum1 = accum1.wrapping_shl(7) ^ accum2.wrapping_shr(25);
3916 accum2 = accum2.wrapping_shl(7) ^ old_1.wrapping_shr(25);
3917 accum1 = accum1.wrapping_add(c);
3918 n += 1;
3919 if n < 64 && c != b'\n' as u32 {
3920 continue;
3921 }
3922 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
3923 hash.add_span(hashval, n);
3924 n = 0;
3925 accum1 = 0;
3926 accum2 = 0;
3927 }
3928 if n > 0 {
3929 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
3930 hash.add_span(hashval, n);
3931 }
3932 hash.sort_by_hashval();
3933 hash
3934}
3935
3936#[must_use]
3941pub fn diffcore_count_changes(old: &[u8], new: &[u8]) -> (u64, u64) {
3942 let src_is_text = !crate::merge_file::is_binary(old);
3943 let dst_is_text = !crate::merge_file::is_binary(new);
3944 let src_count = hash_blob_spans(old, src_is_text);
3945 let dst_count = hash_blob_spans(new, dst_is_text);
3946 let mut sc: u64 = 0;
3947 let mut la: u64 = 0;
3948 let mut si = 0usize;
3949 let mut di = 0usize;
3950 let src_len = src_count.len();
3951 let dst_len = dst_count.len();
3952 loop {
3953 if si >= src_len || src_count.data[si].cnt == 0 {
3954 break;
3955 }
3956 let s_hash = src_count.data[si].hashval;
3957 let s_cnt = u64::from(src_count.data[si].cnt);
3958 while di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval < s_hash {
3959 la += u64::from(dst_count.data[di].cnt);
3960 di += 1;
3961 }
3962 let mut dst_cnt = 0u64;
3963 if di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval == s_hash {
3964 dst_cnt = u64::from(dst_count.data[di].cnt);
3965 di += 1;
3966 }
3967 if s_cnt < dst_cnt {
3968 la += dst_cnt - s_cnt;
3969 sc += s_cnt;
3970 } else {
3971 sc += dst_cnt;
3972 }
3973 si += 1;
3974 }
3975 while di < dst_len && dst_count.data[di].cnt != 0 {
3976 la += u64::from(dst_count.data[di].cnt);
3977 di += 1;
3978 }
3979 (sc, la)
3980}
3981
3982#[must_use]
3985pub fn should_break_rewrite_for_stat(old: &[u8], new: &[u8]) -> bool {
3986 should_break_rewrite_inner(old, new, DIFF_DEFAULT_BREAK_SCORE)
3987}
3988
3989#[must_use]
3993pub fn should_break_rewrite_pair(old: &[u8], new: &[u8], break_score: u64) -> bool {
3994 should_break_rewrite_inner(old, new, break_score)
3995}
3996
3997pub fn parse_diff_rename_score_token(arg: &str) -> Option<u64> {
4000 let mut num: u64 = 0;
4001 let mut scale: u64 = 1;
4002 let mut dot = false;
4003 let mut saw_digit = false;
4004 for ch in arg.chars() {
4005 if !dot && ch == '.' {
4006 scale = 1;
4007 dot = true;
4008 continue;
4009 }
4010 if ch == '%' {
4011 scale = if dot { scale.saturating_mul(100) } else { 100 };
4012 break;
4013 }
4014 if ch.is_ascii_digit() {
4015 saw_digit = true;
4016 if scale < 100_000 {
4017 scale = scale.saturating_mul(10);
4018 num = num.saturating_mul(10) + u64::from(ch as u8 - b'0');
4019 }
4020 } else {
4021 break;
4022 }
4023 }
4024 if !saw_digit {
4025 return None;
4026 }
4027 Some(if num >= scale {
4028 GIT_DIFF_MAX_SCORE
4029 } else {
4030 GIT_DIFF_MAX_SCORE * num / scale
4031 })
4032}
4033
4034#[must_use]
4037pub fn rewrite_merge_score(old: &[u8], new: &[u8]) -> Option<u64> {
4038 if old.is_empty() {
4039 return None;
4040 }
4041 let max_size = old.len().max(new.len());
4042 if max_size < DIFF_MINIMUM_BREAK_SIZE {
4043 return None;
4044 }
4045 let (src_copied, _) = diffcore_count_changes(old, new);
4046 let src_copied = src_copied.min(old.len() as u64);
4047 let src_removed = (old.len() as u64).saturating_sub(src_copied);
4048 Some(src_removed * DIFF_MAX_SCORE / old.len() as u64)
4049}
4050
4051#[must_use]
4053pub fn rewrite_dissimilarity_index_percent(old: &[u8], new: &[u8]) -> Option<u32> {
4054 let score = rewrite_merge_score(old, new)?;
4055 Some((score * 100 / DIFF_MAX_SCORE).min(100) as u32)
4056}
4057
4058fn should_break_rewrite_inner(src: &[u8], dst: &[u8], break_score: u64) -> bool {
4059 if src.is_empty() {
4060 return false;
4061 }
4062 let max_size = src.len().max(dst.len());
4063 if max_size < DIFF_MINIMUM_BREAK_SIZE {
4064 return false;
4065 }
4066 let (src_copied, literal_added) = diffcore_count_changes(src, dst);
4067 let src_copied = src_copied.min(src.len() as u64);
4068 let mut literal_added = literal_added;
4069 let dst_len = dst.len() as u64;
4070 if src_copied < dst_len && literal_added + src_copied > dst_len {
4071 literal_added = dst_len.saturating_sub(src_copied);
4072 }
4073 let src_removed = (src.len() as u64).saturating_sub(src_copied);
4074 let merge_score = src_removed * DIFF_MAX_SCORE / src.len() as u64;
4075 if merge_score > break_score {
4076 return true;
4077 }
4078 let delta_size = src_removed.saturating_add(literal_added);
4079 if delta_size * DIFF_MAX_SCORE / (max_size as u64) < break_score {
4080 return false;
4081 }
4082 let s = src.len() as u64;
4083 if (s * break_score < src_removed * DIFF_MAX_SCORE)
4084 && (literal_added * 20 < src_removed)
4085 && (literal_added * 20 < src_copied)
4086 {
4087 return false;
4088 }
4089 true
4090}
4091
/// A non-tree entry collected while recursively flattening a tree.
struct FlatEntry {
    /// Slash-separated path, built from the enclosing prefix and the entry name.
    path: String,
    /// Mode bits copied from the tree entry.
    mode: u32,
    /// Object id copied from the tree entry.
    oid: ObjectId,
}
4100
4101fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
4102 let entries = read_tree(odb, tree_oid)?;
4103 let mut result = Vec::new();
4104
4105 for entry in entries {
4106 let name_str = String::from_utf8_lossy(&entry.name);
4107 let path = format_path(prefix, &name_str);
4108 if is_tree_mode(entry.mode) {
4109 let nested = flatten_tree(odb, &entry.oid, &path)?;
4110 result.extend(nested);
4111 } else {
4112 result.push(FlatEntry {
4113 path,
4114 mode: entry.mode,
4115 oid: entry.oid,
4116 });
4117 }
4118 }
4119
4120 Ok(result)
4121}
4122
4123pub fn head_path_states(
4125 odb: &Odb,
4126 head_tree: Option<&ObjectId>,
4127) -> Result<std::collections::BTreeMap<String, (u32, ObjectId)>> {
4128 let mut m = std::collections::BTreeMap::new();
4129 let Some(t) = head_tree else {
4130 return Ok(m);
4131 };
4132 for fe in flatten_tree(odb, t, "")? {
4133 m.insert(fe.path, (fe.mode, fe.oid));
4134 }
4135 Ok(m)
4136}
4137
/// Reports whether `mode` is exactly the tree (directory) mode `040000`.
fn is_tree_mode(mode: u32) -> bool {
    const TREE_MODE: u32 = 0o040000;
    matches!(mode, TREE_MODE)
}
4142
/// Joins `prefix` and `name` with a `/`, or returns `name` alone when `prefix`
/// is empty.
fn format_path(prefix: &str, name: &str) -> String {
    match prefix {
        "" => name.to_owned(),
        dir => format!("{dir}/{name}"),
    }
}
4151
/// Renders `mode` in octal, zero-padded on the left to at least six digits
/// (for example `100644` or `040000`).
pub fn format_mode(mode: u32) -> String {
    format!("{:06o}", mode)
}
4156
#[must_use]
/// Public entry point for reading the commit a submodule checkout at `sub_dir`
/// currently has as its `HEAD`.
///
/// Delegates directly to `read_submodule_head`; returns `None` whenever that
/// helper cannot determine an object id.
pub fn read_submodule_head_for_checkout(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head(sub_dir)
}
4164
4165#[must_use]
4170pub fn submodule_commit_subject_line(c: &CommitData) -> String {
4171 let enc = c.encoding.as_deref().unwrap_or("UTF-8");
4172 let is_latin1 = enc.eq_ignore_ascii_case("ISO8859-1")
4173 || enc.eq_ignore_ascii_case("ISO-8859-1")
4174 || enc.eq_ignore_ascii_case("LATIN1")
4175 || enc.eq_ignore_ascii_case("ISO-8859-15");
4176 if let Some(raw) = c.raw_message.as_deref() {
4177 let line = raw.split(|b| *b == b'\n').next().unwrap_or(raw);
4178 if is_latin1 {
4179 return line
4180 .iter()
4181 .map(|&b| b as char)
4182 .collect::<String>()
4183 .trim()
4184 .to_owned();
4185 }
4186 return String::from_utf8_lossy(line).trim().to_string();
4187 }
4188 c.message.lines().next().unwrap_or("").trim().to_owned()
4189}
4190
/// Reports whether `sub_dir` looks like an unpopulated submodule placeholder:
/// either the directory does not exist or it can be listed and has no entries.
///
/// Any other failure to list the directory yields `false`.
fn submodule_worktree_is_unpopulated_placeholder(sub_dir: &Path) -> bool {
    fs::read_dir(sub_dir)
        .map(|mut listing| listing.next().is_none())
        .unwrap_or_else(|err| err.kind() == std::io::ErrorKind::NotFound)
}
4200
/// Reads the commit id a submodule checkout at `sub_dir` has as its `HEAD`.
///
/// Private alias that forwards unchanged to `read_submodule_head_oid`.
fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head_oid(sub_dir)
}
4204
#[must_use]
/// Locates the git directory referenced by `sub_dir/.git`.
///
/// * If `.git` is a regular file, the first line starting with `gitdir: ` is
///   used; its trimmed value is returned as-is when absolute, otherwise joined
///   onto `sub_dir` (without normalisation).
/// * If `.git` is a directory, that directory is returned.
///
/// Returns `None` when `.git` is missing, unreadable, or has no `gitdir: ` line.
pub fn submodule_embedded_git_dir(sub_dir: &Path) -> Option<PathBuf> {
    let dot_git = sub_dir.join(".git");

    if dot_git.is_file() {
        let text = fs::read_to_string(&dot_git).ok()?;
        let target = text
            .lines()
            .find_map(|line| line.strip_prefix("gitdir: "))?
            .trim();
        let target = Path::new(target);
        if target.is_absolute() {
            return Some(target.to_path_buf());
        }
        return Some(sub_dir.join(target));
    }

    if dot_git.is_dir() {
        return Some(dot_git);
    }
    None
}
4226
/// Walks upward from the parent of `sub_dir` looking for the nearest ancestor
/// that contains a `.git` entry.
///
/// Returns `(worktree, git_dir)` for that ancestor. When `.git` is a file, its
/// first `gitdir: ` line supplies the git directory (joined onto the ancestor
/// if relative); otherwise `.git` itself is the git directory.
///
/// Returns `None` when no ancestor has a `.git` entry, or when the first
/// `.git` file found is unreadable or lacks a `gitdir: ` line (the search does
/// not continue past it).
fn find_superproject_git(sub_dir: &Path) -> Option<(PathBuf, PathBuf)> {
    // `skip(1)` drops `sub_dir` itself so the search starts at its parent.
    for dir in sub_dir.ancestors().skip(1) {
        let marker = dir.join(".git");
        if !marker.exists() {
            continue;
        }

        let git_dir = if marker.is_file() {
            let text = fs::read_to_string(&marker).ok()?;
            let target = text
                .lines()
                .find_map(|line| line.strip_prefix("gitdir: "))?
                .trim();
            if Path::new(target).is_absolute() {
                PathBuf::from(target)
            } else {
                dir.join(target)
            }
        } else {
            marker
        };
        return Some((dir.to_path_buf(), git_dir));
    }
    None
}
4252
4253pub fn read_submodule_head_oid(sub_dir: &Path) -> Option<ObjectId> {
4259 let mut git_dir = submodule_embedded_git_dir(sub_dir)?;
4262 if let Some((super_wt, super_git_dir)) = find_superproject_git(sub_dir) {
4263 let rel = sub_dir.strip_prefix(&super_wt).ok()?;
4264 let rel_str = rel.to_string_lossy().replace('\\', "/");
4265 let local_mod = super_git_dir
4266 .join("modules")
4267 .join(rel_str.trim_start_matches('/'));
4268 if local_mod.join("HEAD").exists() {
4269 let sg = super_git_dir.canonicalize().unwrap_or(super_git_dir);
4270 let cur = git_dir.canonicalize().unwrap_or_else(|_| git_dir.clone());
4271 if !cur.starts_with(&sg) {
4272 git_dir = local_mod;
4273 }
4274 }
4275 }
4276 let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
4277 let head_trimmed = head_content.trim();
4278 if head_trimmed.starts_with("ref: ") {
4279 match crate::refs::resolve_ref(&git_dir, "HEAD") {
4283 Ok(oid) => Some(oid),
4284 Err(_) => {
4285 let mut found = None;
4286 for branch in ["main", "master"] {
4287 let p = git_dir.join("refs/heads").join(branch);
4288 if let Ok(s) = fs::read_to_string(&p) {
4289 if let Ok(o) = ObjectId::from_hex(s.trim()) {
4290 found = Some(o);
4291 break;
4292 }
4293 }
4294 }
4295 found
4296 }
4297 }
4298 } else {
4299 ObjectId::from_hex(head_trimmed).ok()
4300 }
4301}
4302
4303fn submodule_has_dirty_worktree_for_super_diff(
4306 super_worktree: &Path,
4307 rel_path: &str,
4308 recorded_oid: &ObjectId,
4309) -> bool {
4310 let flags = submodule_porcelain_flags(super_worktree, rel_path, *recorded_oid);
4311 flags.modified || flags.untracked
4312}
4313
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
/// Status flags describing a submodule relative to what the superproject
/// records for it, as computed by `submodule_porcelain_flags`.
///
/// The `Default` value has every flag cleared.
pub struct SubmodulePorcelainFlags {
    /// The submodule's resolved `HEAD` differs from the recorded object id.
    pub new_commits: bool,
    /// The submodule has staged or unstaged changes, or a nested submodule
    /// reports `modified`.
    pub modified: bool,
    /// The submodule's working tree contains at least one file that is not
    /// tracked by the owning index.
    pub untracked: bool,
}
4324
4325pub fn submodule_porcelain_flags(
4331 super_worktree: &Path,
4332 rel_path: &str,
4333 recorded_oid: ObjectId,
4334) -> SubmodulePorcelainFlags {
4335 let sub_dir = super_worktree.join(rel_path);
4336 let Some(sub_git_dir) = submodule_embedded_git_dir(&sub_dir) else {
4337 return SubmodulePorcelainFlags::default();
4338 };
4339 let Some(sub_head) = read_submodule_head_oid(&sub_dir) else {
4340 return SubmodulePorcelainFlags::default();
4341 };
4342
4343 let new_commits = sub_head != recorded_oid;
4344
4345 let index_path = sub_git_dir.join("index");
4346 let sub_index = match crate::index::Index::load(&index_path) {
4347 Ok(ix) => ix,
4348 Err(_) => {
4349 return SubmodulePorcelainFlags {
4350 new_commits,
4351 ..Default::default()
4352 }
4353 }
4354 };
4355
4356 let tracked: std::collections::BTreeSet<String> = sub_index
4357 .entries
4358 .iter()
4359 .filter(|e| e.stage() == 0)
4360 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
4361 .collect();
4362 let untracked = submodule_dir_has_untracked_inner(&sub_dir, &sub_dir, &tracked, &sub_index);
4363
4364 let objects_dir = sub_git_dir.join("objects");
4365 let odb = Odb::new(&objects_dir);
4366
4367 let sub_head_tree = (|| -> Option<ObjectId> {
4368 let h = fs::read_to_string(sub_git_dir.join("HEAD")).ok()?;
4369 let h_str = h.trim();
4370 let commit_oid = if let Some(r) = h_str.strip_prefix("ref: ") {
4371 let oid_hex = fs::read_to_string(sub_git_dir.join(r)).ok()?;
4372 ObjectId::from_hex(oid_hex.trim()).ok()?
4373 } else {
4374 ObjectId::from_hex(h_str).ok()?
4375 };
4376 let obj = odb.read(&commit_oid).ok()?;
4377 let commit = parse_commit(&obj.data).ok()?;
4378 Some(commit.tree)
4379 })();
4380
4381 let staged_dirty = sub_head_tree
4382 .as_ref()
4383 .map(|t| diff_index_to_tree(&odb, &sub_index, Some(t), false).map(|v| !v.is_empty()))
4384 .unwrap_or(Ok(false));
4385 let staged_dirty = staged_dirty.unwrap_or(false);
4386
4387 let unstaged_dirty = diff_index_to_worktree(&odb, &sub_index, &sub_dir, false, true)
4388 .map(|v| !v.is_empty())
4389 .unwrap_or(false);
4390
4391 let mut modified = staged_dirty || unstaged_dirty;
4392
4393 for e in &sub_index.entries {
4398 if e.stage() != 0 || e.mode != 0o160000 {
4399 continue;
4400 }
4401 let child = String::from_utf8_lossy(&e.path).into_owned();
4402 let full_rel = if rel_path.is_empty() {
4403 child
4404 } else {
4405 format!("{rel_path}/{child}")
4406 };
4407 let nested = submodule_porcelain_flags(super_worktree, &full_rel, e.oid);
4408 modified |= nested.modified;
4409 }
4410
4411 SubmodulePorcelainFlags {
4412 new_commits,
4413 modified,
4414 untracked,
4415 }
4416}
4417
4418fn submodule_dir_has_untracked_inner(
4419 dir: &Path,
4420 root: &Path,
4421 tracked: &std::collections::BTreeSet<String>,
4422 owning_index: &Index,
4423) -> bool {
4424 let entries = match fs::read_dir(dir) {
4425 Ok(e) => e,
4426 Err(_) => return false,
4427 };
4428 let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
4429 sorted.sort_by_key(|e| e.file_name());
4430
4431 for entry in sorted {
4432 let name = entry.file_name().to_string_lossy().to_string();
4433 if name == ".git" {
4434 continue;
4435 }
4436 let path = entry.path();
4437 let rel = path
4438 .strip_prefix(root)
4439 .map(|p| p.to_string_lossy().to_string())
4440 .unwrap_or_else(|_| name.clone());
4441
4442 let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
4443 if is_dir {
4444 let is_gitlink = owning_index
4445 .get(rel.as_bytes(), 0)
4446 .is_some_and(|e| e.mode == 0o160000);
4447 if is_gitlink {
4448 let Some(nested_git) = submodule_embedded_git_dir(&path) else {
4449 continue;
4450 };
4451 let nested_index_path = nested_git.join("index");
4452 let Ok(nested_ix) = crate::index::Index::load(&nested_index_path) else {
4453 continue;
4454 };
4455 let nested_tracked: std::collections::BTreeSet<String> = nested_ix
4456 .entries
4457 .iter()
4458 .filter(|e| e.stage() == 0)
4459 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
4460 .collect();
4461 if submodule_dir_has_untracked_inner(&path, &path, &nested_tracked, &nested_ix) {
4462 return true;
4463 }
4464 } else if submodule_dir_has_untracked_inner(&path, root, tracked, owning_index) {
4465 return true;
4466 }
4467 } else if !tracked.contains(&rel) {
4468 return true;
4469 }
4470 }
4471 false
4472}