1use std::fs;
24#[cfg(unix)]
25use std::os::unix::fs::MetadataExt;
26use std::path::{Path, PathBuf};
27
28use crate::config::ConfigSet;
29use crate::diff_indent_heuristic;
30use crate::error::{Error, Result};
31use crate::index::{Index, IndexEntry};
32use crate::objects::{parse_commit, parse_tree, CommitData, ObjectId, ObjectKind, TreeEntry};
33use crate::odb::Odb;
34use crate::userdiff::FuncnameMatcher;
35
/// Splits a unified-diff body into one slice per hunk.
///
/// A hunk begins at offset 0 when `body` starts with `@@`, and after every
/// newline that is immediately followed by `@@ `. Text preceding the first
/// hunk header is not returned. Each slice runs up to the start of the next
/// hunk (or the end of `body`), so trailing newlines are preserved.
fn imara_unified_hunk_slices(body: &str) -> Vec<&str> {
    let leading_hunk = body.starts_with("@@").then_some(0usize);
    let following_hunks = body.match_indices("\n@@ ").map(|(pos, _)| pos + 1);

    // Hunk start offsets in ascending order, terminated by the end of the body
    // so that consecutive pairs delimit one hunk each.
    let boundaries: Vec<usize> = leading_hunk
        .into_iter()
        .chain(following_hunks)
        .chain(std::iter::once(body.len()))
        .collect();

    boundaries
        .windows(2)
        .map(|pair| &body[pair[0]..pair[1]])
        .collect()
}
48
/// Derives the single context length handed to imara-diff from the
/// `context_lines` and `inter_hunk_context` settings.
///
/// The value is `ceil((2 * context_lines + inter_hunk_context) / 2)`, computed
/// with saturating arithmetic and clamped to `u32::MAX`.
fn imara_context_len_for_git(context_lines: usize, inter_hunk_context: usize) -> u32 {
    let combined = context_lines
        .saturating_mul(2)
        .saturating_add(inter_hunk_context);
    let halved = combined.div_ceil(2);
    // Values that do not fit into u32 are clamped rather than truncated.
    u32::try_from(halved).unwrap_or(u32::MAX)
}
58
59fn histogram_unified_body_raw(
60 old_content: &str,
61 new_content: &str,
62 context_lines: usize,
63 inter_hunk_context: usize,
64) -> String {
65 use imara_diff::{Algorithm, BasicLineDiffPrinter, Diff, InternedInput, UnifiedDiffConfig};
66
67 let input = InternedInput::new(old_content, new_content);
68 let mut diff = Diff::compute(Algorithm::Histogram, &input);
69 diff.postprocess_lines(&input);
70 let mut config = UnifiedDiffConfig::default();
71 config.context_len(imara_context_len_for_git(context_lines, inter_hunk_context));
72 let printer = BasicLineDiffPrinter(&input.interner);
73 diff.unified_diff(&printer, config, &input).to_string()
74}
75
76#[must_use]
80pub fn unified_diff_histogram_hunks_only(
81 old_content: &str,
82 new_content: &str,
83 context_lines: usize,
84 inter_hunk_context: usize,
85) -> String {
86 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context)
87}
88
89#[must_use]
91pub fn unified_diff_histogram_with_prefix_and_funcname(
92 old_content: &str,
93 new_content: &str,
94 old_path: &str,
95 new_path: &str,
96 context_lines: usize,
97 inter_hunk_context: usize,
98 src_prefix: &str,
99 dst_prefix: &str,
100 funcname_matcher: Option<&FuncnameMatcher>,
101 quote_path_fully: bool,
102) -> String {
103 use crate::quote_path::format_diff_path_with_prefix;
104
105 let body =
106 histogram_unified_body_raw(old_content, new_content, context_lines, inter_hunk_context);
107
108 let mut output = String::new();
109 if old_path == "/dev/null" {
110 output.push_str("--- /dev/null\n");
111 } else if src_prefix.is_empty() {
112 output.push_str(&format!("--- {old_path}\n"));
113 } else {
114 output.push_str("--- ");
115 output.push_str(&format_diff_path_with_prefix(
116 src_prefix,
117 old_path,
118 quote_path_fully,
119 ));
120 output.push('\n');
121 }
122 if new_path == "/dev/null" {
123 output.push_str("+++ /dev/null\n");
124 } else if dst_prefix.is_empty() {
125 output.push_str(&format!("+++ {new_path}\n"));
126 } else {
127 output.push_str("+++ ");
128 output.push_str(&format_diff_path_with_prefix(
129 dst_prefix,
130 new_path,
131 quote_path_fully,
132 ));
133 output.push('\n');
134 }
135
136 let old_lines: Vec<&str> = old_content.lines().collect();
137 for hunk_str in imara_unified_hunk_slices(&body) {
138 if hunk_str.is_empty() {
139 continue;
140 }
141 if let Some(first_newline) = hunk_str.find('\n') {
142 let header_line = &hunk_str[..first_newline];
143 let rest = &hunk_str[first_newline..];
144 if let Some(func_ctx) =
145 extract_function_context(header_line, &old_lines, funcname_matcher)
146 {
147 output.push_str(header_line);
148 output.push(' ');
149 output.push_str(&func_ctx);
150 output.push_str(rest);
151 } else {
152 output.push_str(hunk_str);
153 }
154 } else {
155 output.push_str(hunk_str);
156 }
157 }
158
159 output
160}
161
162#[must_use]
164pub fn indent_heuristic_from_config(config: &ConfigSet) -> bool {
165 match config.get_bool("diff.indentHeuristic") {
166 Some(Ok(b)) => b,
167 Some(Err(_)) | None => true,
168 }
169}
170
171#[must_use]
173pub fn resolve_indent_heuristic(
174 config: &ConfigSet,
175 cli_indent_heuristic: bool,
176 cli_no_indent_heuristic: bool,
177) -> bool {
178 if cli_no_indent_heuristic {
179 false
180 } else if cli_indent_heuristic {
181 true
182 } else {
183 indent_heuristic_from_config(config)
184 }
185}
186
/// Scans `argv` for `--indent-heuristic` and `--no-indent-heuristic`.
///
/// Returns `(indent_heuristic, no_indent_heuristic)`. The last of the two
/// flags on the command line wins, so at most one element of the pair is
/// `true`; both are `false` when neither flag occurs. All other arguments are
/// ignored.
#[must_use]
pub fn parse_indent_heuristic_cli_flags(argv: &[String]) -> (bool, bool) {
    // Only the final occurrence matters, so search from the back.
    let last_choice = argv.iter().rev().find_map(|arg| match arg.as_str() {
        "--indent-heuristic" => Some(true),
        "--no-indent-heuristic" => Some(false),
        _ => None,
    });

    match last_choice {
        Some(true) => (true, false),
        Some(false) => (false, true),
        None => (false, false),
    }
}
207
208#[must_use]
210pub fn diff_slice_ops_compacted(
211 old_lines: &[&str],
212 new_lines: &[&str],
213 algorithm: similar::Algorithm,
214 indent_heuristic: bool,
215) -> Vec<similar::DiffOp> {
216 diff_indent_heuristic::diff_slice_ops_compacted(
217 old_lines,
218 new_lines,
219 algorithm,
220 indent_heuristic,
221 )
222}
223
224#[must_use]
226pub fn map_new_to_old_lines_compacted(
227 old_joined: &str,
228 new_joined: &str,
229 algorithm: similar::Algorithm,
230 indent_heuristic: bool,
231 new_line_count: usize,
232) -> Vec<Option<usize>> {
233 let ops = diff_indent_heuristic::diff_lines_ops_compacted(
234 old_joined,
235 new_joined,
236 algorithm,
237 indent_heuristic,
238 );
239 diff_indent_heuristic::map_new_to_old_from_ops(&ops, new_line_count)
240}
241
/// Kind of change a [`DiffEntry`] describes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffStatus {
    /// Status letter `A`.
    Added,
    /// Status letter `D`.
    Deleted,
    /// Status letter `M`.
    Modified,
    /// Status letter `R`.
    Renamed,
    /// Status letter `C`.
    Copied,
    /// Status letter `T`.
    TypeChanged,
    /// Status letter `U`.
    Unmerged,
}

impl DiffStatus {
    /// Returns the single uppercase ASCII letter that identifies this status.
    #[must_use]
    pub fn letter(&self) -> char {
        let code: u8 = match *self {
            DiffStatus::Added => b'A',
            DiffStatus::Deleted => b'D',
            DiffStatus::Modified => b'M',
            DiffStatus::Renamed => b'R',
            DiffStatus::Copied => b'C',
            DiffStatus::TypeChanged => b'T',
            DiffStatus::Unmerged => b'U',
        };
        char::from(code)
    }
}
276
277#[derive(Debug, Clone, PartialEq, Eq)]
279pub struct DiffEntry {
280 pub status: DiffStatus,
282 pub old_path: Option<String>,
284 pub new_path: Option<String>,
286 pub old_mode: String,
288 pub new_mode: String,
290 pub old_oid: ObjectId,
292 pub new_oid: ObjectId,
294 pub score: Option<u32>,
296}
297
298impl DiffEntry {
299 #[must_use]
301 pub fn path(&self) -> &str {
302 self.new_path
303 .as_deref()
304 .or(self.old_path.as_deref())
305 .unwrap_or("")
306 }
307
308 #[must_use]
313 pub fn display_path(&self) -> String {
314 match self.status {
315 DiffStatus::Renamed | DiffStatus::Copied => {
316 let old = self.old_path.as_deref().unwrap_or("");
317 let new = self.new_path.as_deref().unwrap_or("");
318 if old.is_empty() || new.is_empty() {
319 self.path().to_owned()
320 } else {
321 format!("{old} -> {new}")
322 }
323 }
324 _ => self.path().to_owned(),
325 }
326 }
327}
328
/// Hexadecimal text form of the all-zero object id.
pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
331
332#[must_use]
334pub fn zero_oid() -> ObjectId {
335 ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
336 panic!("internal error: failed to create zero OID");
338 })
339}
340
341#[must_use]
343pub fn empty_blob_oid() -> ObjectId {
344 ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391").unwrap_or_else(|_| {
345 panic!("internal error: failed to create empty blob OID");
347 })
348}
349
350pub fn diff_trees(
365 odb: &Odb,
366 old_tree_oid: Option<&ObjectId>,
367 new_tree_oid: Option<&ObjectId>,
368 prefix: &str,
369) -> Result<Vec<DiffEntry>> {
370 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false)
371}
372
373pub fn diff_trees_show_tree_entries(
377 odb: &Odb,
378 old_tree_oid: Option<&ObjectId>,
379 new_tree_oid: Option<&ObjectId>,
380 prefix: &str,
381) -> Result<Vec<DiffEntry>> {
382 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true)
383}
384
385fn diff_trees_opts(
386 odb: &Odb,
387 old_tree_oid: Option<&ObjectId>,
388 new_tree_oid: Option<&ObjectId>,
389 prefix: &str,
390 show_trees: bool,
391) -> Result<Vec<DiffEntry>> {
392 let old_entries = match old_tree_oid {
393 Some(oid) => read_tree(odb, oid)?,
394 None => Vec::new(),
395 };
396 let new_entries = match new_tree_oid {
397 Some(oid) => read_tree(odb, oid)?,
398 None => Vec::new(),
399 };
400
401 let mut result = Vec::new();
402 diff_tree_entries_opts(
403 odb,
404 &old_entries,
405 &new_entries,
406 prefix,
407 show_trees,
408 &mut result,
409 )?;
410 Ok(result)
411}
412
413fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
415 let obj = odb.read(oid)?;
416 if obj.kind != ObjectKind::Tree {
417 return Err(Error::CorruptObject(format!(
418 "expected tree, got {}",
419 obj.kind.as_str()
420 )));
421 }
422 parse_tree(&obj.data)
423}
424
425fn diff_tree_entries_opts(
427 odb: &Odb,
428 old: &[TreeEntry],
429 new: &[TreeEntry],
430 prefix: &str,
431 show_trees: bool,
432 result: &mut Vec<DiffEntry>,
433) -> Result<()> {
434 let mut oi = 0;
435 let mut ni = 0;
436
437 while oi < old.len() || ni < new.len() {
438 match (old.get(oi), new.get(ni)) {
439 (Some(o), Some(n)) => {
440 let cmp = crate::objects::tree_entry_cmp(
441 &o.name,
442 is_tree_mode(o.mode),
443 &n.name,
444 is_tree_mode(n.mode),
445 );
446 match cmp {
447 std::cmp::Ordering::Less => {
448 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
450 oi += 1;
451 }
452 std::cmp::Ordering::Greater => {
453 emit_added_opts(odb, n, prefix, show_trees, result)?;
455 ni += 1;
456 }
457 std::cmp::Ordering::Equal => {
458 if o.oid != n.oid || o.mode != n.mode {
460 let name_str = String::from_utf8_lossy(&o.name);
461 let path = format_path(prefix, &name_str);
462 if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
463 if show_trees {
465 result.push(DiffEntry {
466 status: DiffStatus::Modified,
467 old_path: Some(path.clone()),
468 new_path: Some(path.clone()),
469 old_mode: format_mode(o.mode),
470 new_mode: format_mode(n.mode),
471 old_oid: o.oid,
472 new_oid: n.oid,
473 score: None,
474 });
475 }
476 let nested = diff_trees_opts(
478 odb,
479 Some(&o.oid),
480 Some(&n.oid),
481 &path,
482 show_trees,
483 )?;
484 result.extend(nested);
485 } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
486 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
488 emit_added_opts(odb, n, prefix, show_trees, result)?;
489 } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
490 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
492 emit_added_opts(odb, n, prefix, show_trees, result)?;
493 } else {
494 let old_type = o.mode & 0o170000;
498 let new_type = n.mode & 0o170000;
499 result.push(DiffEntry {
500 status: if old_type != new_type {
501 DiffStatus::TypeChanged
502 } else {
503 DiffStatus::Modified
504 },
505 old_path: Some(path.clone()),
506 new_path: Some(path),
507 old_mode: format_mode(o.mode),
508 new_mode: format_mode(n.mode),
509 old_oid: o.oid,
510 new_oid: n.oid,
511 score: None,
512 });
513 }
514 }
515 oi += 1;
516 ni += 1;
517 }
518 }
519 }
520 (Some(o), None) => {
521 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
522 oi += 1;
523 }
524 (None, Some(n)) => {
525 emit_added_opts(odb, n, prefix, show_trees, result)?;
526 ni += 1;
527 }
528 (None, None) => break,
529 }
530 }
531
532 Ok(())
533}
534
535fn emit_deleted_opts(
536 odb: &Odb,
537 entry: &TreeEntry,
538 prefix: &str,
539 show_trees: bool,
540 result: &mut Vec<DiffEntry>,
541) -> Result<()> {
542 let name_str = String::from_utf8_lossy(&entry.name);
543 let path = format_path(prefix, &name_str);
544 if is_tree_mode(entry.mode) {
545 if show_trees {
546 result.push(DiffEntry {
547 status: DiffStatus::Deleted,
548 old_path: Some(path.clone()),
549 new_path: None,
550 old_mode: format_mode(entry.mode),
551 new_mode: "000000".to_owned(),
552 old_oid: entry.oid,
553 new_oid: zero_oid(),
554 score: None,
555 });
556 }
557 let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
559 result.extend(nested);
560 } else {
561 result.push(DiffEntry {
562 status: DiffStatus::Deleted,
563 old_path: Some(path.clone()),
564 new_path: None,
565 old_mode: format_mode(entry.mode),
566 new_mode: "000000".to_owned(),
567 old_oid: entry.oid,
568 new_oid: zero_oid(),
569 score: None,
570 });
571 }
572 Ok(())
573}
574
575fn emit_added_opts(
576 odb: &Odb,
577 entry: &TreeEntry,
578 prefix: &str,
579 show_trees: bool,
580 result: &mut Vec<DiffEntry>,
581) -> Result<()> {
582 let name_str = String::from_utf8_lossy(&entry.name);
583 let path = format_path(prefix, &name_str);
584 if is_tree_mode(entry.mode) {
585 if show_trees {
586 result.push(DiffEntry {
587 status: DiffStatus::Added,
588 old_path: None,
589 new_path: Some(path.clone()),
590 old_mode: "000000".to_owned(),
591 new_mode: format_mode(entry.mode),
592 old_oid: zero_oid(),
593 new_oid: entry.oid,
594 score: None,
595 });
596 }
597 let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
599 result.extend(nested);
600 } else {
601 result.push(DiffEntry {
602 status: DiffStatus::Added,
603 old_path: None,
604 new_path: Some(path),
605 old_mode: "000000".to_owned(),
606 new_mode: format_mode(entry.mode),
607 old_oid: zero_oid(),
608 new_oid: entry.oid,
609 score: None,
610 });
611 }
612 Ok(())
613}
614
615pub fn diff_index_to_tree(
635 odb: &Odb,
636 index: &Index,
637 tree_oid: Option<&ObjectId>,
638 ignore_submodules: bool,
639) -> Result<Vec<DiffEntry>> {
640 let tree_entries = match tree_oid {
642 Some(oid) => flatten_tree(odb, oid, "")?,
643 None => Vec::new(),
644 };
645
646 let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
648 std::collections::BTreeMap::new();
649 for entry in &tree_entries {
650 tree_map.insert(&entry.path, entry);
651 }
652
653 let mut result = Vec::new();
654 let mut stage0_paths = std::collections::BTreeSet::new();
655 let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
656 std::collections::BTreeMap::new();
657
658 for ie in &index.entries {
660 let path = String::from_utf8_lossy(&ie.path).to_string();
661 if ie.stage() == 0 && ie.intent_to_add() {
662 continue;
665 }
666 if ie.stage() != 0 {
667 let rank = match ie.stage() {
668 2 => 0u8,
669 3 => 1u8,
670 1 => 2u8,
671 _ => 3u8,
672 };
673 match unmerged_modes.get(&path) {
674 Some((existing_rank, _)) if *existing_rank <= rank => {}
675 _ => {
676 unmerged_modes.insert(path, (rank, ie.mode));
677 }
678 }
679 continue;
680 }
681 if ignore_submodules && ie.mode == 0o160000 {
682 let _ = tree_map.remove(path.as_str());
683 stage0_paths.insert(path.clone());
684 continue;
685 }
686 stage0_paths.insert(path.clone());
687 match tree_map.remove(path.as_str()) {
688 Some(te) => {
689 if te.oid != ie.oid || te.mode != ie.mode {
691 result.push(DiffEntry {
692 status: DiffStatus::Modified,
693 old_path: Some(path.clone()),
694 new_path: Some(path),
695 old_mode: format_mode(te.mode),
696 new_mode: format_mode(ie.mode),
697 old_oid: te.oid,
698 new_oid: ie.oid,
699 score: None,
700 });
701 }
702 }
703 None => {
704 result.push(DiffEntry {
706 status: DiffStatus::Added,
707 old_path: None,
708 new_path: Some(path),
709 old_mode: "000000".to_owned(),
710 new_mode: format_mode(ie.mode),
711 old_oid: zero_oid(),
712 new_oid: ie.oid,
713 score: None,
714 });
715 }
716 }
717 }
718
719 for (path, (_, mode)) in &unmerged_modes {
720 if stage0_paths.contains(path) {
721 continue;
722 }
723 tree_map.remove(path.as_str());
724 result.push(DiffEntry {
725 status: DiffStatus::Unmerged,
726 old_path: Some(path.clone()),
727 new_path: Some(path.clone()),
728 old_mode: "000000".to_owned(),
729 new_mode: format_mode(*mode),
730 old_oid: zero_oid(),
731 new_oid: zero_oid(),
732 score: None,
733 });
734 }
735
736 for (path, te) in tree_map {
738 if ignore_submodules && te.mode == 0o160000 {
739 continue;
740 }
741 result.push(DiffEntry {
742 status: DiffStatus::Deleted,
743 old_path: Some(path.to_owned()),
744 new_path: None,
745 old_mode: format_mode(te.mode),
746 new_mode: "000000".to_owned(),
747 old_oid: te.oid,
748 new_oid: zero_oid(),
749 score: None,
750 });
751 }
752
753 result.sort_by(|a, b| a.path().cmp(b.path()));
754 Ok(result)
755}
756
757pub fn diff_index_to_worktree(
782 odb: &Odb,
783 index: &Index,
784 work_tree: &Path,
785 ignore_submodule_untracked: bool,
786 simplify_gitlinks: bool,
787) -> Result<Vec<DiffEntry>> {
788 diff_index_to_worktree_with_options(
789 odb,
790 index,
791 work_tree,
792 DiffIndexToWorktreeOptions {
793 ignore_submodule_untracked,
794 simplify_gitlinks,
795 ..DiffIndexToWorktreeOptions::default()
796 },
797 )
798}
799
/// Options for [`diff_index_to_worktree_with_options`].
#[derive(Debug, Clone, Copy, Default)]
pub struct DiffIndexToWorktreeOptions {
    /// Timestamp as `(seconds, nanoseconds)` used for the racy-entry check;
    /// with `None` (or zero seconds) no entry is treated as racy.
    // NOTE(review): presumably the mtime of the index file itself — confirm with callers.
    pub index_mtime: Option<(u32, u32)>,
    /// When set, the `untracked` flag reported for a submodule is cleared
    /// before deciding whether the submodule counts as dirty.
    pub ignore_submodule_untracked: bool,
    /// When set, gitlink entries are compared only by the submodule's HEAD
    /// object id; the submodule's dirty flags are not consulted.
    pub simplify_gitlinks: bool,
}
814
/// Compares index entries with the files below `work_tree`.
///
/// Stage-0 entries are processed in index order; entries with
/// `skip_worktree` or `assume_unchanged` set are ignored. Paths that have
/// conflict stages are handled after all stage-0 entries, so their records
/// come last in the returned vector. The result is not sorted here.
///
/// Configuration is loaded from `<work_tree>/.git` (falling back to an empty
/// `ConfigSet` on failure) and used, together with the gitattributes of the
/// work tree, to convert file contents before hashing.
///
/// # Errors
/// Returns `Error::Io` for metadata failures other than "not found" / OS
/// error 20, and propagates errors from hashing worktree files.
pub fn diff_index_to_worktree_with_options(
    odb: &Odb,
    index: &Index,
    work_tree: &Path,
    options: DiffIndexToWorktreeOptions,
) -> Result<Vec<DiffEntry>> {
    use crate::config::ConfigSet;
    use crate::crlf;

    let ignore_submodule_untracked = options.ignore_submodule_untracked;
    let simplify_gitlinks = options.simplify_gitlinks;

    let git_dir = work_tree.join(".git");
    // A config that cannot be loaded is treated as empty rather than as an error.
    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
    let conv = crlf::ConversionConfig::from_config(&config);
    let attrs = crlf::load_gitattributes(work_tree);

    let mut result = Vec::new();
    // path -> (preference rank, entry) for the conflict stage chosen as the
    // comparison base of an unmerged path.
    let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
        std::collections::BTreeMap::new();

    for ie in &index.entries {
        if ie.stage() != 0 {
            let path = String::from_utf8_lossy(&ie.path).to_string();
            // Lower rank wins: stage 2 is preferred, then stage 3, then stage 1.
            let rank = match ie.stage() {
                2 => 0u8,
                3 => 1u8,
                1 => 2u8,
                _ => 3u8,
            };
            match unmerged_base.get(&path) {
                Some((existing_rank, _)) if *existing_rank <= rank => {}
                _ => {
                    unmerged_base.insert(path, (rank, ie));
                }
            }
            continue;
        }
        if ie.skip_worktree() || ie.assume_unchanged() {
            continue;
        }
        // A path that is not valid UTF-8 is replaced by the empty string here.
        let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
        let is_intent_to_add = ie.intent_to_add();

        // Gitlink (submodule) entries: compare the submodule's HEAD with the
        // recorded object id instead of hashing file contents.
        if ie.mode == 0o160000 {
            let sub_dir = work_tree.join(path_str_ref);
            let sub_head_oid = read_submodule_head_oid(&sub_dir);
            let ref_matches = match sub_head_oid {
                Some(oid) => oid == ie.oid,
                // No readable HEAD: the placeholder check decides whether
                // this still counts as matching.
                None => submodule_worktree_is_unpopulated_placeholder(&sub_dir),
            };
            if simplify_gitlinks {
                // Simplified mode looks at the HEAD comparison only.
                if !ref_matches {
                    let path_owned = path_str_ref.to_owned();
                    let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(ie.mode),
                        old_oid: ie.oid,
                        new_oid,
                        score: None,
                    });
                }
                continue;
            }
            let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
            if ignore_submodule_untracked {
                flags.untracked = false;
            }
            let inner_dirty = flags.modified || flags.untracked;
            if !ref_matches || inner_dirty {
                let path_owned = path_str_ref.to_owned();
                // When only the submodule's own worktree is dirty, the new
                // side carries the zero id.
                let new_oid = if !ref_matches {
                    sub_head_oid.unwrap_or_else(zero_oid)
                } else {
                    zero_oid()
                };
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path_owned.clone()),
                    new_path: Some(path_owned),
                    old_mode: format_mode(ie.mode),
                    new_mode: format_mode(ie.mode),
                    old_oid: ie.oid,
                    new_oid,
                    score: None,
                });
            }
            continue;
        }

        let file_path = work_tree.join(path_str_ref);

        // Intent-to-add entries: reported as Added when the file exists and
        // as Deleted when it is missing.
        if is_intent_to_add {
            match fs::symlink_metadata(&file_path) {
                Ok(meta) => {
                    let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                    let worktree_oid = hash_worktree_file(
                        odb,
                        &file_path,
                        &meta,
                        &conv,
                        &file_attrs,
                        path_str_ref,
                        None,
                    )?;
                    let worktree_mode = mode_from_metadata(&meta);
                    result.push(DiffEntry {
                        status: DiffStatus::Added,
                        old_path: None,
                        new_path: Some(path_str_ref.to_owned()),
                        old_mode: "000000".to_owned(),
                        new_mode: format_mode(worktree_mode),
                        old_oid: zero_oid(),
                        new_oid: worktree_oid,
                        score: None,
                    });
                }
                // NOTE(review): raw OS error 20 is presumably ENOTDIR (a parent
                // component is not a directory) on Linux — confirm for other platforms.
                Err(e)
                    if e.kind() == std::io::ErrorKind::NotFound
                        || e.raw_os_error() == Some(20) =>
                {
                    result.push(DiffEntry {
                        status: DiffStatus::Deleted,
                        old_path: Some(path_str_ref.to_owned()),
                        new_path: None,
                        old_mode: format_mode(ie.mode),
                        new_mode: "000000".to_owned(),
                        old_oid: ie.oid,
                        new_oid: zero_oid(),
                        score: None,
                    });
                }
                Err(e) => return Err(Error::Io(e)),
            }
            continue;
        }

        // A symlink among the parent components means the tracked file is
        // reported as deleted without looking at what the symlink points to.
        if has_symlink_in_path(work_tree, path_str_ref) {
            result.push(DiffEntry {
                status: DiffStatus::Deleted,
                old_path: Some(path_str_ref.to_owned()),
                new_path: None,
                old_mode: format_mode(ie.mode),
                new_mode: "000000".to_owned(),
                old_oid: ie.oid,
                new_oid: zero_oid(),
                score: None,
            });
            continue;
        }

        match fs::symlink_metadata(&file_path) {
            // A directory where a file is tracked counts as a deletion.
            // NOTE(review): new_mode is the empty string here, unlike the
            // "000000" used by the other Deleted records — confirm this is intended.
            Ok(meta) if meta.is_dir() => {
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    new_mode: String::new(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            Ok(meta) => {
                let worktree_mode = mode_from_metadata(&meta);
                let stat_same = stat_matches(ie, &meta);
                // Stat data unchanged but mode differs: report a mode-only
                // change without hashing (both sides carry the index oid).
                if stat_same && worktree_mode != ie.mode {
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: ie.oid,
                        score: None,
                    });
                    continue;
                }

                // Stat data and mode unchanged and the entry is not racy:
                // trust the cached stat information and skip hashing.
                if stat_same && worktree_mode == ie.mode && !entry_is_racy(ie, options.index_mtime) {
                    continue;
                }

                let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
                let worktree_oid = hash_worktree_file(
                    odb,
                    &file_path,
                    &meta,
                    &conv,
                    &file_attrs,
                    path_str_ref,
                    Some(ie),
                )?;

                // If the converted content differs, also try the raw bytes: a
                // file whose unconverted content hashes to the index oid is
                // treated as unchanged.
                let mut eff_oid = worktree_oid;
                if eff_oid != ie.oid {
                    if let Ok(raw) = fs::read(&file_path) {
                        let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
                        if raw_oid == ie.oid {
                            eff_oid = ie.oid;
                        }
                    }
                }

                if eff_oid != ie.oid || worktree_mode != ie.mode {
                    let path_owned = path_str_ref.to_owned();
                    result.push(DiffEntry {
                        status: DiffStatus::Modified,
                        old_path: Some(path_owned.clone()),
                        new_path: Some(path_owned),
                        old_mode: format_mode(ie.mode),
                        new_mode: format_mode(worktree_mode),
                        old_oid: ie.oid,
                        new_oid: eff_oid,
                        score: None,
                    });
                }
            }
            // Missing file (or OS error 20, see the note above): deleted.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound
                || e.raw_os_error() == Some(20) => {
                result.push(DiffEntry {
                    status: DiffStatus::Deleted,
                    old_path: Some(path_str_ref.to_owned()),
                    new_path: None,
                    old_mode: format_mode(ie.mode),
                    new_mode: "000000".to_owned(),
                    old_oid: ie.oid,
                    new_oid: zero_oid(),
                    score: None,
                });
            }
            Err(e) => return Err(Error::Io(e)),
        }
    }

    // Unmerged paths: always emit an Unmerged record, followed by a Modified
    // record when the worktree file exists and differs from the chosen stage.
    for (path, (_, base_entry)) in unmerged_base {
        let file_path = work_tree.join(&path);
        let wt_meta = match fs::symlink_metadata(&file_path) {
            Ok(meta) => Some(meta),
            Err(e)
                if e.kind() == std::io::ErrorKind::NotFound
                    || e.raw_os_error() == Some(20) =>
            {
                None
            }
            Err(e) => return Err(Error::Io(e)),
        };

        // "000000" when the file is missing, otherwise the worktree mode.
        let new_mode = wt_meta.as_ref().map_or_else(
            || "000000".to_owned(),
            |meta| format_mode(mode_from_metadata(meta)),
        );
        result.push(DiffEntry {
            status: DiffStatus::Unmerged,
            old_path: Some(path.clone()),
            new_path: Some(path.clone()),
            old_mode: "000000".to_owned(),
            new_mode,
            old_oid: zero_oid(),
            new_oid: zero_oid(),
            score: None,
        });

        if let Some(meta) = wt_meta {
            let file_attrs = crlf::get_file_attrs(&attrs, &path, false, &config);
            let wt_oid = hash_worktree_file(
                odb,
                &file_path,
                &meta,
                &conv,
                &file_attrs,
                &path,
                Some(base_entry),
            )?;
            let wt_mode = mode_from_metadata(&meta);
            if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
                result.push(DiffEntry {
                    status: DiffStatus::Modified,
                    old_path: Some(path.clone()),
                    new_path: Some(path),
                    old_mode: format_mode(base_entry.mode),
                    new_mode: format_mode(wt_mode),
                    old_oid: base_entry.oid,
                    new_oid: wt_oid,
                    score: None,
                });
            }
        }
    }

    Ok(result)
}
1152
1153fn entry_is_racy(ie: &IndexEntry, index_mtime: Option<(u32, u32)>) -> bool {
1154 let Some((index_mtime_sec, index_mtime_nsec)) = index_mtime else {
1155 return false;
1156 };
1157 if index_mtime_sec == 0 {
1158 return false;
1159 }
1160 index_mtime_sec < ie.mtime_sec
1161 || (index_mtime_sec == ie.mtime_sec && index_mtime_nsec <= ie.mtime_nsec)
1162}
1163
1164pub fn worktree_differs_from_index_entry(
1172 odb: &Odb,
1173 work_tree: &Path,
1174 ie: &IndexEntry,
1175 ignore_submodule_untracked: bool,
1176) -> Result<bool> {
1177 use crate::config::ConfigSet;
1178 use crate::crlf;
1179
1180 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
1181 let file_path = work_tree.join(path_str_ref);
1182
1183 if ie.mode == 0o160000 {
1184 let sub_head_oid = read_submodule_head(&file_path);
1185 let ref_matches = match sub_head_oid {
1186 Some(oid) => oid == ie.oid,
1187 None => submodule_worktree_is_unpopulated_placeholder(&file_path),
1188 };
1189 let mut flags = submodule_porcelain_flags(work_tree, path_str_ref, ie.oid);
1190 if ignore_submodule_untracked {
1191 flags.untracked = false;
1192 }
1193 return Ok(!ref_matches || flags.modified || flags.untracked);
1194 }
1195
1196 let meta = match fs::symlink_metadata(&file_path) {
1197 Ok(m) => m,
1198 Err(e)
1199 if e.kind() == std::io::ErrorKind::NotFound
1200 || e.raw_os_error() == Some(20) =>
1201 {
1202 return Ok(true);
1203 }
1204 Err(e) => return Err(Error::Io(e)),
1205 };
1206
1207 if meta.is_dir() {
1208 return Ok(true);
1209 }
1210
1211 let worktree_mode = mode_from_metadata(&meta);
1212 if worktree_mode != ie.mode {
1213 return Ok(true);
1214 }
1215
1216 let git_dir = work_tree.join(".git");
1217 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1218 let conv = crlf::ConversionConfig::from_config(&config);
1219 let attrs = crlf::load_gitattributes(work_tree);
1220 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
1221 let worktree_oid = hash_worktree_file(
1222 odb,
1223 &file_path,
1224 &meta,
1225 &conv,
1226 &file_attrs,
1227 path_str_ref,
1228 Some(ie),
1229 )?;
1230
1231 let mut eff_oid = worktree_oid;
1232 if eff_oid != ie.oid {
1233 if let Ok(raw) = fs::read(&file_path) {
1234 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1235 if raw_oid == ie.oid {
1236 eff_oid = ie.oid;
1237 }
1238 }
1239 }
1240
1241 Ok(eff_oid != ie.oid)
1242}
1243
1244pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
1245 if meta.len() as u32 != ie.size {
1247 return false;
1248 }
1249 #[cfg(unix)]
1250 {
1251 use std::os::unix::fs::MetadataExt;
1252 if meta.mtime() as u32 != ie.mtime_sec {
1254 return false;
1255 }
1256 if meta.mtime_nsec() as u32 != ie.mtime_nsec {
1257 return false;
1258 }
1259 if meta.ctime() as u32 != ie.ctime_sec {
1261 return false;
1262 }
1263 if meta.ctime_nsec() as u32 != ie.ctime_nsec {
1264 return false;
1265 }
1266 if meta.ino() as u32 != ie.ino {
1268 return false;
1269 }
1270 if meta.dev() as u32 != ie.dev {
1271 return false;
1272 }
1273 }
1274 #[cfg(not(unix))]
1275 {
1276 use std::time::UNIX_EPOCH;
1277 if let Ok(mtime) = meta.modified() {
1278 if let Ok(dur) = mtime.duration_since(UNIX_EPOCH) {
1279 if dur.as_secs() as u32 != ie.mtime_sec {
1280 return false;
1281 }
1282 if dur.subsec_nanos() != ie.mtime_nsec {
1283 return false;
1284 }
1285 }
1286 }
1287 }
1288 true
1289}
1290
/// Returns `true` when any parent component of `rel_path` (relative to
/// `work_tree`) is a symbolic link.
///
/// The final component is never inspected, and components whose metadata
/// cannot be read are treated as "not a symlink".
fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
    // Everything before the last '/' is the chain of parent directories.
    let Some((parents, _leaf)) = rel_path.rsplit_once('/') else {
        return false;
    };

    let mut probe = work_tree.to_path_buf();
    parents.split('/').any(|component| {
        probe.push(component);
        matches!(
            fs::symlink_metadata(&probe),
            Ok(meta) if meta.file_type().is_symlink()
        )
    })
}
1306
1307pub fn hash_worktree_file(
1308 odb: &Odb,
1309 path: &Path,
1310 meta: &fs::Metadata,
1311 conv: &crate::crlf::ConversionConfig,
1312 file_attrs: &crate::crlf::FileAttrs,
1313 rel_path: &str,
1314 index_entry: Option<&IndexEntry>,
1315) -> Result<ObjectId> {
1316 let prior_blob: Option<Vec<u8>> = index_entry
1317 .filter(|e| e.oid != zero_oid())
1318 .and_then(|e| odb.read(&e.oid).ok().map(|o| o.data));
1319 let data = if meta.file_type().is_symlink() {
1320 let target = fs::read_link(path)?;
1322 target.to_string_lossy().into_owned().into_bytes()
1323 } else if meta.is_dir() {
1324 Vec::new()
1327 } else {
1328 let raw = fs::read(path)?;
1329 let opts = crate::crlf::ConvertToGitOpts {
1332 index_blob: prior_blob.as_deref(),
1333 renormalize: false,
1334 check_safecrlf: false,
1335 };
1336 crate::crlf::convert_to_git_with_opts(&raw, rel_path, conv, file_attrs, opts).unwrap_or(raw)
1337 };
1338
1339 Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
1340}
1341
/// Maps file metadata to a git-style file mode.
///
/// Symlinks yield `0o120000`. Everything else yields `0o100755` when any
/// execute bit is set (Unix only) and `0o100644` otherwise.
pub fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
    if meta.file_type().is_symlink() {
        return 0o120000;
    }

    #[cfg(unix)]
    let executable = meta.mode() & 0o111 != 0;
    // Execute bits are not inspected on non-Unix platforms.
    #[cfg(not(unix))]
    let executable = false;

    if executable {
        0o100755
    } else {
        0o100644
    }
}
1356
1357pub fn diff_tree_to_worktree(
1374 odb: &Odb,
1375 tree_oid: Option<&ObjectId>,
1376 work_tree: &Path,
1377 index: &Index,
1378) -> Result<Vec<DiffEntry>> {
1379 use crate::config::ConfigSet;
1380 use crate::crlf;
1381
1382 let git_dir = work_tree.join(".git");
1383 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1384 let conv = crlf::ConversionConfig::from_config(&config);
1385 let attrs = crlf::load_gitattributes(work_tree);
1386
1387 let tree_flat = match tree_oid {
1389 Some(oid) => flatten_tree(odb, oid, "")?,
1390 None => Vec::new(),
1391 };
1392 let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
1393 tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
1394
1395 let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
1397 std::collections::BTreeMap::new();
1398 let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1399 let mut stage0_paths: std::collections::BTreeSet<Vec<u8>> = std::collections::BTreeSet::new();
1400 for ie in &index.entries {
1401 if ie.stage() != 0 {
1402 continue;
1403 }
1404 let path = String::from_utf8_lossy(&ie.path).to_string();
1405 index_entries.insert(&ie.path, ie);
1406 index_paths.insert(path);
1407 stage0_paths.insert(ie.path.clone());
1408 }
1409
1410 let mut unmerged_only_paths: std::collections::BTreeSet<String> =
1413 std::collections::BTreeSet::new();
1414 for ie in &index.entries {
1415 if !(1..=3).contains(&ie.stage()) {
1416 continue;
1417 }
1418 if stage0_paths.contains(&ie.path) {
1419 continue;
1420 }
1421 unmerged_only_paths.insert(String::from_utf8_lossy(&ie.path).into_owned());
1422 }
1423
1424 let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1426 all_paths.extend(tree_map.keys().cloned());
1427 all_paths.extend(index_paths.iter().cloned());
1428 all_paths.extend(unmerged_only_paths.iter().cloned());
1429
1430 let mut result = Vec::new();
1431
1432 for path in &all_paths {
1433 if index_entries
1434 .get(path.as_bytes())
1435 .is_some_and(|ie| ie.skip_worktree())
1436 {
1437 continue;
1440 }
1441
1442 let tree_entry = tree_map.get(path.as_str());
1443
1444 let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
1446 || index_entries
1447 .get(path.as_bytes())
1448 .is_some_and(|ie| ie.mode == 0o160000);
1449 if is_gitlink {
1450 if let Some(te) = tree_entry {
1451 let sub_dir = work_tree.join(path);
1452 let sub_head = read_submodule_head_oid(&sub_dir);
1453 let index_oid = index_entries
1454 .get(path.as_bytes())
1455 .filter(|ie| ie.mode == 0o160000)
1456 .map(|ie| ie.oid);
1457 let index_matches_tree = index_oid.is_some_and(|oid| oid == te.oid);
1458 let head_differs = sub_head.as_ref() != Some(&te.oid);
1459 let dirty_while_aligned = index_matches_tree
1460 && !head_differs
1461 && submodule_has_dirty_worktree_for_super_diff(work_tree, path, &te.oid);
1462 if head_differs || dirty_while_aligned {
1463 let new_oid = if head_differs { zero_oid() } else { te.oid };
1467 result.push(DiffEntry {
1468 status: DiffStatus::Modified,
1469 old_path: Some(path.clone()),
1470 new_path: Some(path.clone()),
1471 old_mode: format_mode(te.mode),
1472 new_mode: format_mode(te.mode),
1473 old_oid: te.oid,
1474 new_oid,
1475 score: None,
1476 });
1477 }
1478 }
1479 continue;
1480 }
1481
1482 let file_path = work_tree.join(path);
1483
1484 let wt_meta = match fs::symlink_metadata(&file_path) {
1485 Ok(m) => Some(m),
1486 Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
1487 Err(e) => return Err(Error::Io(e)),
1488 };
1489
1490 if unmerged_only_paths.contains(path) {
1491 if let (Some(te), Some(meta)) = (tree_entry, wt_meta.as_ref()) {
1492 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1493 let wt_oid =
1494 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, None)?;
1495 let wt_mode = mode_from_metadata(meta);
1496 if wt_oid != te.oid || wt_mode != te.mode {
1497 result.push(DiffEntry {
1498 status: DiffStatus::Modified,
1499 old_path: Some(path.clone()),
1500 new_path: Some(path.clone()),
1501 old_mode: format_mode(te.mode),
1502 new_mode: format_mode(wt_mode),
1503 old_oid: te.oid,
1504 new_oid: wt_oid,
1505 score: None,
1506 });
1507 }
1508 }
1509 continue;
1510 }
1511
1512 match (tree_entry, wt_meta) {
1513 (Some(te), Some(ref meta)) => {
1514 let wt_mode = mode_from_metadata(meta);
1515 let Some(ie) = index_entries.get(path.as_bytes()) else {
1516 continue;
1517 };
1518
1519 let index_matches_tree = ie.oid == te.oid && ie.mode == te.mode;
1520
1521 if index_matches_tree && wt_mode == te.mode && stat_matches(ie, meta) {
1523 continue;
1524 }
1525
1526 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1527 let idx_ent = index_entries.get(path.as_bytes()).copied();
1528
1529 if ie.oid == te.oid && ie.mode != te.mode {
1531 result.push(DiffEntry {
1532 status: DiffStatus::Modified,
1533 old_path: Some(path.clone()),
1534 new_path: Some(path.clone()),
1535 old_mode: format_mode(te.mode),
1536 new_mode: format_mode(ie.mode),
1537 old_oid: te.oid,
1538 new_oid: te.oid,
1539 score: None,
1540 });
1541 continue;
1542 }
1543
1544 if index_matches_tree {
1547 let wt_oid = hash_worktree_file(
1548 odb,
1549 &file_path,
1550 meta,
1551 &conv,
1552 &file_attrs,
1553 path,
1554 idx_ent,
1555 )?;
1556 let mut eff_oid = wt_oid;
1557 if eff_oid != te.oid {
1558 if let Ok(raw) = fs::read(&file_path) {
1559 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1560 if raw_oid == te.oid {
1561 eff_oid = te.oid;
1562 }
1563 }
1564 }
1565 if eff_oid != te.oid {
1566 result.push(DiffEntry {
1567 status: DiffStatus::Modified,
1568 old_path: Some(path.clone()),
1569 new_path: Some(path.clone()),
1570 old_mode: format_mode(te.mode),
1571 new_mode: format_mode(wt_mode),
1572 old_oid: te.oid,
1573 new_oid: eff_oid,
1574 score: None,
1575 });
1576 } else if wt_mode != te.mode {
1577 result.push(DiffEntry {
1578 status: DiffStatus::Modified,
1579 old_path: Some(path.clone()),
1580 new_path: Some(path.clone()),
1581 old_mode: format_mode(te.mode),
1582 new_mode: format_mode(wt_mode),
1583 old_oid: te.oid,
1584 new_oid: te.oid,
1585 score: None,
1586 });
1587 }
1588 continue;
1589 }
1590
1591 let wt_oid =
1593 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, idx_ent)?;
1594 let mut eff_oid = wt_oid;
1595 if eff_oid != te.oid {
1596 if let Ok(raw) = fs::read(&file_path) {
1597 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1598 if raw_oid == te.oid {
1599 eff_oid = te.oid;
1600 }
1601 }
1602 }
1603 if eff_oid != te.oid || wt_mode != te.mode {
1604 result.push(DiffEntry {
1605 status: DiffStatus::Modified,
1606 old_path: Some(path.clone()),
1607 new_path: Some(path.clone()),
1608 old_mode: format_mode(te.mode),
1609 new_mode: format_mode(wt_mode),
1610 old_oid: te.oid,
1611 new_oid: eff_oid,
1612 score: None,
1613 });
1614 }
1615 }
1616 (Some(te), None) => {
1617 result.push(DiffEntry {
1619 status: DiffStatus::Deleted,
1620 old_path: Some(path.clone()),
1621 new_path: None,
1622 old_mode: format_mode(te.mode),
1623 new_mode: "000000".to_owned(),
1624 old_oid: te.oid,
1625 new_oid: zero_oid(),
1626 score: None,
1627 });
1628 }
1629 (None, Some(ref meta)) => {
1630 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1632 let wt_oid = hash_worktree_file(
1633 odb,
1634 &file_path,
1635 meta,
1636 &conv,
1637 &file_attrs,
1638 path,
1639 index_entries.get(path.as_bytes()).copied(),
1640 )?;
1641 let wt_mode = mode_from_metadata(meta);
1642 result.push(DiffEntry {
1643 status: DiffStatus::Added,
1644 old_path: None,
1645 new_path: Some(path.clone()),
1646 old_mode: "000000".to_owned(),
1647 new_mode: format_mode(wt_mode),
1648 old_oid: zero_oid(),
1649 new_oid: wt_oid,
1650 score: None,
1651 });
1652 }
1653 (None, None) => {
1654 }
1656 }
1657 }
1658
1659 result.sort_by(|a, b| a.path().cmp(b.path()));
1660 Ok(result)
1661}
1662
1663fn read_added_entry_bytes(
1666 odb: &Odb,
1667 entry: &DiffEntry,
1668 work_root: Option<&Path>,
1669) -> Option<Vec<u8>> {
1670 if entry.new_oid != zero_oid() {
1671 return odb.read(&entry.new_oid).ok().map(|obj| obj.data);
1672 }
1673 let path = entry.new_path.as_deref()?;
1674 let root = work_root?;
1675 fs::read(root.join(path)).ok()
1676}
1677
1678fn modified_as_copy_from_sources(
1679 odb: &Odb,
1680 work_root: Option<&Path>,
1681 e: &DiffEntry,
1682 threshold: u32,
1683 sources: &[(String, ObjectId, bool)],
1684 source_contents: &[Option<Vec<u8>>],
1685 source_tree_entries: &[(String, String, ObjectId)],
1686) -> Option<DiffEntry> {
1687 fn regular_file_mode(mode: &str) -> bool {
1688 mode == "100644" || mode == "100755"
1689 }
1690
1691 if e.status != DiffStatus::Modified || !regular_file_mode(&e.new_mode) {
1692 return None;
1693 }
1694 let new_data = read_added_entry_bytes(odb, e, work_root)?;
1695 let new_oid_eff = if e.new_oid != zero_oid() {
1696 e.new_oid
1697 } else {
1698 Odb::hash_object_data(ObjectKind::Blob, &new_data)
1699 };
1700
1701 let mut best: Option<(usize, u32)> = None;
1702 for (si, (src_path, src_oid, is_deleted)) in sources.iter().enumerate() {
1703 if *is_deleted {
1704 continue;
1705 }
1706 if e.new_path.as_deref() == Some(src_path.as_str()) {
1707 continue;
1708 }
1709 let src_mode_str = source_tree_entries
1710 .iter()
1711 .find(|(p, _, _)| p == src_path)
1712 .map(|(_, m, _)| m.as_str())
1713 .unwrap_or("100644");
1714 if !regular_file_mode(src_mode_str) {
1715 continue;
1716 }
1717
1718 let score = if *src_oid == new_oid_eff {
1719 100
1720 } else {
1721 match (&source_contents[si], Some(new_data.as_slice())) {
1722 (Some(old_data), Some(nd)) => compute_similarity(old_data, nd),
1723 _ => 0,
1724 }
1725 };
1726 if score >= threshold {
1727 let replace = match best {
1728 None => true,
1729 Some((_, s)) => score > s,
1730 };
1731 if replace {
1732 best = Some((si, score));
1733 }
1734 }
1735 }
1736
1737 let (si, score) = best?;
1738 let (src_path, src_oid, _) = &sources[si];
1739 let src_mode = source_tree_entries
1740 .iter()
1741 .find(|(p, _, _)| p == src_path)
1742 .map(|(_, m, _)| m.clone())
1743 .unwrap_or_else(|| e.old_mode.clone());
1744
1745 Some(DiffEntry {
1746 status: DiffStatus::Copied,
1747 old_path: Some(src_path.clone()),
1748 new_path: e.new_path.clone(),
1749 old_mode: src_mode,
1750 new_mode: e.new_mode.clone(),
1751 old_oid: *src_oid,
1752 new_oid: e.new_oid,
1753 score: Some(score),
1754 })
1755}
1756
1757pub fn detect_renames(
1769 odb: &Odb,
1770 work_root: Option<&Path>,
1771 entries: Vec<DiffEntry>,
1772 threshold: u32,
1773) -> Vec<DiffEntry> {
1774 let mut deleted: Vec<DiffEntry> = Vec::new();
1776 let mut added: Vec<DiffEntry> = Vec::new();
1777 let mut others: Vec<DiffEntry> = Vec::new();
1778
1779 for entry in entries {
1780 match entry.status {
1781 DiffStatus::Deleted => deleted.push(entry),
1782 DiffStatus::Added => added.push(entry),
1783 _ => others.push(entry),
1784 }
1785 }
1786
1787 if deleted.is_empty() || added.is_empty() {
1788 let mut result = others;
1790 result.extend(deleted);
1791 result.extend(added);
1792 result.sort_by(|a, b| a.path().cmp(b.path()));
1793 return result;
1794 }
1795
1796 let deleted_contents: Vec<Option<Vec<u8>>> = deleted
1798 .iter()
1799 .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
1800 .collect();
1801
1802 let added_contents: Vec<Option<Vec<u8>>> = added
1804 .iter()
1805 .map(|a| read_added_entry_bytes(odb, a, work_root))
1806 .collect();
1807
1808 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1811
1812 fn is_regularish_mode(mode: &str) -> bool {
1813 mode == "100644" || mode == "100755"
1814 }
1815
1816 for (di, del) in deleted.iter().enumerate() {
1817 for (ai, add) in added.iter().enumerate() {
1818 if del.old_oid == add.new_oid {
1820 scores.push((100, di, ai));
1821 continue;
1822 }
1823
1824 if !is_regularish_mode(&del.old_mode) || !is_regularish_mode(&add.new_mode) {
1827 continue;
1828 }
1829
1830 let score = match (&deleted_contents[di], &added_contents[ai]) {
1831 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
1832 _ => 0,
1833 };
1834
1835 if score >= threshold {
1836 scores.push((score, di, ai));
1837 }
1838 }
1839 }
1840
1841 scores.sort_by(|a, b| {
1844 let a_same = same_basename(&deleted[a.1], &added[a.2]);
1845 let b_same = same_basename(&deleted[b.1], &added[b.2]);
1846 b_same.cmp(&a_same).then_with(|| b.0.cmp(&a.0))
1847 });
1848
1849 let mut used_deleted = vec![false; deleted.len()];
1850 let mut used_added = vec![false; added.len()];
1851 let mut renames: Vec<DiffEntry> = Vec::new();
1852
1853 for (score, di, ai) in &scores {
1854 if used_deleted[*di] || used_added[*ai] {
1855 continue;
1856 }
1857 used_deleted[*di] = true;
1858 used_added[*ai] = true;
1859
1860 let del = &deleted[*di];
1861 let add = &added[*ai];
1862
1863 renames.push(DiffEntry {
1864 status: DiffStatus::Renamed,
1865 old_path: del.old_path.clone(),
1866 new_path: add.new_path.clone(),
1867 old_mode: del.old_mode.clone(),
1868 new_mode: add.new_mode.clone(),
1869 old_oid: del.old_oid,
1870 new_oid: add.new_oid,
1871 score: Some(*score),
1872 });
1873 }
1874
1875 let mut result = others;
1877 result.extend(renames);
1878 for (i, entry) in deleted.into_iter().enumerate() {
1879 if !used_deleted[i] {
1880 result.push(entry);
1881 }
1882 }
1883 for (i, entry) in added.into_iter().enumerate() {
1884 if !used_added[i] {
1885 result.push(entry);
1886 }
1887 }
1888
1889 result.sort_by(|a, b| a.path().cmp(b.path()));
1890 result
1891}
1892
1893pub fn detect_copies(
1904 odb: &Odb,
1905 work_root: Option<&Path>,
1906 entries: Vec<DiffEntry>,
1907 threshold: u32,
1908 find_copies_harder: bool,
1909 source_tree_entries: &[(String, String, ObjectId)],
1910) -> Vec<DiffEntry> {
1911 use std::collections::{HashMap, HashSet};
1912
1913 let mut deleted: Vec<DiffEntry> = Vec::new();
1915 let mut added: Vec<DiffEntry> = Vec::new();
1916 let mut others: Vec<DiffEntry> = Vec::new();
1917
1918 for entry in entries {
1919 match entry.status {
1920 DiffStatus::Deleted => deleted.push(entry),
1921 DiffStatus::Added => added.push(entry),
1922 _ => others.push(entry),
1923 }
1924 }
1925
1926 let mut sources: Vec<(String, ObjectId, bool)> = Vec::new(); let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
1930
1931 for entry in &deleted {
1932 if let Some(ref path) = entry.old_path {
1933 deleted_source_idx.insert(path.clone(), sources.len());
1934 sources.push((path.clone(), entry.old_oid, true));
1935 }
1936 }
1937
1938 for entry in &others {
1941 if matches!(entry.status, DiffStatus::Modified | DiffStatus::TypeChanged) {
1942 if let Some(ref old_path) = entry.old_path {
1943 if !sources.iter().any(|(p, _, _)| p == old_path) {
1944 sources.push((old_path.clone(), entry.old_oid, false));
1945 }
1946 }
1947 }
1948 }
1949
1950 if find_copies_harder {
1952 for (path, _mode, oid) in source_tree_entries {
1953 if !sources.iter().any(|(p, _, _)| p == path) {
1954 sources.push((path.clone(), *oid, false));
1955 }
1956 }
1957 }
1958
1959 if sources.is_empty() {
1960 let mut result = others;
1961 result.extend(deleted);
1962 result.extend(added);
1963 result.sort_by(|a, b| a.path().cmp(b.path()));
1964 return result;
1965 }
1966
1967 let source_contents: Vec<Option<Vec<u8>>> = sources
1969 .iter()
1970 .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
1971 .collect();
1972
1973 let mut result_entries: Vec<DiffEntry> = Vec::new();
1974 let mut renamed_deleted: HashSet<usize> = HashSet::new();
1975 let mut used_added2 = vec![false; added.len()];
1976
1977 if !added.is_empty() {
1978 let added_contents: Vec<Option<Vec<u8>>> = added
1980 .iter()
1981 .map(|a| read_added_entry_bytes(odb, a, work_root))
1982 .collect();
1983
1984 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1986 for (si, (src_path, src_oid, _)) in sources.iter().enumerate() {
1987 for (ai, add) in added.iter().enumerate() {
1988 if add.new_path.as_deref() == Some(src_path.as_str()) {
1991 continue;
1992 }
1993 let add_oid = if add.new_oid != zero_oid() {
1994 add.new_oid
1995 } else if let Some(ref data) = added_contents[ai] {
1996 Odb::hash_object_data(ObjectKind::Blob, data)
1997 } else {
1998 zero_oid()
1999 };
2000 if *src_oid == add_oid {
2001 scores.push((100, si, ai));
2002 continue;
2003 }
2004 let score = match (&source_contents[si], &added_contents[ai]) {
2005 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
2006 _ => 0,
2007 };
2008 if score >= threshold {
2009 scores.push((score, si, ai));
2010 }
2011 }
2012 }
2013
2014 scores.sort_by(|a, b| b.0.cmp(&a.0));
2016
2017 let mut used_added = vec![false; added.len()];
2019 let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
2020 for &(score, si, ai) in &scores {
2021 if used_added[ai] {
2022 continue;
2023 }
2024 used_added[ai] = true;
2025 source_to_added.entry(si).or_default().push((ai, score));
2026 }
2027
2028 for (&si, assignments_for_src) in &source_to_added {
2030 let (_, _, is_deleted) = &sources[si];
2031 if *is_deleted && !assignments_for_src.is_empty() {
2032 let rename_ai = assignments_for_src
2035 .iter()
2036 .max_by_key(|(ai, _score)| added[*ai].path().to_string())
2037 .map(|(ai, _)| *ai);
2038
2039 for &(ai, score) in assignments_for_src {
2040 let (ref src_path, _, _) = sources[si];
2041 let add = &added[ai];
2042 let src_mode = source_tree_entries
2043 .iter()
2044 .find(|(p, _, _)| p == src_path)
2045 .map(|(_, m, _)| m.clone())
2046 .unwrap_or_else(|| add.old_mode.clone());
2047
2048 let is_rename = Some(ai) == rename_ai;
2049 result_entries.push(DiffEntry {
2050 status: if is_rename {
2051 DiffStatus::Renamed
2052 } else {
2053 DiffStatus::Copied
2054 },
2055 old_path: Some(src_path.clone()),
2056 new_path: add.new_path.clone(),
2057 old_mode: src_mode,
2058 new_mode: add.new_mode.clone(),
2059 old_oid: sources[si].1,
2060 new_oid: add.new_oid,
2061 score: Some(score),
2062 });
2063 used_added2[ai] = true;
2064 }
2065 renamed_deleted.insert(si);
2066 } else {
2067 for &(ai, score) in assignments_for_src {
2069 let (ref src_path, _, _) = sources[si];
2070 let add = &added[ai];
2071 let src_mode = source_tree_entries
2072 .iter()
2073 .find(|(p, _, _)| p == src_path)
2074 .map(|(_, m, _)| m.clone())
2075 .unwrap_or_else(|| add.old_mode.clone());
2076
2077 result_entries.push(DiffEntry {
2078 status: DiffStatus::Copied,
2079 old_path: Some(src_path.clone()),
2080 new_path: add.new_path.clone(),
2081 old_mode: src_mode,
2082 new_mode: add.new_mode.clone(),
2083 old_oid: sources[si].1,
2084 new_oid: add.new_oid,
2085 score: Some(score),
2086 });
2087 used_added2[ai] = true;
2088 }
2089 }
2090 }
2091 }
2092
2093 for entry in deleted.into_iter() {
2095 if let Some(ref path) = entry.old_path {
2096 if let Some(&si) = deleted_source_idx.get(path) {
2097 if renamed_deleted.contains(&si) {
2098 continue;
2100 }
2101 }
2102 }
2103 result_entries.push(entry);
2104 }
2105
2106 let mut result = others;
2107 result.extend(result_entries);
2108 for (i, entry) in added.into_iter().enumerate() {
2110 if !used_added2[i] {
2111 result.push(entry);
2112 }
2113 }
2114
2115 let mut final_result = Vec::with_capacity(result.len());
2116 for e in result {
2117 if let Some(c) = modified_as_copy_from_sources(
2118 odb,
2119 work_root,
2120 &e,
2121 threshold,
2122 &sources,
2123 &source_contents,
2124 source_tree_entries,
2125 ) {
2126 final_result.push(c);
2127 } else {
2128 final_result.push(e);
2129 }
2130 }
2131
2132 final_result.sort_by(|a, b| a.path().cmp(b.path()));
2133 final_result
2134}
2135
2136pub fn status_apply_rename_copy_detection(
2146 odb: &Odb,
2147 unstaged_raw: Vec<DiffEntry>,
2148 threshold: u32,
2149 copies: bool,
2150 head_tree: Option<&ObjectId>,
2151) -> Result<Vec<DiffEntry>> {
2152 let after_renames = detect_renames(odb, None, unstaged_raw, threshold);
2153 if !copies {
2154 return Ok(after_renames);
2155 }
2156 let source_tree_entries: Vec<(String, String, ObjectId)> = match head_tree {
2157 Some(oid) => flatten_tree(odb, oid, "")?
2158 .into_iter()
2159 .map(|e| (e.path, format_mode(e.mode), e.oid))
2160 .collect(),
2161 None => Vec::new(),
2162 };
2163 Ok(detect_copies(
2164 odb,
2165 None,
2166 after_renames,
2167 threshold,
2168 false,
2169 &source_tree_entries,
2170 ))
2171}
2172
/// Formats a rename compactly as `prefix{old => new}suffix`.
///
/// The common prefix always ends just after a `/` and the common suffix always
/// starts at a `/`. As in git's rename display, the suffix may reuse at most
/// the single slash that ends the prefix (`a/b/c` -> `a/c` gives
/// `a/{b => }/c`). When the paths share neither, the result is `old => new`.
pub fn format_rename_path(old: &str, new: &str) -> String {
    /// Number of leading equal byte pairs up to and including the last `/` among them.
    fn len_through_last_slash<'a>(pairs: impl Iterator<Item = (&'a u8, &'a u8)>) -> usize {
        pairs
            .take_while(|(a, b)| a == b)
            .enumerate()
            .filter(|(_, (a, _))| **a == b'/')
            .map(|(i, _)| i + 1)
            .last()
            .unwrap_or(0)
    }

    let ob = old.as_bytes();
    let nb = new.as_bytes();

    let pfx = len_through_last_slash(ob.iter().zip(nb));

    // The suffix scan may step back into the prefix by exactly its trailing slash.
    // Letting it run further would make the prefix and suffix claim the same
    // directory components and print a path that does not expand to `old`/`new`.
    let shared_slash = usize::from(pfx > 0);
    let max_sfx = ob.len().min(nb.len()) - pfx + shared_slash;
    let sfx = len_through_last_slash(ob.iter().rev().zip(nb.iter().rev()).take(max_sfx));

    let mut sfx_at_old = ob.len() - sfx;
    let mut sfx_at_new = nb.len() - sfx;
    // Both sides can only overlap the prefix when the paths are identical;
    // in that case show the basename inside the braces and no suffix.
    if pfx > sfx_at_old && pfx > sfx_at_new {
        sfx_at_old = ob.len();
        sfx_at_new = nb.len();
    }

    // All cut points sit next to an ASCII '/', so they are valid char boundaries.
    let prefix = &old[..pfx];
    let suffix = &old[sfx_at_old..];
    // `get` yields `None` for an inverted range, i.e. when the suffix reuses the
    // prefix's slash and nothing is left in the middle.
    let old_mid = old.get(pfx..sfx_at_old).unwrap_or("");
    let new_mid = new.get(pfx..sfx_at_new).unwrap_or("");

    if prefix.is_empty() && suffix.is_empty() {
        return format!("{old} => {new}");
    }

    format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
}
2268
2269fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
2271 let old = del.old_path.as_deref().unwrap_or("");
2272 let new = add.new_path.as_deref().unwrap_or("");
2273 let old_base = old.rsplit('/').next().unwrap_or(old);
2274 let new_base = new.rsplit('/').next().unwrap_or(new);
2275 old_base == new_base && !old_base.is_empty()
2276}
2277
2278fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
2283 let old_norm = crate::crlf::crlf_to_lf(old);
2286 let new_norm = crate::crlf::crlf_to_lf(new);
2287
2288 let src_size = old_norm.len();
2289 let dst_size = new_norm.len();
2290
2291 if src_size == 0 && dst_size == 0 {
2292 return 100;
2293 }
2294 let total = src_size + dst_size;
2295 if total == 0 {
2296 return 100;
2297 }
2298
2299 use similar::{ChangeTag, TextDiff};
2301 let old_str = String::from_utf8_lossy(&old_norm);
2302 let new_str = String::from_utf8_lossy(&new_norm);
2303 let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
2304
2305 let mut shared_bytes = 0usize;
2306 for change in diff.iter_all_changes() {
2307 if change.tag() == ChangeTag::Equal {
2308 shared_bytes += change.value().len();
2310 }
2311 }
2312
2313 let max_size = src_size.max(dst_size);
2316
2317 ((shared_bytes * 100) / max_size).min(100) as u32
2318}
2319
2320#[must_use]
2324pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
2325 compute_similarity(old, new)
2326}
2327
2328pub fn format_raw(entry: &DiffEntry) -> String {
2334 let path = match entry.status {
2335 DiffStatus::Renamed | DiffStatus::Copied => {
2336 format!(
2337 "{}\t{}",
2338 entry.old_path.as_deref().unwrap_or(""),
2339 entry.new_path.as_deref().unwrap_or("")
2340 )
2341 }
2342 _ => entry.path().to_owned(),
2343 };
2344
2345 let status_str = match (entry.status, entry.score) {
2346 (DiffStatus::Renamed, Some(s)) => format!("R{:03}", s),
2347 (DiffStatus::Copied, Some(s)) => format!("C{:03}", s),
2348 _ => entry.status.letter().to_string(),
2349 };
2350
2351 format!(
2352 ":{} {} {} {} {}\t{}",
2353 entry.old_mode, entry.new_mode, entry.old_oid, entry.new_oid, status_str, path
2354 )
2355}
2356
2357pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
2359 let ellipsis = if std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes") {
2360 "..."
2361 } else {
2362 ""
2363 };
2364 let old_hex = format!("{}", entry.old_oid);
2365 let new_hex = format!("{}", entry.new_oid);
2366 let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
2367 let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
2368
2369 let path = entry.path();
2370
2371 format!(
2372 ":{} {} {}{} {}{} {}\t{}",
2373 entry.old_mode,
2374 entry.new_mode,
2375 old_abbrev,
2376 ellipsis,
2377 new_abbrev,
2378 ellipsis,
2379 entry.status.letter(),
2380 path
2381 )
2382}
2383
2384pub fn unified_diff(
2399 old_content: &str,
2400 new_content: &str,
2401 old_path: &str,
2402 new_path: &str,
2403 context_lines: usize,
2404 indent_heuristic: bool,
2405 quote_path_fully: bool,
2406) -> String {
2407 unified_diff_with_prefix(
2408 old_content,
2409 new_content,
2410 old_path,
2411 new_path,
2412 context_lines,
2413 0,
2414 "a/",
2415 "b/",
2416 indent_heuristic,
2417 quote_path_fully,
2418 )
2419}
2420
2421#[allow(clippy::too_many_arguments)] pub fn unified_diff_with_prefix(
2427 old_content: &str,
2428 new_content: &str,
2429 old_path: &str,
2430 new_path: &str,
2431 context_lines: usize,
2432 inter_hunk_context: usize,
2433 src_prefix: &str,
2434 dst_prefix: &str,
2435 indent_heuristic: bool,
2436 quote_path_fully: bool,
2437) -> String {
2438 unified_diff_with_prefix_and_funcname(
2439 old_content,
2440 new_content,
2441 old_path,
2442 new_path,
2443 context_lines,
2444 inter_hunk_context,
2445 src_prefix,
2446 dst_prefix,
2447 None,
2448 indent_heuristic,
2449 quote_path_fully,
2450 )
2451}
2452
2453#[allow(clippy::too_many_arguments)]
2456pub fn unified_diff_with_prefix_and_funcname(
2457 old_content: &str,
2458 new_content: &str,
2459 old_path: &str,
2460 new_path: &str,
2461 context_lines: usize,
2462 inter_hunk_context: usize,
2463 src_prefix: &str,
2464 dst_prefix: &str,
2465 funcname_matcher: Option<&FuncnameMatcher>,
2466 indent_heuristic: bool,
2467 quote_path_fully: bool,
2468) -> String {
2469 unified_diff_with_prefix_and_funcname_and_algorithm(
2470 old_content,
2471 new_content,
2472 old_path,
2473 new_path,
2474 context_lines,
2475 inter_hunk_context,
2476 src_prefix,
2477 dst_prefix,
2478 funcname_matcher,
2479 similar::Algorithm::Myers,
2480 false,
2481 false,
2482 indent_heuristic,
2483 quote_path_fully,
2484 )
2485}
2486
2487#[allow(clippy::too_many_arguments)]
2493pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
2494 old_content: &str,
2495 new_content: &str,
2496 old_path: &str,
2497 new_path: &str,
2498 context_lines: usize,
2499 inter_hunk_context: usize,
2500 src_prefix: &str,
2501 dst_prefix: &str,
2502 funcname_matcher: Option<&FuncnameMatcher>,
2503 algorithm: similar::Algorithm,
2504 function_context: bool,
2505 use_git_histogram: bool,
2506 indent_heuristic: bool,
2507 quote_path_fully: bool,
2508) -> String {
2509 if use_git_histogram {
2510 return unified_diff_histogram_with_prefix_and_funcname(
2511 old_content,
2512 new_content,
2513 old_path,
2514 new_path,
2515 context_lines,
2516 inter_hunk_context,
2517 src_prefix,
2518 dst_prefix,
2519 funcname_matcher,
2520 quote_path_fully,
2521 );
2522 }
2523
2524 if function_context {
2525 return unified_diff_with_function_context(
2526 old_content,
2527 new_content,
2528 old_path,
2529 new_path,
2530 context_lines,
2531 inter_hunk_context,
2532 src_prefix,
2533 dst_prefix,
2534 funcname_matcher,
2535 algorithm,
2536 indent_heuristic,
2537 quote_path_fully,
2538 );
2539 }
2540
2541 use crate::quote_path::format_diff_path_with_prefix;
2542 use similar::{group_diff_ops, udiff::UnifiedDiffHunk, TextDiff};
2543
2544 let diff = TextDiff::configure()
2545 .algorithm(algorithm)
2546 .diff_lines(old_content, new_content);
2547 let compacted_ops = diff_indent_heuristic::diff_lines_ops_compacted(
2548 old_content,
2549 new_content,
2550 algorithm,
2551 indent_heuristic,
2552 );
2553
2554 let mut output = String::new();
2555 if old_path == "/dev/null" {
2556 output.push_str("--- /dev/null\n");
2557 } else if src_prefix.is_empty() {
2558 output.push_str(&format!("--- {old_path}\n"));
2561 } else {
2562 output.push_str("--- ");
2563 output.push_str(&format_diff_path_with_prefix(
2564 src_prefix,
2565 old_path,
2566 quote_path_fully,
2567 ));
2568 output.push('\n');
2569 }
2570 if new_path == "/dev/null" {
2571 output.push_str("+++ /dev/null\n");
2572 } else if dst_prefix.is_empty() {
2573 output.push_str(&format!("+++ {new_path}\n"));
2574 } else {
2575 output.push_str("+++ ");
2576 output.push_str(&format_diff_path_with_prefix(
2577 dst_prefix,
2578 new_path,
2579 quote_path_fully,
2580 ));
2581 output.push('\n');
2582 }
2583
2584 let old_lines: Vec<&str> = old_content.lines().collect();
2585
2586 let max_common_gap = context_lines
2591 .saturating_mul(2)
2592 .saturating_add(inter_hunk_context);
2593 let group_radius = max_common_gap.div_ceil(2);
2594 let op_groups = group_diff_ops(compacted_ops, group_radius);
2595
2596 for ops in op_groups {
2597 if ops.is_empty() {
2598 continue;
2599 }
2600 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
2601 let hunk_str = format!("{hunk}");
2602 if let Some(first_newline) = hunk_str.find('\n') {
2606 let header_line = &hunk_str[..first_newline];
2607 let rest = &hunk_str[first_newline..];
2608
2609 if let Some(func_ctx) =
2611 extract_function_context(header_line, &old_lines, funcname_matcher)
2612 {
2613 output.push_str(header_line);
2614 output.push(' ');
2615 output.push_str(&func_ctx);
2616 output.push_str(rest);
2617 } else {
2618 output.push_str(&hunk_str);
2619 }
2620 } else {
2621 output.push_str(&hunk_str);
2622 }
2623 }
2624
2625 output
2626}
2627
2628fn unified_diff_with_function_context(
2630 old_content: &str,
2631 new_content: &str,
2632 old_path: &str,
2633 new_path: &str,
2634 context_lines: usize,
2635 inter_hunk_context: usize,
2636 src_prefix: &str,
2637 dst_prefix: &str,
2638 funcname_matcher: Option<&FuncnameMatcher>,
2639 algorithm: similar::Algorithm,
2640 indent_heuristic: bool,
2641 quote_path_fully: bool,
2642) -> String {
2643 use crate::quote_path::format_diff_path_with_prefix;
2644 use similar::{group_diff_ops, udiff::UnifiedDiffHunk, TextDiff};
2645
2646 let diff = TextDiff::configure()
2647 .algorithm(algorithm)
2648 .diff_lines(old_content, new_content);
2649
2650 let old_lines: Vec<&str> = old_content.lines().collect();
2651 let new_lines: Vec<&str> = new_content.lines().collect();
2652 let n_old = old_lines.len();
2653 let n_new = new_lines.len();
2654
2655 let group_radius = context_lines
2656 .saturating_mul(2)
2657 .saturating_add(inter_hunk_context);
2658 let all_ops = diff.ops().to_vec();
2659 let op_groups = group_diff_ops(all_ops.clone(), group_radius);
2660
2661 let mut ranges: Vec<(usize, usize, usize, usize)> = Vec::new();
2662
2663 for ops in op_groups {
2664 if ops.is_empty() {
2665 continue;
2666 }
2667 let i1_anchor = func_context_old_anchor(&ops, n_old);
2668 let i1_end = hunk_old_change_end_exclusive(&ops);
2669 let skip_preimage_pull =
2670 append_with_whole_function_added(&ops, n_old, n_new, &new_lines, funcname_matcher);
2671 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
2672 let hunk_str = format!("{hunk}");
2673 let header_line = hunk_str
2674 .lines()
2675 .next()
2676 .unwrap_or("")
2677 .trim_end_matches(['\r', '\n']);
2678 let Some((base_s1, base_e1, _base_s2, _base_e2)) =
2679 parse_unified_hunk_header_ranges(header_line)
2680 else {
2681 continue;
2682 };
2683
2684 let ctx = context_lines;
2685 let (s1, e1, s2, e2) = if skip_preimage_pull {
2686 let s = n_old.saturating_sub(ctx);
2687 let s2 = map_old_line_to_new(&all_ops, s, n_new).min(n_new);
2688 (s, n_old, s2, n_new)
2689 } else {
2690 let mut s1 = base_s1.saturating_sub(ctx);
2691 let mut s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
2692
2693 let base_pre_s1 = i1_anchor.saturating_sub(ctx);
2694 if base_pre_s1 < s1 {
2695 s1 = base_pre_s1;
2696 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
2697 }
2698
2699 let fs1 = expand_func_pre_start(s1, i1_anchor, n_old, &old_lines, funcname_matcher);
2700 if fs1 < s1 {
2701 s1 = fs1;
2702 s2 = map_old_line_to_new(&all_ops, s1, n_new).min(n_new);
2703 }
2704
2705 let mut e1 = (base_e1 + ctx).min(n_old);
2706 let mut e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
2707 let fe1 = expand_func_post_end(e1, i1_end, n_old, &old_lines, funcname_matcher);
2708 if fe1 > e1 {
2709 e1 = fe1;
2710 e2 = map_old_line_to_new(&all_ops, e1, n_new).min(n_new);
2711 }
2712 (s1, e1, s2, e2)
2713 };
2714
2715 ranges.push((s1, e1, s2, e2));
2716 }
2717
2718 let mut output = String::new();
2719 if old_path == "/dev/null" {
2720 output.push_str("--- /dev/null\n");
2721 } else if src_prefix.is_empty() {
2722 output.push_str(&format!("--- {old_path}\n"));
2723 } else {
2724 output.push_str("--- ");
2725 output.push_str(&format_diff_path_with_prefix(
2726 src_prefix,
2727 old_path,
2728 quote_path_fully,
2729 ));
2730 output.push('\n');
2731 }
2732 if new_path == "/dev/null" {
2733 output.push_str("+++ /dev/null\n");
2734 } else if dst_prefix.is_empty() {
2735 output.push_str(&format!("+++ {new_path}\n"));
2736 } else {
2737 output.push_str("+++ ");
2738 output.push_str(&format_diff_path_with_prefix(
2739 dst_prefix,
2740 new_path,
2741 quote_path_fully,
2742 ));
2743 output.push('\n');
2744 }
2745
2746 for (s1, e1, s2, e2) in ranges {
2747 if s1 >= e1 && s2 >= e2 {
2748 continue;
2749 }
2750 let old_seg =
2751 line_slice_for_diff_with_eof_nl(&old_lines, s1, e1, old_content.ends_with('\n'));
2752 let new_seg =
2753 line_slice_for_diff_with_eof_nl(&new_lines, s2, e2, new_content.ends_with('\n'));
2754 let inner_ctx = old_seg.lines().count().max(new_seg.lines().count()).max(1);
2755 let piece = unified_diff_with_prefix_and_funcname_and_algorithm(
2756 &old_seg,
2757 &new_seg,
2758 old_path,
2759 new_path,
2760 inner_ctx,
2761 0,
2762 src_prefix,
2763 dst_prefix,
2764 funcname_matcher,
2765 algorithm,
2766 false,
2767 false,
2768 indent_heuristic,
2769 quote_path_fully,
2770 );
2771 let shifted = shift_unified_hunk_headers_to_full_file(&piece, s1, s2);
2772 let with_func =
2773 enrich_unified_hunk_headers_funcname(&shifted, &old_lines, funcname_matcher);
2774 for line in with_func.lines() {
2775 if line.starts_with("--- ") || line.starts_with("+++ ") {
2776 continue;
2777 }
2778 output.push_str(line);
2779 output.push('\n');
2780 }
2781 }
2782
2783 output
2784}
2785
2786fn shift_unified_hunk_headers_to_full_file(
2791 patch: &str,
2792 delta_old: usize,
2793 delta_new: usize,
2794) -> String {
2795 if delta_old == 0 && delta_new == 0 {
2796 return patch.to_owned();
2797 }
2798 let mut out = String::with_capacity(patch.len());
2799 for line in patch.lines() {
2800 if let Some(shifted) = shift_one_unified_hunk_header(line, delta_old, delta_new) {
2801 out.push_str(&shifted);
2802 } else {
2803 out.push_str(line);
2804 }
2805 out.push('\n');
2806 }
2807 out
2808}
2809
2810fn shift_one_unified_hunk_header(line: &str, delta_old: usize, delta_new: usize) -> Option<String> {
2811 let rest = line.strip_prefix("@@ ")?;
2812 let (old_chunk, after_plus) = rest.split_once(" +")?;
2813 let old_spec = old_chunk.strip_prefix('-')?;
2814 let (new_spec, suffix) = after_plus.split_once(" @@")?;
2815 let shifted_old = shift_unified_range_spec(old_spec, delta_old)?;
2816 let shifted_new = shift_unified_range_spec(new_spec, delta_new)?;
2817 Some(format!("@@ -{shifted_old} +{shifted_new} @@{suffix}"))
2818}
2819
/// Adds `delta` to the start of a unified-diff range written as `start` or `start,count`.
///
/// Surrounding whitespace is ignored, the count (when present) is kept as is, and the
/// addition saturates. Returns `None` if any numeric field fails to parse as `usize`.
fn shift_unified_range_spec(spec: &str, delta: usize) -> Option<String> {
    let trimmed = spec.trim();
    match trimmed.split_once(',') {
        Some((first, len)) => {
            let first = first.parse::<usize>().ok()?;
            let len = len.parse::<usize>().ok()?;
            Some(format!("{},{}", first.saturating_add(delta), len))
        }
        None => trimmed
            .parse::<usize>()
            .ok()
            .map(|first| first.saturating_add(delta).to_string()),
    }
}
2831
2832fn enrich_unified_hunk_headers_funcname(
2834 patch: &str,
2835 full_old_lines: &[&str],
2836 funcname_matcher: Option<&FuncnameMatcher>,
2837) -> String {
2838 let mut out = String::with_capacity(patch.len());
2839 for line in patch.lines() {
2840 if let Some(fixed) = enrich_one_hunk_header_funcname(line, full_old_lines, funcname_matcher)
2841 {
2842 out.push_str(&fixed);
2843 } else {
2844 out.push_str(line);
2845 }
2846 out.push('\n');
2847 }
2848 out
2849}
2850
2851fn enrich_one_hunk_header_funcname(
2852 line: &str,
2853 full_old_lines: &[&str],
2854 funcname_matcher: Option<&FuncnameMatcher>,
2855) -> Option<String> {
2856 let after_at = line.strip_prefix("@@ ")?;
2857 let idx = after_at.find(" @@")?;
2858 let mid = after_at[..idx].trim();
2859 let tail = after_at[idx + 3..].trim_start();
2860 let header_for_parse = format!("@@ {mid} @@");
2861 let func = extract_function_context(&header_for_parse, full_old_lines, funcname_matcher);
2862 Some(if let Some(f) = func {
2863 format!("@@ {mid} @@ {f}")
2864 } else if !tail.is_empty() {
2865 format!("@@ {mid} @@ {tail}")
2866 } else {
2867 format!("@@ {mid} @@")
2868 })
2869}
2870
/// Joins `lines[start..end]` back into newline-separated text for a sub-diff.
///
/// `lines` is expected to hold newline-free lines (as produced by `str::lines`). Every line
/// of the slice is terminated with `\n`, except that the final line stays unterminated when
/// the slice reaches the end of `lines` and `full_file_ends_with_newline` is `false`.
///
/// `end` is clamped to `lines.len()`; an empty or inverted range yields an empty string.
fn line_slice_for_diff_with_eof_nl(
    lines: &[&str],
    start: usize,
    end: usize,
    full_file_ends_with_newline: bool,
) -> String {
    let end = end.min(lines.len());
    if start >= end {
        return String::new();
    }
    let mut text = lines[start..end].join("\n");
    let reaches_eof = end == lines.len();
    // `join` only puts separators *between* lines, so the last line never has a terminator
    // yet. The joined text can still end in '\n' when the last line is blank; that blank
    // line needs its own terminator too, so the decision must not look at the text itself.
    if !reaches_eof || full_file_ends_with_newline {
        text.push('\n');
    }
    text
}
2892
2893fn map_old_line_to_new(ops: &[similar::DiffOp], old_line: usize, n_new: usize) -> usize {
2896 use similar::DiffOp;
2897 let mut n = 0usize;
2898 for op in ops {
2899 match *op {
2900 DiffOp::Equal {
2901 old_index,
2902 new_index,
2903 len,
2904 } => {
2905 if old_index + len <= old_line {
2906 n = new_index + len;
2907 continue;
2908 }
2909 if old_index < old_line {
2910 let take = old_line - old_index;
2911 return (new_index + take).min(n_new);
2912 }
2913 return new_index.min(n_new);
2914 }
2915 DiffOp::Delete {
2916 old_index,
2917 old_len,
2918 new_index,
2919 } => {
2920 if old_index + old_len <= old_line {
2921 n = new_index;
2922 continue;
2923 }
2924 if old_index < old_line {
2925 return new_index.min(n_new);
2926 }
2927 }
2928 DiffOp::Insert {
2929 old_index,
2930 new_index,
2931 new_len,
2932 } => {
2933 if old_index < old_line {
2934 n = new_index + new_len;
2935 continue;
2936 }
2937 if old_index == old_line {
2938 return (new_index + new_len).min(n_new);
2941 }
2942 return new_index.min(n_new);
2943 }
2944 DiffOp::Replace {
2945 old_index,
2946 old_len,
2947 new_index,
2948 new_len,
2949 } => {
2950 if old_index + old_len <= old_line {
2951 n = new_index + new_len;
2952 continue;
2953 }
2954 if old_index < old_line {
2955 let into_old = old_line - old_index;
2956 let mapped = new_index + into_old.min(new_len);
2957 return mapped.min(n_new);
2958 }
2959 return new_index.min(n_new);
2960 }
2961 }
2962 }
2963 n.min(n_new)
2964}
2965
/// Parses `@@ -a[,b] +c[,d] @@…` into `(old_start, old_end, new_start, new_end)`.
///
/// Starts are converted from 1-based to 0-based (saturating at zero), ends are exclusive,
/// and a missing count defaults to 1. Returns `None` for anything that is not a well-formed
/// hunk header.
fn parse_unified_hunk_header_ranges(header: &str) -> Option<(usize, usize, usize, usize)> {
    /// Converts one `start[,count]` token into a 0-based half-open span.
    fn zero_based_span(token: &str) -> Option<(usize, usize)> {
        let token = token.trim();
        let (first, len) = match token.split_once(',') {
            Some((first, len)) => (first.parse::<usize>().ok()?, len.parse::<usize>().ok()?),
            None => (token.parse::<usize>().ok()?, 1),
        };
        let begin = first.saturating_sub(1);
        Some((begin, begin.saturating_add(len)))
    }

    let body = header.strip_prefix("@@ ")?;
    let (minus_side, remainder) = body.split_once(" +")?;
    let minus_side = minus_side.strip_prefix('-')?;
    let (plus_side, _) = remainder.split_once(" @@")?;

    let (old_begin, old_end) = zero_based_span(minus_side)?;
    let (new_begin, new_end) = zero_based_span(plus_side)?;
    Some((old_begin, old_end, new_begin, new_end))
}
2990
2991fn append_with_whole_function_added(
2994 ops: &[similar::DiffOp],
2995 n_old: usize,
2996 n_new: usize,
2997 new_lines: &[&str],
2998 matcher: Option<&FuncnameMatcher>,
2999) -> bool {
3000 use similar::DiffOp;
3001 if n_old == 0 {
3002 return false;
3003 }
3004 let mut only_ins_or_eq = true;
3005 let mut min_new_ins = usize::MAX;
3006 for op in ops {
3007 match *op {
3008 DiffOp::Equal { .. } => {}
3009 DiffOp::Insert {
3010 new_index, new_len, ..
3011 } => {
3012 min_new_ins = min_new_ins.min(new_index);
3013 if new_len == 0 {
3014 only_ins_or_eq = false;
3015 }
3016 }
3017 DiffOp::Delete { .. } | DiffOp::Replace { .. } => {
3018 only_ins_or_eq = false;
3019 }
3020 }
3021 }
3022 let mut insert_at_eof = false;
3023 for op in ops {
3024 if let DiffOp::Insert { old_index, .. } = *op {
3025 if old_index == n_old {
3026 insert_at_eof = true;
3027 break;
3028 }
3029 }
3030 }
3031 let append_at_eof = min_new_ins == n_old || insert_at_eof;
3032 if !only_ins_or_eq || !append_at_eof || min_new_ins == usize::MAX {
3033 return false;
3034 }
3035 let mut j = min_new_ins;
3040 while j < n_new {
3041 let line = new_lines[j];
3042 if line.trim().is_empty() {
3043 j += 1;
3044 continue;
3045 }
3046 if let Some(m) = matcher {
3047 if m.match_line(line).is_some() {
3048 return true;
3049 }
3050 } else if inserted_block_starts_with_c_like_function_definition(line) {
3051 return true;
3052 }
3053 j += 1;
3054 }
3055 false
3056}
3057
/// Heuristically decides whether `line` looks like the first line of a C-style function
/// definition.
///
/// The text before the first `(` must consist of at least two whitespace-separated tokens:
/// the last one is the function name (an identifier, optionally wrapped in `*` / `&` sigils
/// as in `char *name(`), and at least one of the preceding tokens must be a well-known C
/// type keyword or storage/qualifier modifier.
fn inserted_block_starts_with_c_like_function_definition(line: &str) -> bool {
    const TYPE_OR_MODIFIER: &[&str] = &[
        "static", "extern", "inline", "void", "int", "char", "short", "long", "float", "double",
        "unsigned", "signed", "struct", "enum", "union", "const", "volatile", "typedef",
    ];

    let Some((head, _)) = line.trim_start().split_once('(') else {
        return false;
    };
    let tokens: Vec<&str> = head.split_whitespace().collect();
    let Some((&declarator, leading)) = tokens.split_last() else {
        return false;
    };
    if leading.is_empty() {
        // A lone `name(` is a call or macro use, not a definition.
        return false;
    }

    // Pointer/reference sigils attach to the *front* of the declarator in C (`char *name`),
    // so they have to be stripped from both ends before checking the identifier.
    let name = declarator
        .trim_start_matches(['*', '&'])
        .trim_end_matches(['*', '&']);
    if name.is_empty() || !name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return false;
    }

    leading.iter().any(|tok| TYPE_OR_MODIFIER.contains(tok))
}
3101
3102fn hunk_old_change_end_exclusive(ops: &[similar::DiffOp]) -> usize {
3103 use similar::DiffOp;
3104 let mut max_o = 0usize;
3105 for op in ops {
3106 match *op {
3107 DiffOp::Delete {
3108 old_index, old_len, ..
3109 } => {
3110 max_o = max_o.max(old_index + old_len);
3111 }
3112 DiffOp::Replace {
3113 old_index, old_len, ..
3114 } => {
3115 max_o = max_o.max(old_index + old_len);
3116 }
3117 DiffOp::Insert { old_index, .. } => {
3118 max_o = max_o.max(old_index);
3121 }
3122 DiffOp::Equal { .. } => {}
3123 }
3124 }
3125 max_o
3126}
3127
3128fn func_context_old_anchor(ops: &[similar::DiffOp], n_old: usize) -> usize {
3129 use similar::DiffOp;
3130 let mut has_delete_or_replace = false;
3131 let mut min_del = usize::MAX;
3132 let mut min_ins_old = usize::MAX;
3133
3134 for op in ops {
3135 match *op {
3136 DiffOp::Delete {
3137 old_index, old_len, ..
3138 } => {
3139 has_delete_or_replace = true;
3140 min_del = min_del.min(old_index);
3141 min_del = min_del.min(old_index + old_len.saturating_sub(1));
3142 }
3143 DiffOp::Replace {
3144 old_index, old_len, ..
3145 } => {
3146 has_delete_or_replace = true;
3147 min_del = min_del.min(old_index);
3148 min_del = min_del.min(old_index + old_len.saturating_sub(1));
3149 }
3150 DiffOp::Insert { old_index, .. } => {
3151 min_ins_old = min_ins_old.min(old_index);
3152 }
3153 DiffOp::Equal { .. } => {}
3154 }
3155 }
3156
3157 let mut i1 = if has_delete_or_replace {
3158 min_del
3159 } else if min_ins_old != usize::MAX {
3160 min_ins_old
3161 } else {
3162 0
3163 };
3164
3165 let pure_insert = ops
3166 .iter()
3167 .all(|op| matches!(op, DiffOp::Insert { .. } | DiffOp::Equal { .. }))
3168 && ops.iter().any(|op| matches!(op, DiffOp::Insert { .. }));
3169
3170 if pure_insert && i1 >= n_old && n_old > 0 {
3171 i1 = n_old - 1;
3172 }
3173
3174 i1.min(n_old.saturating_sub(1))
3175}
3176
3177fn expand_func_pre_start(
3178 s1: usize,
3179 i1: usize,
3180 n_old: usize,
3181 old_lines: &[&str],
3182 matcher: Option<&FuncnameMatcher>,
3183) -> usize {
3184 if n_old == 0 {
3185 return s1;
3186 }
3187 let i1 = i1.min(n_old.saturating_sub(1));
3188 let mut fs1 = get_func_line_backward(old_lines, i1, matcher).unwrap_or(i1);
3189 while fs1 > 0
3190 && !is_line_empty_for_func_context(old_lines[fs1 - 1])
3191 && !is_func_line(old_lines[fs1 - 1], matcher)
3192 {
3193 fs1 -= 1;
3194 }
3195 s1.min(fs1)
3196}
3197
3198fn expand_func_post_end(
3199 e1: usize,
3200 i1_end: usize,
3201 n_old: usize,
3202 old_lines: &[&str],
3203 matcher: Option<&FuncnameMatcher>,
3204) -> usize {
3205 let from = i1_end.min(n_old);
3206 let fe1 = get_func_line_forward(old_lines, from, matcher).unwrap_or(n_old);
3207 let mut fe1_adj = fe1;
3208 while fe1_adj > 0 && is_line_empty_for_func_context(old_lines[fe1_adj - 1]) {
3209 fe1_adj -= 1;
3210 }
3211 e1.max(fe1_adj).min(n_old)
3212}
3213
/// Returns `true` when `line` is empty or consists solely of whitespace characters.
fn is_line_empty_for_func_context(line: &str) -> bool {
    line.trim().is_empty()
}
3217
3218fn is_func_line(line: &str, matcher: Option<&FuncnameMatcher>) -> bool {
3219 if let Some(m) = matcher {
3220 return m.match_line(line).is_some();
3221 }
3222 let t = line.trim_end_matches(['\n', '\r']);
3223 if t.is_empty() {
3224 return false;
3225 }
3226 let b = t.as_bytes()[0];
3227 b.is_ascii_alphabetic() || b == b'_' || b == b'$'
3228}
3229
3230fn get_func_line_backward(
3231 old_lines: &[&str],
3232 start: usize,
3233 matcher: Option<&FuncnameMatcher>,
3234) -> Option<usize> {
3235 let mut l = start.min(old_lines.len().saturating_sub(1));
3236 if old_lines.is_empty() {
3237 return None;
3238 }
3239 loop {
3240 if is_func_line(old_lines[l], matcher) {
3241 return Some(l);
3242 }
3243 if l == 0 {
3244 break;
3245 }
3246 l -= 1;
3247 }
3248 None
3249}
3250
3251fn get_func_line_forward(
3252 old_lines: &[&str],
3253 start: usize,
3254 matcher: Option<&FuncnameMatcher>,
3255) -> Option<usize> {
3256 let mut l = start;
3257 while l < old_lines.len() {
3258 if is_func_line(old_lines[l], matcher) {
3259 return Some(l);
3260 }
3261 l += 1;
3262 }
3263 None
3264}
3265
3266pub fn anchored_unified_diff(
3276 old_content: &str,
3277 new_content: &str,
3278 old_path: &str,
3279 new_path: &str,
3280 context_lines: usize,
3281 anchors: &[String],
3282 algorithm: similar::Algorithm,
3283 use_git_histogram: bool,
3284 indent_heuristic: bool,
3285 quote_path_fully: bool,
3286) -> String {
3287 use crate::quote_path::format_diff_path_with_prefix;
3288 use similar::TextDiff;
3289
3290 let old_lines: Vec<&str> = old_content.lines().collect();
3291 let new_lines: Vec<&str> = new_content.lines().collect();
3292
3293 let mut anchor_pairs: Vec<(usize, usize)> = Vec::new(); for anchor in anchors {
3297 let anchor_str = anchor.as_str();
3298
3299 let old_positions: Vec<usize> = old_lines
3301 .iter()
3302 .enumerate()
3303 .filter(|(_, l)| l.trim_end() == anchor_str)
3304 .map(|(i, _)| i)
3305 .collect();
3306
3307 let new_positions: Vec<usize> = new_lines
3309 .iter()
3310 .enumerate()
3311 .filter(|(_, l)| l.trim_end() == anchor_str)
3312 .map(|(i, _)| i)
3313 .collect();
3314
3315 if old_positions.len() == 1 && new_positions.len() == 1 {
3317 anchor_pairs.push((old_positions[0], new_positions[0]));
3318 }
3319 }
3320
3321 if anchor_pairs.is_empty() {
3323 return unified_diff_with_prefix_and_funcname_and_algorithm(
3324 old_content,
3325 new_content,
3326 old_path,
3327 new_path,
3328 context_lines,
3329 0,
3330 "a/",
3331 "b/",
3332 None,
3333 algorithm,
3334 false,
3335 use_git_histogram,
3336 indent_heuristic,
3337 quote_path_fully,
3338 );
3339 }
3340
3341 anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
3343
3344 let mut filtered: Vec<(usize, usize)> = Vec::new();
3347 for &pair in &anchor_pairs {
3348 if filtered.is_empty() || filtered.last().is_some_and(|last| pair.1 > last.1) {
3349 filtered.push(pair);
3350 }
3351 }
3352 let anchor_pairs = filtered;
3353
3354 struct LineDiffOp {
3363 tag: char, line: String,
3365 }
3366
3367 let append_segment_diff =
3368 |ops: &mut Vec<LineDiffOp>, old_seg_input: &str, new_seg_input: &str| {
3369 use similar::ChangeTag;
3370 let old_ls: Vec<&str> = old_seg_input.lines().collect();
3371 let new_ls: Vec<&str> = new_seg_input.lines().collect();
3372 if old_ls.is_empty() && new_ls.is_empty() {
3373 return;
3374 }
3375 let seg_diff = TextDiff::configure()
3376 .algorithm(algorithm)
3377 .diff_slices(&old_ls, &new_ls);
3378 let raw = seg_diff.ops().to_vec();
3379 let compacted = diff_indent_heuristic::apply_change_compact_to_ops(
3380 &raw,
3381 &old_ls,
3382 &new_ls,
3383 indent_heuristic,
3384 );
3385 for op in &compacted {
3386 for ch in op.iter_changes(&old_ls, &new_ls) {
3387 let t = match ch.tag() {
3388 ChangeTag::Equal => ' ',
3389 ChangeTag::Delete => '-',
3390 ChangeTag::Insert => '+',
3391 };
3392 ops.push(LineDiffOp {
3393 tag: t,
3394 line: ch.value().to_string(),
3395 });
3396 }
3397 }
3398 };
3399
3400 let mut ops: Vec<LineDiffOp> = Vec::new();
3401 let mut old_pos = 0usize;
3402 let mut new_pos = 0usize;
3403
3404 for &(old_anchor, new_anchor) in &anchor_pairs {
3405 let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
3407 let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
3408
3409 let old_seg_text = old_segment.join("\n");
3410 let new_seg_text = new_segment.join("\n");
3411
3412 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
3413 let old_seg_input = if old_seg_text.is_empty() {
3414 String::new()
3415 } else {
3416 format!("{}\n", old_seg_text)
3417 };
3418 let new_seg_input = if new_seg_text.is_empty() {
3419 String::new()
3420 } else {
3421 format!("{}\n", new_seg_text)
3422 };
3423 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
3424 }
3425
3426 ops.push(LineDiffOp {
3428 tag: ' ',
3429 line: old_lines[old_anchor].to_string(),
3430 });
3431
3432 old_pos = old_anchor + 1;
3433 new_pos = new_anchor + 1;
3434 }
3435
3436 let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
3438 let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
3439 let old_seg_text = old_segment.join("\n");
3440 let new_seg_text = new_segment.join("\n");
3441
3442 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
3443 let old_seg_input = if old_seg_text.is_empty() {
3444 String::new()
3445 } else {
3446 format!("{}\n", old_seg_text)
3447 };
3448 let new_seg_input = if new_seg_text.is_empty() {
3449 String::new()
3450 } else {
3451 format!("{}\n", new_seg_text)
3452 };
3453 append_segment_diff(&mut ops, &old_seg_input, &new_seg_input);
3454 }
3455
3456 let mut output = String::new();
3458 if old_path == "/dev/null" {
3459 output.push_str("--- /dev/null\n");
3460 } else {
3461 output.push_str("--- ");
3462 output.push_str(&format_diff_path_with_prefix(
3463 "a/",
3464 old_path,
3465 quote_path_fully,
3466 ));
3467 output.push('\n');
3468 }
3469 if new_path == "/dev/null" {
3470 output.push_str("+++ /dev/null\n");
3471 } else {
3472 output.push_str("+++ ");
3473 output.push_str(&format_diff_path_with_prefix(
3474 "b/",
3475 new_path,
3476 quote_path_fully,
3477 ));
3478 output.push('\n');
3479 }
3480
3481 let total_ops = ops.len();
3483 if total_ops == 0 {
3484 return output;
3485 }
3486
3487 let mut hunks: Vec<(usize, usize)> = Vec::new(); let mut i = 0;
3490 while i < total_ops {
3491 if ops[i].tag != ' ' {
3492 let start = i.saturating_sub(context_lines);
3493 let mut end = i;
3494 while end < total_ops {
3496 if ops[end].tag != ' ' {
3497 end += 1;
3498 continue;
3499 }
3500 let mut next_change = end;
3502 while next_change < total_ops && ops[next_change].tag == ' ' {
3503 next_change += 1;
3504 }
3505 if next_change < total_ops && next_change - end <= context_lines * 2 {
3506 end = next_change + 1;
3507 } else {
3508 end = (end + context_lines).min(total_ops);
3509 break;
3510 }
3511 }
3512 if let Some(last) = hunks.last_mut() {
3514 if start <= last.1 {
3515 last.1 = end;
3516 } else {
3517 hunks.push((start, end));
3518 }
3519 } else {
3520 hunks.push((start, end));
3521 }
3522 i = end;
3523 } else {
3524 i += 1;
3525 }
3526 }
3527
3528 for (start, end) in hunks {
3530 let mut old_start = 1usize;
3532 let mut new_start = 1usize;
3533 for op in &ops[..start] {
3535 match op.tag {
3536 ' ' => {
3537 old_start += 1;
3538 new_start += 1;
3539 }
3540 '-' => {
3541 old_start += 1;
3542 }
3543 '+' => {
3544 new_start += 1;
3545 }
3546 _ => {}
3547 }
3548 }
3549 let mut old_count = 0usize;
3550 let mut new_count = 0usize;
3551 for op in &ops[start..end] {
3552 match op.tag {
3553 ' ' => {
3554 old_count += 1;
3555 new_count += 1;
3556 }
3557 '-' => {
3558 old_count += 1;
3559 }
3560 '+' => {
3561 new_count += 1;
3562 }
3563 _ => {}
3564 }
3565 }
3566
3567 output.push_str(&format!(
3568 "@@ -{},{} +{},{} @@\n",
3569 old_start, old_count, new_start, new_count
3570 ));
3571 for op in &ops[start..end] {
3572 output.push(op.tag);
3573 output.push_str(&op.line);
3574 output.push('\n');
3575 }
3576 }
3577
3578 output
3579}
3580
3581fn extract_function_context(
3587 header: &str,
3588 old_lines: &[&str],
3589 funcname_matcher: Option<&FuncnameMatcher>,
3590) -> Option<String> {
3591 let at_pos = header.find("-")?;
3593 let rest = &header[at_pos + 1..];
3594 let comma_or_space = rest.find([',', ' '])?;
3595 let start_str = &rest[..comma_or_space];
3596 let start_line: usize = start_str.parse().ok()?;
3597
3598 if start_line <= 1 {
3599 return None;
3600 }
3601
3602 let search_end = (start_line - 1).min(old_lines.len());
3607 let truncate = |text: &str| {
3608 if text.len() > 80 {
3609 let mut end = 80;
3610 while end > 0 && !text.is_char_boundary(end) {
3611 end -= 1;
3612 }
3613 text[..end].to_owned()
3614 } else {
3615 text.to_owned()
3616 }
3617 };
3618
3619 for i in (0..search_end).rev() {
3620 let line = old_lines[i];
3621 if line.is_empty() {
3622 continue;
3623 }
3624 if let Some(matcher) = funcname_matcher {
3625 if let Some(matched) = matcher.match_line(line) {
3626 return Some(truncate(&matched));
3627 }
3628 continue;
3629 }
3630
3631 let first = line.as_bytes()[0];
3632 if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
3633 return Some(truncate(line.trim_end_matches(char::is_whitespace)));
3634 }
3635 }
3636 None
3637}
3638
3639pub fn format_stat_line(
3643 path: &str,
3644 insertions: usize,
3645 deletions: usize,
3646 max_path_len: usize,
3647) -> String {
3648 format_stat_line_width(path, insertions, deletions, max_path_len, 0)
3649}
3650
/// Formats one stat row: ` <path> | <total> <bar>`.
///
/// * `path` is left-aligned and padded to `max_path_len`.
/// * The total (`insertions + deletions`) is right-aligned in `count_width` columns; a
///   `count_width` of `0` means "as wide as the total itself".
/// * The bar holds one `+` per insertion and one `-` per deletion, each capped at 50
///   characters; it is omitted, together with its leading space, when there are no changes.
pub fn format_stat_line_width(
    path: &str,
    insertions: usize,
    deletions: usize,
    max_path_len: usize,
    count_width: usize,
) -> String {
    let changed = insertions + deletions;
    let column = if count_width == 0 {
        changed.to_string().len()
    } else {
        count_width
    };

    let mut row = format!(" {path:<max_path_len$} | {changed:>column$}");
    let (plus, minus) = (insertions.min(50), deletions.min(50));
    if plus + minus > 0 {
        row.push(' ');
        row.push_str(&"+".repeat(plus));
        row.push_str(&"-".repeat(minus));
    }
    row
}
3686
/// Normalises the whitespace of a single line for space-change-insensitive comparison.
///
/// Every run of whitespace characters collapses into one ASCII space and trailing spaces are
/// removed. Leading whitespace is collapsed to a single space but not removed.
#[must_use]
pub fn normalize_ignore_space_change_line(line: &str) -> String {
    let mut collapsed = String::with_capacity(line.len());
    let mut previous_was_blank = false;
    for ch in line.chars() {
        let blank = ch.is_whitespace();
        if !blank {
            collapsed.push(ch);
        } else if !previous_was_blank {
            collapsed.push(' ');
        }
        previous_was_blank = blank;
    }
    let kept = collapsed.trim_end_matches(' ').len();
    collapsed.truncate(kept);
    collapsed
}
3708
3709#[must_use]
3715pub fn normalize_ignore_space_change(content: &str) -> String {
3716 content
3717 .lines()
3718 .map(normalize_ignore_space_change_line)
3719 .collect::<Vec<_>>()
3720 .join("\n")
3721}
3722
3723pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
3727 count_changes_with_algorithm(old_content, new_content, similar::Algorithm::Myers, false)
3728}
3729
3730#[must_use]
3735pub fn count_changes_with_algorithm(
3736 old_content: &str,
3737 new_content: &str,
3738 algorithm: similar::Algorithm,
3739 use_git_histogram: bool,
3740) -> (usize, usize) {
3741 if use_git_histogram {
3742 use imara_diff::{Algorithm, Diff, InternedInput};
3743 let input = InternedInput::new(old_content, new_content);
3744 let mut d = Diff::compute(Algorithm::Histogram, &input);
3745 d.postprocess_lines(&input);
3746 return (d.count_additions() as usize, d.count_removals() as usize);
3747 }
3748
3749 use similar::{ChangeTag, TextDiff};
3750
3751 let diff = TextDiff::configure()
3752 .algorithm(algorithm)
3753 .diff_lines(old_content, new_content);
3754 let mut ins = 0;
3755 let mut del = 0;
3756
3757 for change in diff.iter_all_changes() {
3758 match change.tag() {
3759 ChangeTag::Insert => ins += 1,
3760 ChangeTag::Delete => del += 1,
3761 ChangeTag::Equal => {}
3762 }
3763 }
3764
3765 (ins, del)
3766}
3767
/// Counts the lines in `data`: one per `\n`, plus one more when the data is non-empty and
/// does not end in `\n` (an unterminated final line). Empty input has zero lines.
#[must_use]
pub fn count_git_lines(data: &[u8]) -> usize {
    match data.last() {
        None => 0,
        Some(&tail) => {
            let newlines = data.iter().filter(|&&byte| byte == b'\n').count();
            newlines + usize::from(tail != b'\n')
        }
    }
}
3792
/// Top of the score scale used by the rewrite/similarity helpers; `60_000` stands for 100%.
pub const GIT_DIFF_MAX_SCORE: u64 = 60_000;
/// File-local shorthand for [`GIT_DIFF_MAX_SCORE`].
const DIFF_MAX_SCORE: u64 = GIT_DIFF_MAX_SCORE;
/// Pairs whose larger side is below this many bytes are never scored for a rewrite break.
const DIFF_MINIMUM_BREAK_SIZE: usize = 400;
/// Default break threshold: half of the maximum score (30 000).
const DIFF_DEFAULT_BREAK_SCORE: u64 = DIFF_MAX_SCORE / 2;
/// Public name of the default break threshold.
pub const GIT_DIFF_DEFAULT_BREAK_SCORE: u64 = DIFF_DEFAULT_BREAK_SCORE;
/// Default merge-after-break threshold: three fifths of the maximum score (36 000).
pub const GIT_DIFF_DEFAULT_MERGE_SCORE_AFTER_BREAK: u64 = DIFF_MAX_SCORE * 3 / 5;
/// Modulus applied to span fingerprints before they are stored in the span hash table.
const DIFF_HASHBASE: u32 = 107_927;
3804
/// One slot of the span hash table. A `cnt` of zero marks the slot as unused.
#[derive(Clone, Copy, Default)]
struct SpanSlot {
    /// Fingerprint of the span.
    hashval: u32,
    /// Accumulated count recorded for spans with this fingerprint.
    cnt: u32,
}

/// Open-addressing hash table (linear probing) from span fingerprint to accumulated count.
struct SpanHashTop {
    /// log2 of the number of slots in `data`.
    alloc_log2: u8,
    /// Inserts left before the table grows; going negative triggers a rehash.
    free_slots: i32,
    /// Slot storage, always exactly `1 << alloc_log2` entries long.
    data: Vec<SpanSlot>,
}

impl SpanHashTop {
    /// Creates an empty table with `1 << initial_log2` slots.
    fn new(initial_log2: u8) -> Self {
        let cap = 1usize << initial_log2;
        Self {
            alloc_log2: initial_log2,
            free_slots: initial_free(initial_log2),
            data: vec![SpanSlot::default(); cap],
        }
    }

    /// Number of slots (used or not) in the table.
    fn len(&self) -> usize {
        1usize << self.alloc_log2
    }

    /// Adds `cnt` to the entry for `hashval`, creating the entry if necessary.
    ///
    /// Creating an entry consumes one free slot; when none is left the table doubles in
    /// size. The new entry is carried over by the rehash itself, so it must not be added a
    /// second time afterwards. `cnt` is expected to be non-zero, since zero marks an unused
    /// slot.
    fn add_span(&mut self, hashval: u32, cnt: u32) {
        let mask = self.len() - 1;
        let mut bucket = (hashval as usize) & mask;
        loop {
            let slot = &mut self.data[bucket];
            if slot.cnt == 0 {
                *slot = SpanSlot { hashval, cnt };
                self.free_slots -= 1;
                if self.free_slots < 0 {
                    self.rehash();
                }
                return;
            }
            if slot.hashval == hashval {
                slot.cnt = slot.cnt.saturating_add(cnt);
                return;
            }
            bucket = (bucket + 1) & mask;
        }
    }

    /// Doubles the table and re-inserts every used slot.
    fn rehash(&mut self) {
        let old = std::mem::take(&mut self.data);
        self.alloc_log2 = self.alloc_log2.saturating_add(1);
        self.free_slots = initial_free(self.alloc_log2);
        self.data = vec![SpanSlot::default(); self.len()];
        for slot in old.into_iter().filter(|slot| slot.cnt != 0) {
            self.add_span_after_rehash(slot.hashval, slot.cnt);
        }
    }

    /// Places an entry into the freshly grown table during a rehash.
    ///
    /// Unlike [`Self::add_span`] this never grows the table: the new table has twice the
    /// slots of the one being emptied, so every carried-over entry finds a place.
    fn add_span_after_rehash(&mut self, hashval: u32, cnt: u32) {
        let mask = self.len() - 1;
        let mut bucket = (hashval as usize) & mask;
        loop {
            let slot = &mut self.data[bucket];
            if slot.cnt == 0 {
                *slot = SpanSlot { hashval, cnt };
                self.free_slots -= 1;
                return;
            }
            if slot.hashval == hashval {
                slot.cnt = slot.cnt.saturating_add(cnt);
                return;
            }
            bucket = (bucket + 1) & mask;
        }
    }

    /// Sorts the slots by fingerprint, moving unused slots to the end.
    fn sort_by_hashval(&mut self) {
        use std::cmp::Ordering;
        let sz = self.len();
        // Two unused slots must compare equal: `sort_by` needs a total order and may panic
        // when the comparator contradicts itself.
        self.data[..sz].sort_by(|a, b| match (a.cnt == 0, b.cnt == 0) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            (false, false) => a.hashval.cmp(&b.hashval),
        });
    }
}

/// Number of inserts a table of `1 << sz_log2` slots accepts before it has to grow:
/// `slots * (sz_log2 - 3) / sz_log2`, never negative. `sz_log2` must be non-zero.
fn initial_free(sz_log2: u8) -> i32 {
    let sz = sz_log2 as i32;
    ((1i32 << sz_log2) * (sz - 3) / sz).max(0)
}
3922
3923fn hash_blob_spans(buf: &[u8], is_text: bool) -> SpanHashTop {
3924 let mut hash = SpanHashTop::new(9);
3925 let mut n = 0u32;
3926 let mut accum1: u32 = 0;
3927 let mut accum2: u32 = 0;
3928 let mut i = 0usize;
3929 while i < buf.len() {
3930 let c = buf[i] as u32;
3931 let old_1 = accum1;
3932 i += 1;
3933
3934 if is_text && c == b'\r' as u32 && i < buf.len() && buf[i] == b'\n' {
3935 continue;
3936 }
3937
3938 accum1 = accum1.wrapping_shl(7) ^ accum2.wrapping_shr(25);
3939 accum2 = accum2.wrapping_shl(7) ^ old_1.wrapping_shr(25);
3940 accum1 = accum1.wrapping_add(c);
3941 n += 1;
3942 if n < 64 && c != b'\n' as u32 {
3943 continue;
3944 }
3945 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
3946 hash.add_span(hashval, n);
3947 n = 0;
3948 accum1 = 0;
3949 accum2 = 0;
3950 }
3951 if n > 0 {
3952 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
3953 hash.add_span(hashval, n);
3954 }
3955 hash.sort_by_hashval();
3956 hash
3957}
3958
3959#[must_use]
3964pub fn diffcore_count_changes(old: &[u8], new: &[u8]) -> (u64, u64) {
3965 let src_is_text = !crate::merge_file::is_binary(old);
3966 let dst_is_text = !crate::merge_file::is_binary(new);
3967 let src_count = hash_blob_spans(old, src_is_text);
3968 let dst_count = hash_blob_spans(new, dst_is_text);
3969 let mut sc: u64 = 0;
3970 let mut la: u64 = 0;
3971 let mut si = 0usize;
3972 let mut di = 0usize;
3973 let src_len = src_count.len();
3974 let dst_len = dst_count.len();
3975 loop {
3976 if si >= src_len || src_count.data[si].cnt == 0 {
3977 break;
3978 }
3979 let s_hash = src_count.data[si].hashval;
3980 let s_cnt = u64::from(src_count.data[si].cnt);
3981 while di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval < s_hash {
3982 la += u64::from(dst_count.data[di].cnt);
3983 di += 1;
3984 }
3985 let mut dst_cnt = 0u64;
3986 if di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval == s_hash {
3987 dst_cnt = u64::from(dst_count.data[di].cnt);
3988 di += 1;
3989 }
3990 if s_cnt < dst_cnt {
3991 la += dst_cnt - s_cnt;
3992 sc += s_cnt;
3993 } else {
3994 sc += dst_cnt;
3995 }
3996 si += 1;
3997 }
3998 while di < dst_len && dst_count.data[di].cnt != 0 {
3999 la += u64::from(dst_count.data[di].cnt);
4000 di += 1;
4001 }
4002 (sc, la)
4003}
4004
4005#[must_use]
4008pub fn should_break_rewrite_for_stat(old: &[u8], new: &[u8]) -> bool {
4009 should_break_rewrite_inner(old, new, DIFF_DEFAULT_BREAK_SCORE)
4010}
4011
4012#[must_use]
4016pub fn should_break_rewrite_pair(old: &[u8], new: &[u8], break_score: u64) -> bool {
4017 should_break_rewrite_inner(old, new, break_score)
4018}
4019
4020pub fn parse_diff_rename_score_token(arg: &str) -> Option<u64> {
4023 let mut num: u64 = 0;
4024 let mut scale: u64 = 1;
4025 let mut dot = false;
4026 let mut saw_digit = false;
4027 for ch in arg.chars() {
4028 if !dot && ch == '.' {
4029 scale = 1;
4030 dot = true;
4031 continue;
4032 }
4033 if ch == '%' {
4034 scale = if dot { scale.saturating_mul(100) } else { 100 };
4035 break;
4036 }
4037 if ch.is_ascii_digit() {
4038 saw_digit = true;
4039 if scale < 100_000 {
4040 scale = scale.saturating_mul(10);
4041 num = num.saturating_mul(10) + u64::from(ch as u8 - b'0');
4042 }
4043 } else {
4044 break;
4045 }
4046 }
4047 if !saw_digit {
4048 return None;
4049 }
4050 Some(if num >= scale {
4051 GIT_DIFF_MAX_SCORE
4052 } else {
4053 GIT_DIFF_MAX_SCORE * num / scale
4054 })
4055}
4056
4057#[must_use]
4060pub fn rewrite_merge_score(old: &[u8], new: &[u8]) -> Option<u64> {
4061 if old.is_empty() {
4062 return None;
4063 }
4064 let max_size = old.len().max(new.len());
4065 if max_size < DIFF_MINIMUM_BREAK_SIZE {
4066 return None;
4067 }
4068 let (src_copied, _) = diffcore_count_changes(old, new);
4069 let src_copied = src_copied.min(old.len() as u64);
4070 let src_removed = (old.len() as u64).saturating_sub(src_copied);
4071 Some(src_removed * DIFF_MAX_SCORE / old.len() as u64)
4072}
4073
4074#[must_use]
4076pub fn rewrite_dissimilarity_index_percent(old: &[u8], new: &[u8]) -> Option<u32> {
4077 let score = rewrite_merge_score(old, new)?;
4078 Some((score * 100 / DIFF_MAX_SCORE).min(100) as u32)
4079}
4080
4081fn should_break_rewrite_inner(src: &[u8], dst: &[u8], break_score: u64) -> bool {
4082 if src.is_empty() {
4083 return false;
4084 }
4085 let max_size = src.len().max(dst.len());
4086 if max_size < DIFF_MINIMUM_BREAK_SIZE {
4087 return false;
4088 }
4089 let (src_copied, literal_added) = diffcore_count_changes(src, dst);
4090 let src_copied = src_copied.min(src.len() as u64);
4091 let mut literal_added = literal_added;
4092 let dst_len = dst.len() as u64;
4093 if src_copied < dst_len && literal_added + src_copied > dst_len {
4094 literal_added = dst_len.saturating_sub(src_copied);
4095 }
4096 let src_removed = (src.len() as u64).saturating_sub(src_copied);
4097 let merge_score = src_removed * DIFF_MAX_SCORE / src.len() as u64;
4098 if merge_score > break_score {
4099 return true;
4100 }
4101 let delta_size = src_removed.saturating_add(literal_added);
4102 if delta_size * DIFF_MAX_SCORE / (max_size as u64) < break_score {
4103 return false;
4104 }
4105 let s = src.len() as u64;
4106 if (s * break_score < src_removed * DIFF_MAX_SCORE)
4107 && (literal_added * 20 < src_removed)
4108 && (literal_added * 20 < src_copied)
4109 {
4110 return false;
4111 }
4112 true
4113}
4114
/// One non-tree entry produced by `flatten_tree`.
struct FlatEntry {
    /// Slash-joined path of the entry, built from the tree prefix and the
    /// (lossily UTF-8 decoded) entry name.
    path: String,
    /// Mode copied from the tree entry.
    mode: u32,
    /// Object id copied from the tree entry.
    oid: ObjectId,
}
4123
4124fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
4125 let entries = read_tree(odb, tree_oid)?;
4126 let mut result = Vec::new();
4127
4128 for entry in entries {
4129 let name_str = String::from_utf8_lossy(&entry.name);
4130 let path = format_path(prefix, &name_str);
4131 if is_tree_mode(entry.mode) {
4132 let nested = flatten_tree(odb, &entry.oid, &path)?;
4133 result.extend(nested);
4134 } else {
4135 result.push(FlatEntry {
4136 path,
4137 mode: entry.mode,
4138 oid: entry.oid,
4139 });
4140 }
4141 }
4142
4143 Ok(result)
4144}
4145
4146pub fn head_path_states(
4148 odb: &Odb,
4149 head_tree: Option<&ObjectId>,
4150) -> Result<std::collections::BTreeMap<String, (u32, ObjectId)>> {
4151 let mut m = std::collections::BTreeMap::new();
4152 let Some(t) = head_tree else {
4153 return Ok(m);
4154 };
4155 for fe in flatten_tree(odb, t, "")? {
4156 m.insert(fe.path, (fe.mode, fe.oid));
4157 }
4158 Ok(m)
4159}
4160
/// Returns `true` when `mode` is exactly the tree (directory) mode `040000`.
fn is_tree_mode(mode: u32) -> bool {
    matches!(mode, 0o040000)
}
4165
/// Joins `prefix` and `name` with a `/`, or returns `name` alone when
/// `prefix` is empty.
fn format_path(prefix: &str, name: &str) -> String {
    match prefix {
        "" => name.to_owned(),
        parent => format!("{parent}/{name}"),
    }
}
4174
/// Renders `mode` in octal, zero-padded to at least six digits.
pub fn format_mode(mode: u32) -> String {
    format!("{:06o}", mode)
}
4179
/// Public entry point for reading the HEAD commit of the submodule checked
/// out at `sub_dir`.
///
/// Delegates to `read_submodule_head`; returns `None` whenever that does.
#[must_use]
pub fn read_submodule_head_for_checkout(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head(sub_dir)
}
4187
4188#[must_use]
4193pub fn submodule_commit_subject_line(c: &CommitData) -> String {
4194 let enc = c.encoding.as_deref().unwrap_or("UTF-8");
4195 let is_latin1 = enc.eq_ignore_ascii_case("ISO8859-1")
4196 || enc.eq_ignore_ascii_case("ISO-8859-1")
4197 || enc.eq_ignore_ascii_case("LATIN1")
4198 || enc.eq_ignore_ascii_case("ISO-8859-15");
4199 if let Some(raw) = c.raw_message.as_deref() {
4200 let line = raw.split(|b| *b == b'\n').next().unwrap_or(raw);
4201 if is_latin1 {
4202 return line
4203 .iter()
4204 .map(|&b| b as char)
4205 .collect::<String>()
4206 .trim()
4207 .to_owned();
4208 }
4209 return String::from_utf8_lossy(line).trim().to_string();
4210 }
4211 c.message.lines().next().unwrap_or("").trim().to_owned()
4212}
4213
/// Returns `true` when `sub_dir` is missing or is a directory with no
/// entries at all.
///
/// Any other failure to list the directory yields `false`.
fn submodule_worktree_is_unpopulated_placeholder(sub_dir: &Path) -> bool {
    let mut listing = match fs::read_dir(sub_dir) {
        Ok(listing) => listing,
        // A missing directory counts as unpopulated; other errors do not.
        Err(err) => return err.kind() == std::io::ErrorKind::NotFound,
    };
    listing.next().is_none()
}
4223
/// Reads the HEAD commit of the submodule checked out at `sub_dir`.
///
/// Internal alias that forwards to `read_submodule_head_oid`.
fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
    read_submodule_head_oid(sub_dir)
}
4227
/// Locates the git directory referenced by `sub_dir/.git`.
///
/// * If `.git` is a regular file, its first `gitdir: ` line names the git
///   directory; a relative target is resolved against `sub_dir`.
/// * If `.git` is a directory, that directory is returned.
/// * Otherwise — or when the file cannot be read or has no `gitdir: ` line —
///   `None` is returned.
#[must_use]
pub fn submodule_embedded_git_dir(sub_dir: &Path) -> Option<PathBuf> {
    let dot_git = sub_dir.join(".git");

    if dot_git.is_file() {
        let text = fs::read_to_string(&dot_git).ok()?;
        let target = text
            .lines()
            .find_map(|line| line.strip_prefix("gitdir: "))?
            .trim();
        let target = Path::new(target);
        return Some(if target.is_absolute() {
            target.to_path_buf()
        } else {
            sub_dir.join(target)
        });
    }

    dot_git.is_dir().then_some(dot_git)
}
4249
/// Walks upward from the parent of `sub_dir` to the nearest ancestor that
/// contains a `.git` entry.
///
/// Returns `(worktree, git_dir)` for that ancestor. When `.git` is a file,
/// `git_dir` is the target of its `gitdir: ` line (resolved against the
/// ancestor if relative); otherwise it is the `.git` path itself.
///
/// Returns `None` when no ancestor has a `.git` entry, or when the nearest
/// `.git` file cannot be read or lacks a `gitdir: ` line.
fn find_superproject_git(sub_dir: &Path) -> Option<(PathBuf, PathBuf)> {
    // `ancestors()` starts with `sub_dir` itself, which must not be considered.
    for ancestor in sub_dir.ancestors().skip(1) {
        let marker = ancestor.join(".git");
        if !marker.exists() {
            continue;
        }
        if !marker.is_file() {
            return Some((ancestor.to_path_buf(), marker));
        }

        let text = fs::read_to_string(&marker).ok()?;
        let target = text
            .lines()
            .find_map(|line| line.strip_prefix("gitdir: "))?
            .trim();
        let git_dir = if Path::new(target).is_absolute() {
            PathBuf::from(target)
        } else {
            ancestor.join(target)
        };
        return Some((ancestor.to_path_buf(), git_dir));
    }
    None
}
4275
4276pub fn read_submodule_head_oid(sub_dir: &Path) -> Option<ObjectId> {
4282 let mut git_dir = submodule_embedded_git_dir(sub_dir)?;
4285 if let Some((super_wt, super_git_dir)) = find_superproject_git(sub_dir) {
4286 let rel = sub_dir.strip_prefix(&super_wt).ok()?;
4287 let rel_str = rel.to_string_lossy().replace('\\', "/");
4288 let local_mod = super_git_dir
4289 .join("modules")
4290 .join(rel_str.trim_start_matches('/'));
4291 if local_mod.join("HEAD").exists() {
4292 let sg = super_git_dir.canonicalize().unwrap_or(super_git_dir);
4293 let cur = git_dir.canonicalize().unwrap_or_else(|_| git_dir.clone());
4294 if !cur.starts_with(&sg) {
4295 git_dir = local_mod;
4296 }
4297 }
4298 }
4299 let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
4300 let head_trimmed = head_content.trim();
4301 if head_trimmed.starts_with("ref: ") {
4302 match crate::refs::resolve_ref(&git_dir, "HEAD") {
4306 Ok(oid) => Some(oid),
4307 Err(_) => {
4308 let mut found = None;
4309 for branch in ["main", "master"] {
4310 let p = git_dir.join("refs/heads").join(branch);
4311 if let Ok(s) = fs::read_to_string(&p) {
4312 if let Ok(o) = ObjectId::from_hex(s.trim()) {
4313 found = Some(o);
4314 break;
4315 }
4316 }
4317 }
4318 found
4319 }
4320 }
4321 } else {
4322 ObjectId::from_hex(head_trimmed).ok()
4323 }
4324}
4325
/// Returns `true` when the submodule at `rel_path` (relative to
/// `super_worktree`) is reported as modified or as having untracked files by
/// `submodule_porcelain_flags`.
///
/// The `new_commits` flag is deliberately not part of the result.
fn submodule_has_dirty_worktree_for_super_diff(
    super_worktree: &Path,
    rel_path: &str,
    recorded_oid: &ObjectId,
) -> bool {
    let flags = submodule_porcelain_flags(super_worktree, rel_path, *recorded_oid);
    flags.modified || flags.untracked
}
4336
/// Status flags for a single submodule as computed by
/// `submodule_porcelain_flags`. All flags default to `false`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SubmodulePorcelainFlags {
    /// The submodule's HEAD commit differs from the commit recorded by the
    /// superproject.
    pub new_commits: bool,
    /// The submodule has staged or unstaged changes, or a nested submodule is
    /// itself modified.
    pub modified: bool,
    /// The submodule's working tree contains files not present in its index.
    pub untracked: bool,
}
4347
4348pub fn submodule_porcelain_flags(
4354 super_worktree: &Path,
4355 rel_path: &str,
4356 recorded_oid: ObjectId,
4357) -> SubmodulePorcelainFlags {
4358 let sub_dir = super_worktree.join(rel_path);
4359 let Some(sub_git_dir) = submodule_embedded_git_dir(&sub_dir) else {
4360 return SubmodulePorcelainFlags::default();
4361 };
4362 let Some(sub_head) = read_submodule_head_oid(&sub_dir) else {
4363 return SubmodulePorcelainFlags::default();
4364 };
4365
4366 let new_commits = sub_head != recorded_oid;
4367
4368 let index_path = sub_git_dir.join("index");
4369 let sub_index = match crate::index::Index::load(&index_path) {
4370 Ok(ix) => ix,
4371 Err(_) => {
4372 return SubmodulePorcelainFlags {
4373 new_commits,
4374 ..Default::default()
4375 }
4376 }
4377 };
4378
4379 let tracked: std::collections::BTreeSet<String> = sub_index
4380 .entries
4381 .iter()
4382 .filter(|e| e.stage() == 0)
4383 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
4384 .collect();
4385 let untracked = submodule_dir_has_untracked_inner(&sub_dir, &sub_dir, &tracked, &sub_index);
4386
4387 let objects_dir = sub_git_dir.join("objects");
4388 let odb = Odb::new(&objects_dir);
4389
4390 let sub_head_tree = (|| -> Option<ObjectId> {
4391 let h = fs::read_to_string(sub_git_dir.join("HEAD")).ok()?;
4392 let h_str = h.trim();
4393 let commit_oid = if let Some(r) = h_str.strip_prefix("ref: ") {
4394 let oid_hex = fs::read_to_string(sub_git_dir.join(r)).ok()?;
4395 ObjectId::from_hex(oid_hex.trim()).ok()?
4396 } else {
4397 ObjectId::from_hex(h_str).ok()?
4398 };
4399 let obj = odb.read(&commit_oid).ok()?;
4400 let commit = parse_commit(&obj.data).ok()?;
4401 Some(commit.tree)
4402 })();
4403
4404 let staged_dirty = sub_head_tree
4405 .as_ref()
4406 .map(|t| diff_index_to_tree(&odb, &sub_index, Some(t), false).map(|v| !v.is_empty()))
4407 .unwrap_or(Ok(false));
4408 let staged_dirty = staged_dirty.unwrap_or(false);
4409
4410 let unstaged_dirty = diff_index_to_worktree(&odb, &sub_index, &sub_dir, false, true)
4411 .map(|v| !v.is_empty())
4412 .unwrap_or(false);
4413
4414 let mut modified = staged_dirty || unstaged_dirty;
4415
4416 for e in &sub_index.entries {
4421 if e.stage() != 0 || e.mode != 0o160000 {
4422 continue;
4423 }
4424 let child = String::from_utf8_lossy(&e.path).into_owned();
4425 let full_rel = if rel_path.is_empty() {
4426 child
4427 } else {
4428 format!("{rel_path}/{child}")
4429 };
4430 let nested = submodule_porcelain_flags(super_worktree, &full_rel, e.oid);
4431 modified |= nested.modified;
4432 }
4433
4434 SubmodulePorcelainFlags {
4435 new_commits,
4436 modified,
4437 untracked,
4438 }
4439}
4440
4441fn submodule_dir_has_untracked_inner(
4442 dir: &Path,
4443 root: &Path,
4444 tracked: &std::collections::BTreeSet<String>,
4445 owning_index: &Index,
4446) -> bool {
4447 let entries = match fs::read_dir(dir) {
4448 Ok(e) => e,
4449 Err(_) => return false,
4450 };
4451 let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
4452 sorted.sort_by_key(|e| e.file_name());
4453
4454 for entry in sorted {
4455 let name = entry.file_name().to_string_lossy().to_string();
4456 if name == ".git" {
4457 continue;
4458 }
4459 let path = entry.path();
4460 let rel = path
4461 .strip_prefix(root)
4462 .map(|p| p.to_string_lossy().to_string())
4463 .unwrap_or_else(|_| name.clone());
4464
4465 let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
4466 if is_dir {
4467 let is_gitlink = owning_index
4468 .get(rel.as_bytes(), 0)
4469 .is_some_and(|e| e.mode == 0o160000);
4470 if is_gitlink {
4471 let Some(nested_git) = submodule_embedded_git_dir(&path) else {
4472 continue;
4473 };
4474 let nested_index_path = nested_git.join("index");
4475 let Ok(nested_ix) = crate::index::Index::load(&nested_index_path) else {
4476 continue;
4477 };
4478 let nested_tracked: std::collections::BTreeSet<String> = nested_ix
4479 .entries
4480 .iter()
4481 .filter(|e| e.stage() == 0)
4482 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
4483 .collect();
4484 if submodule_dir_has_untracked_inner(&path, &path, &nested_tracked, &nested_ix) {
4485 return true;
4486 }
4487 } else if submodule_dir_has_untracked_inner(&path, root, tracked, owning_index) {
4488 return true;
4489 }
4490 } else if !tracked.contains(&rel) {
4491 return true;
4492 }
4493 }
4494 false
4495}