1use std::fs;
23use std::os::unix::fs::MetadataExt;
24use std::path::{Path, PathBuf};
25
26use crate::error::{Error, Result};
27use crate::index::{Index, IndexEntry};
28use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind, TreeEntry};
29use crate::odb::Odb;
30use crate::userdiff::FuncnameMatcher;
31
32#[derive(Debug, Clone, Copy, PartialEq, Eq)]
34pub enum DiffStatus {
35 Added,
37 Deleted,
39 Modified,
41 Renamed,
43 Copied,
45 TypeChanged,
47 Unmerged,
49}
50
51impl DiffStatus {
52 #[must_use]
54 pub fn letter(&self) -> char {
55 match self {
56 Self::Added => 'A',
57 Self::Deleted => 'D',
58 Self::Modified => 'M',
59 Self::Renamed => 'R',
60 Self::Copied => 'C',
61 Self::TypeChanged => 'T',
62 Self::Unmerged => 'U',
63 }
64 }
65}
66
67#[derive(Debug, Clone, PartialEq, Eq)]
69pub struct DiffEntry {
70 pub status: DiffStatus,
72 pub old_path: Option<String>,
74 pub new_path: Option<String>,
76 pub old_mode: String,
78 pub new_mode: String,
80 pub old_oid: ObjectId,
82 pub new_oid: ObjectId,
84 pub score: Option<u32>,
86}
87
88impl DiffEntry {
89 #[must_use]
91 pub fn path(&self) -> &str {
92 self.new_path
93 .as_deref()
94 .or(self.old_path.as_deref())
95 .unwrap_or("")
96 }
97
98 #[must_use]
103 pub fn display_path(&self) -> String {
104 match self.status {
105 DiffStatus::Renamed | DiffStatus::Copied => {
106 let old = self.old_path.as_deref().unwrap_or("");
107 let new = self.new_path.as_deref().unwrap_or("");
108 if old.is_empty() || new.is_empty() {
109 self.path().to_owned()
110 } else {
111 format!("{old} -> {new}")
112 }
113 }
114 _ => self.path().to_owned(),
115 }
116 }
117}
118
119pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
121
122#[must_use]
124pub fn zero_oid() -> ObjectId {
125 ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
126 panic!("internal error: failed to create zero OID");
128 })
129}
130
131#[must_use]
133pub fn empty_blob_oid() -> ObjectId {
134 ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391").unwrap_or_else(|_| {
135 panic!("internal error: failed to create empty blob OID");
137 })
138}
139
140pub fn diff_trees(
155 odb: &Odb,
156 old_tree_oid: Option<&ObjectId>,
157 new_tree_oid: Option<&ObjectId>,
158 prefix: &str,
159) -> Result<Vec<DiffEntry>> {
160 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false)
161}
162
163pub fn diff_trees_show_tree_entries(
167 odb: &Odb,
168 old_tree_oid: Option<&ObjectId>,
169 new_tree_oid: Option<&ObjectId>,
170 prefix: &str,
171) -> Result<Vec<DiffEntry>> {
172 diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true)
173}
174
175fn diff_trees_opts(
176 odb: &Odb,
177 old_tree_oid: Option<&ObjectId>,
178 new_tree_oid: Option<&ObjectId>,
179 prefix: &str,
180 show_trees: bool,
181) -> Result<Vec<DiffEntry>> {
182 let old_entries = match old_tree_oid {
183 Some(oid) => read_tree(odb, oid)?,
184 None => Vec::new(),
185 };
186 let new_entries = match new_tree_oid {
187 Some(oid) => read_tree(odb, oid)?,
188 None => Vec::new(),
189 };
190
191 let mut result = Vec::new();
192 diff_tree_entries_opts(
193 odb,
194 &old_entries,
195 &new_entries,
196 prefix,
197 show_trees,
198 &mut result,
199 )?;
200 Ok(result)
201}
202
203fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
205 let obj = odb.read(oid)?;
206 if obj.kind != ObjectKind::Tree {
207 return Err(Error::CorruptObject(format!(
208 "expected tree, got {}",
209 obj.kind.as_str()
210 )));
211 }
212 parse_tree(&obj.data)
213}
214
215fn diff_tree_entries_opts(
217 odb: &Odb,
218 old: &[TreeEntry],
219 new: &[TreeEntry],
220 prefix: &str,
221 show_trees: bool,
222 result: &mut Vec<DiffEntry>,
223) -> Result<()> {
224 let mut oi = 0;
225 let mut ni = 0;
226
227 while oi < old.len() || ni < new.len() {
228 match (old.get(oi), new.get(ni)) {
229 (Some(o), Some(n)) => {
230 let cmp = crate::objects::tree_entry_cmp(
231 &o.name,
232 is_tree_mode(o.mode),
233 &n.name,
234 is_tree_mode(n.mode),
235 );
236 match cmp {
237 std::cmp::Ordering::Less => {
238 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
240 oi += 1;
241 }
242 std::cmp::Ordering::Greater => {
243 emit_added_opts(odb, n, prefix, show_trees, result)?;
245 ni += 1;
246 }
247 std::cmp::Ordering::Equal => {
248 if o.oid != n.oid || o.mode != n.mode {
250 let name_str = String::from_utf8_lossy(&o.name);
251 let path = format_path(prefix, &name_str);
252 if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
253 if show_trees {
255 result.push(DiffEntry {
256 status: DiffStatus::Modified,
257 old_path: Some(path.clone()),
258 new_path: Some(path.clone()),
259 old_mode: format_mode(o.mode),
260 new_mode: format_mode(n.mode),
261 old_oid: o.oid,
262 new_oid: n.oid,
263 score: None,
264 });
265 }
266 let nested = diff_trees_opts(
268 odb,
269 Some(&o.oid),
270 Some(&n.oid),
271 &path,
272 show_trees,
273 )?;
274 result.extend(nested);
275 } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
276 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
278 emit_added_opts(odb, n, prefix, show_trees, result)?;
279 } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
280 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
282 emit_added_opts(odb, n, prefix, show_trees, result)?;
283 } else {
284 let old_type = o.mode & 0o170000;
288 let new_type = n.mode & 0o170000;
289 result.push(DiffEntry {
290 status: if old_type != new_type {
291 DiffStatus::TypeChanged
292 } else {
293 DiffStatus::Modified
294 },
295 old_path: Some(path.clone()),
296 new_path: Some(path),
297 old_mode: format_mode(o.mode),
298 new_mode: format_mode(n.mode),
299 old_oid: o.oid,
300 new_oid: n.oid,
301 score: None,
302 });
303 }
304 }
305 oi += 1;
306 ni += 1;
307 }
308 }
309 }
310 (Some(o), None) => {
311 emit_deleted_opts(odb, o, prefix, show_trees, result)?;
312 oi += 1;
313 }
314 (None, Some(n)) => {
315 emit_added_opts(odb, n, prefix, show_trees, result)?;
316 ni += 1;
317 }
318 (None, None) => break,
319 }
320 }
321
322 Ok(())
323}
324
325fn emit_deleted_opts(
326 odb: &Odb,
327 entry: &TreeEntry,
328 prefix: &str,
329 show_trees: bool,
330 result: &mut Vec<DiffEntry>,
331) -> Result<()> {
332 let name_str = String::from_utf8_lossy(&entry.name);
333 let path = format_path(prefix, &name_str);
334 if is_tree_mode(entry.mode) {
335 if show_trees {
336 result.push(DiffEntry {
337 status: DiffStatus::Deleted,
338 old_path: Some(path.clone()),
339 new_path: None,
340 old_mode: format_mode(entry.mode),
341 new_mode: "000000".to_owned(),
342 old_oid: entry.oid,
343 new_oid: zero_oid(),
344 score: None,
345 });
346 }
347 let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
349 result.extend(nested);
350 } else {
351 result.push(DiffEntry {
352 status: DiffStatus::Deleted,
353 old_path: Some(path.clone()),
354 new_path: None,
355 old_mode: format_mode(entry.mode),
356 new_mode: "000000".to_owned(),
357 old_oid: entry.oid,
358 new_oid: zero_oid(),
359 score: None,
360 });
361 }
362 Ok(())
363}
364
365fn emit_added_opts(
366 odb: &Odb,
367 entry: &TreeEntry,
368 prefix: &str,
369 show_trees: bool,
370 result: &mut Vec<DiffEntry>,
371) -> Result<()> {
372 let name_str = String::from_utf8_lossy(&entry.name);
373 let path = format_path(prefix, &name_str);
374 if is_tree_mode(entry.mode) {
375 if show_trees {
376 result.push(DiffEntry {
377 status: DiffStatus::Added,
378 old_path: None,
379 new_path: Some(path.clone()),
380 old_mode: "000000".to_owned(),
381 new_mode: format_mode(entry.mode),
382 old_oid: zero_oid(),
383 new_oid: entry.oid,
384 score: None,
385 });
386 }
387 let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
389 result.extend(nested);
390 } else {
391 result.push(DiffEntry {
392 status: DiffStatus::Added,
393 old_path: None,
394 new_path: Some(path),
395 old_mode: "000000".to_owned(),
396 new_mode: format_mode(entry.mode),
397 old_oid: zero_oid(),
398 new_oid: entry.oid,
399 score: None,
400 });
401 }
402 Ok(())
403}
404
405pub fn diff_index_to_tree(
422 odb: &Odb,
423 index: &Index,
424 tree_oid: Option<&ObjectId>,
425) -> Result<Vec<DiffEntry>> {
426 let tree_entries = match tree_oid {
428 Some(oid) => flatten_tree(odb, oid, "")?,
429 None => Vec::new(),
430 };
431
432 let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
434 std::collections::BTreeMap::new();
435 for entry in &tree_entries {
436 tree_map.insert(&entry.path, entry);
437 }
438
439 let mut result = Vec::new();
440 let mut stage0_paths = std::collections::BTreeSet::new();
441 let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
442 std::collections::BTreeMap::new();
443
444 for ie in &index.entries {
446 let path = String::from_utf8_lossy(&ie.path).to_string();
447 if ie.stage() == 0 && ie.intent_to_add() {
448 continue;
451 }
452 if ie.stage() != 0 {
453 let rank = match ie.stage() {
454 2 => 0u8,
455 3 => 1u8,
456 1 => 2u8,
457 _ => 3u8,
458 };
459 match unmerged_modes.get(&path) {
460 Some((existing_rank, _)) if *existing_rank <= rank => {}
461 _ => {
462 unmerged_modes.insert(path, (rank, ie.mode));
463 }
464 }
465 continue;
466 }
467 stage0_paths.insert(path.clone());
468 match tree_map.remove(path.as_str()) {
469 Some(te) => {
470 if te.oid != ie.oid || te.mode != ie.mode {
472 result.push(DiffEntry {
473 status: DiffStatus::Modified,
474 old_path: Some(path.clone()),
475 new_path: Some(path),
476 old_mode: format_mode(te.mode),
477 new_mode: format_mode(ie.mode),
478 old_oid: te.oid,
479 new_oid: ie.oid,
480 score: None,
481 });
482 }
483 }
484 None => {
485 result.push(DiffEntry {
487 status: DiffStatus::Added,
488 old_path: None,
489 new_path: Some(path),
490 old_mode: "000000".to_owned(),
491 new_mode: format_mode(ie.mode),
492 old_oid: zero_oid(),
493 new_oid: ie.oid,
494 score: None,
495 });
496 }
497 }
498 }
499
500 for (path, (_, mode)) in &unmerged_modes {
501 if stage0_paths.contains(path) {
502 continue;
503 }
504 tree_map.remove(path.as_str());
505 result.push(DiffEntry {
506 status: DiffStatus::Unmerged,
507 old_path: Some(path.clone()),
508 new_path: Some(path.clone()),
509 old_mode: "000000".to_owned(),
510 new_mode: format_mode(*mode),
511 old_oid: zero_oid(),
512 new_oid: zero_oid(),
513 score: None,
514 });
515 }
516
517 for (path, te) in tree_map {
519 result.push(DiffEntry {
520 status: DiffStatus::Deleted,
521 old_path: Some(path.to_owned()),
522 new_path: None,
523 old_mode: format_mode(te.mode),
524 new_mode: "000000".to_owned(),
525 old_oid: te.oid,
526 new_oid: zero_oid(),
527 score: None,
528 });
529 }
530
531 result.sort_by(|a, b| a.path().cmp(b.path()));
532 Ok(result)
533}
534
535pub fn diff_index_to_worktree(
554 odb: &Odb,
555 index: &Index,
556 work_tree: &Path,
557) -> Result<Vec<DiffEntry>> {
558 use crate::config::ConfigSet;
559 use crate::crlf;
560
561 let git_dir = work_tree.join(".git");
562 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
563 let conv = crlf::ConversionConfig::from_config(&config);
564 let attrs = crlf::load_gitattributes(work_tree);
565
566 let mut result = Vec::new();
567 let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
568 std::collections::BTreeMap::new();
569
570 for ie in &index.entries {
571 if ie.stage() != 0 {
572 let path = String::from_utf8_lossy(&ie.path).to_string();
573 let rank = match ie.stage() {
574 2 => 0u8,
575 3 => 1u8,
576 1 => 2u8,
577 _ => 3u8,
578 };
579 match unmerged_base.get(&path) {
580 Some((existing_rank, _)) if *existing_rank <= rank => {}
581 _ => {
582 unmerged_base.insert(path, (rank, ie));
583 }
584 }
585 continue;
586 }
587 if ie.skip_worktree() {
588 continue;
591 }
592 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
595 let is_intent_to_add = ie.intent_to_add();
596
597 if ie.assume_unchanged() || ie.skip_worktree() {
598 continue;
599 }
600
601 if ie.mode == 0o160000 {
604 let sub_dir = work_tree.join(path_str_ref);
605 let sub_head_oid = read_submodule_head_oid(&sub_dir);
606 let matches_index = match sub_head_oid {
607 Some(oid) => oid == ie.oid,
608 None => submodule_worktree_is_unpopulated_placeholder(&sub_dir),
609 };
610 if !matches_index {
611 let path_owned = path_str_ref.to_owned();
612 let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
613 result.push(DiffEntry {
614 status: DiffStatus::Modified,
615 old_path: Some(path_owned.clone()),
616 new_path: Some(path_owned),
617 old_mode: format_mode(ie.mode),
618 new_mode: format_mode(ie.mode),
619 old_oid: ie.oid,
620 new_oid,
621 score: None,
622 });
623 }
624 continue;
625 }
626
627 let file_path = work_tree.join(path_str_ref);
628
629 if is_intent_to_add {
630 match fs::symlink_metadata(&file_path) {
631 Ok(meta) => {
632 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
633 let worktree_oid = hash_worktree_file(
634 odb,
635 &file_path,
636 &meta,
637 &conv,
638 &file_attrs,
639 path_str_ref,
640 None,
641 )?;
642 let worktree_mode = mode_from_metadata(&meta);
643 result.push(DiffEntry {
644 status: DiffStatus::Added,
645 old_path: None,
646 new_path: Some(path_str_ref.to_owned()),
647 old_mode: "000000".to_owned(),
648 new_mode: format_mode(worktree_mode),
649 old_oid: zero_oid(),
650 new_oid: worktree_oid,
651 score: None,
652 });
653 }
654 Err(e)
655 if e.kind() == std::io::ErrorKind::NotFound
656 || e.raw_os_error() == Some(20) =>
657 {
658 result.push(DiffEntry {
659 status: DiffStatus::Deleted,
660 old_path: Some(path_str_ref.to_owned()),
661 new_path: None,
662 old_mode: format_mode(ie.mode),
663 new_mode: "000000".to_owned(),
664 old_oid: empty_blob_oid(),
665 new_oid: zero_oid(),
666 score: None,
667 });
668 }
669 Err(e) => return Err(Error::Io(e)),
670 }
671 continue;
672 }
673
674 if has_symlink_in_path(work_tree, path_str_ref) {
677 result.push(DiffEntry {
678 status: DiffStatus::Deleted,
679 old_path: Some(path_str_ref.to_owned()),
680 new_path: None,
681 old_mode: format_mode(ie.mode),
682 new_mode: "000000".to_owned(),
683 old_oid: ie.oid,
684 new_oid: zero_oid(),
685 score: None,
686 });
687 continue;
688 }
689
690 match fs::symlink_metadata(&file_path) {
691 Ok(meta) if meta.is_dir() => {
692 result.push(DiffEntry {
695 status: DiffStatus::Deleted,
696 old_path: Some(path_str_ref.to_owned()),
697 new_path: None,
698 old_mode: format_mode(ie.mode),
699 new_mode: String::new(),
700 old_oid: ie.oid,
701 new_oid: zero_oid(),
702 score: None,
703 });
704 }
705 Ok(meta) => {
706 let worktree_mode = mode_from_metadata(&meta);
707 if stat_matches(ie, &meta) && worktree_mode != ie.mode {
709 let path_owned = path_str_ref.to_owned();
710 result.push(DiffEntry {
711 status: DiffStatus::Modified,
712 old_path: Some(path_owned.clone()),
713 new_path: Some(path_owned),
714 old_mode: format_mode(ie.mode),
715 new_mode: format_mode(worktree_mode),
716 old_oid: ie.oid,
717 new_oid: ie.oid,
718 score: None,
719 });
720 continue;
721 }
722
723 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
727 let worktree_oid = hash_worktree_file(
728 odb,
729 &file_path,
730 &meta,
731 &conv,
732 &file_attrs,
733 path_str_ref,
734 Some(ie),
735 )?;
736
737 let mut eff_oid = worktree_oid;
741 if eff_oid != ie.oid {
742 if let Ok(raw) = fs::read(&file_path) {
743 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
744 if raw_oid == ie.oid {
745 eff_oid = ie.oid;
746 }
747 }
748 }
749
750 if eff_oid != ie.oid || worktree_mode != ie.mode {
751 let path_owned = path_str_ref.to_owned();
752 result.push(DiffEntry {
753 status: DiffStatus::Modified,
754 old_path: Some(path_owned.clone()),
755 new_path: Some(path_owned),
756 old_mode: format_mode(ie.mode),
757 new_mode: format_mode(worktree_mode),
758 old_oid: ie.oid,
759 new_oid: eff_oid,
760 score: None,
761 });
762 }
763 }
764 Err(e) if e.kind() == std::io::ErrorKind::NotFound
765 || e.raw_os_error() == Some(20) => {
766 result.push(DiffEntry {
768 status: DiffStatus::Deleted,
769 old_path: Some(path_str_ref.to_owned()),
770 new_path: None,
771 old_mode: format_mode(ie.mode),
772 new_mode: "000000".to_owned(),
773 old_oid: ie.oid,
774 new_oid: zero_oid(),
775 score: None,
776 });
777 }
778 Err(e) => return Err(Error::Io(e)),
779 }
780 }
781
782 for (path, (_, base_entry)) in unmerged_base {
783 let file_path = work_tree.join(&path);
784 let wt_meta = match fs::symlink_metadata(&file_path) {
785 Ok(meta) => Some(meta),
786 Err(e)
787 if e.kind() == std::io::ErrorKind::NotFound
788 || e.raw_os_error() == Some(20) =>
789 {
790 None
791 }
792 Err(e) => return Err(Error::Io(e)),
793 };
794
795 let new_mode = wt_meta.as_ref().map_or_else(
796 || "000000".to_owned(),
797 |meta| format_mode(mode_from_metadata(meta)),
798 );
799 result.push(DiffEntry {
800 status: DiffStatus::Unmerged,
801 old_path: Some(path.clone()),
802 new_path: Some(path.clone()),
803 old_mode: "000000".to_owned(),
804 new_mode,
805 old_oid: zero_oid(),
806 new_oid: zero_oid(),
807 score: None,
808 });
809
810 if let Some(meta) = wt_meta {
811 let file_attrs = crlf::get_file_attrs(&attrs, &path, false, &config);
812 let wt_oid = hash_worktree_file(
813 odb,
814 &file_path,
815 &meta,
816 &conv,
817 &file_attrs,
818 &path,
819 Some(base_entry),
820 )?;
821 let wt_mode = mode_from_metadata(&meta);
822 if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
823 result.push(DiffEntry {
824 status: DiffStatus::Modified,
825 old_path: Some(path.clone()),
826 new_path: Some(path),
827 old_mode: format_mode(base_entry.mode),
828 new_mode: format_mode(wt_mode),
829 old_oid: base_entry.oid,
830 new_oid: wt_oid,
831 score: None,
832 });
833 }
834 }
835 }
836
837 Ok(result)
838}
839
840pub fn worktree_differs_from_index_entry(
846 odb: &Odb,
847 work_tree: &Path,
848 ie: &IndexEntry,
849) -> Result<bool> {
850 use crate::config::ConfigSet;
851 use crate::crlf;
852
853 let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
854 let file_path = work_tree.join(path_str_ref);
855
856 if ie.mode == 0o160000 {
857 let sub_head_oid = read_submodule_head(&file_path);
858 return Ok(sub_head_oid.as_ref() != Some(&ie.oid));
859 }
860
861 let meta = match fs::symlink_metadata(&file_path) {
862 Ok(m) => m,
863 Err(e)
864 if e.kind() == std::io::ErrorKind::NotFound
865 || e.raw_os_error() == Some(20) =>
866 {
867 return Ok(true);
868 }
869 Err(e) => return Err(Error::Io(e)),
870 };
871
872 if meta.is_dir() {
873 return Ok(true);
874 }
875
876 let worktree_mode = mode_from_metadata(&meta);
877 if worktree_mode != ie.mode {
878 return Ok(true);
879 }
880
881 let git_dir = work_tree.join(".git");
882 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
883 let conv = crlf::ConversionConfig::from_config(&config);
884 let attrs = crlf::load_gitattributes(work_tree);
885 let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, false, &config);
886 let worktree_oid = hash_worktree_file(
887 odb,
888 &file_path,
889 &meta,
890 &conv,
891 &file_attrs,
892 path_str_ref,
893 Some(ie),
894 )?;
895
896 let mut eff_oid = worktree_oid;
897 if eff_oid != ie.oid {
898 if let Ok(raw) = fs::read(&file_path) {
899 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
900 if raw_oid == ie.oid {
901 eff_oid = ie.oid;
902 }
903 }
904 }
905
906 Ok(eff_oid != ie.oid)
907}
908
909pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
910 if meta.len() as u32 != ie.size {
912 return false;
913 }
914 if meta.mtime() as u32 != ie.mtime_sec {
916 return false;
917 }
918 if meta.mtime_nsec() as u32 != ie.mtime_nsec {
919 return false;
920 }
921 if meta.ctime() as u32 != ie.ctime_sec {
923 return false;
924 }
925 if meta.ctime_nsec() as u32 != ie.ctime_nsec {
926 return false;
927 }
928 if meta.ino() as u32 != ie.ino {
930 return false;
931 }
932 if meta.dev() as u32 != ie.dev {
933 return false;
934 }
935 true
936}
937
938fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
941 let mut check = work_tree.to_path_buf();
942 let components: Vec<&str> = rel_path.split('/').collect();
943 for component in &components[..components.len().saturating_sub(1)] {
945 check.push(component);
946 match fs::symlink_metadata(&check) {
947 Ok(meta) if meta.file_type().is_symlink() => return true,
948 _ => {}
949 }
950 }
951 false
952}
953
954pub fn hash_worktree_file(
955 odb: &Odb,
956 path: &Path,
957 meta: &fs::Metadata,
958 conv: &crate::crlf::ConversionConfig,
959 file_attrs: &crate::crlf::FileAttrs,
960 rel_path: &str,
961 index_entry: Option<&IndexEntry>,
962) -> Result<ObjectId> {
963 let prior_blob: Option<Vec<u8>> = index_entry
964 .filter(|e| e.oid != zero_oid())
965 .and_then(|e| odb.read(&e.oid).ok().map(|o| o.data));
966 let data = if meta.file_type().is_symlink() {
967 let target = fs::read_link(path)?;
969 target.to_string_lossy().into_owned().into_bytes()
970 } else {
971 let raw = fs::read(path)?;
972 let opts = crate::crlf::ConvertToGitOpts {
975 index_blob: prior_blob.as_deref(),
976 renormalize: false,
977 check_safecrlf: false,
978 };
979 crate::crlf::convert_to_git_with_opts(&raw, rel_path, conv, file_attrs, opts).unwrap_or(raw)
980 };
981
982 Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
983}
984
985pub fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
987 if meta.file_type().is_symlink() {
988 0o120000
989 } else if meta.mode() & 0o111 != 0 {
990 0o100755
991 } else {
992 0o100644
993 }
994}
995
996pub fn diff_tree_to_worktree(
1013 odb: &Odb,
1014 tree_oid: Option<&ObjectId>,
1015 work_tree: &Path,
1016 index: &Index,
1017) -> Result<Vec<DiffEntry>> {
1018 use crate::config::ConfigSet;
1019 use crate::crlf;
1020
1021 let git_dir = work_tree.join(".git");
1022 let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
1023 let conv = crlf::ConversionConfig::from_config(&config);
1024 let attrs = crlf::load_gitattributes(work_tree);
1025
1026 let tree_flat = match tree_oid {
1028 Some(oid) => flatten_tree(odb, oid, "")?,
1029 None => Vec::new(),
1030 };
1031 let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
1032 tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
1033
1034 let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
1036 std::collections::BTreeMap::new();
1037 let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1038 for ie in &index.entries {
1039 if ie.stage() != 0 {
1040 continue;
1041 }
1042 let path = String::from_utf8_lossy(&ie.path).to_string();
1043 index_entries.insert(&ie.path, ie);
1044 index_paths.insert(path);
1045 }
1046
1047 let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1049 all_paths.extend(tree_map.keys().cloned());
1050 all_paths.extend(index_paths.iter().cloned());
1051
1052 let mut result = Vec::new();
1053
1054 for path in &all_paths {
1055 let tree_entry = tree_map.get(path.as_str());
1056
1057 let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
1059 || index_entries
1060 .get(path.as_bytes())
1061 .is_some_and(|ie| ie.mode == 0o160000);
1062 if is_gitlink {
1063 if let Some(te) = tree_entry {
1064 let sub_dir = work_tree.join(path);
1065 let sub_head = read_submodule_head_oid(&sub_dir);
1066 if sub_head.as_ref() != Some(&te.oid) {
1067 let new_oid = sub_head.unwrap_or_else(zero_oid);
1068 result.push(DiffEntry {
1069 status: DiffStatus::Modified,
1070 old_path: Some(path.clone()),
1071 new_path: Some(path.clone()),
1072 old_mode: format_mode(te.mode),
1073 new_mode: format_mode(te.mode),
1074 old_oid: te.oid,
1075 new_oid,
1076 score: None,
1077 });
1078 }
1079 }
1080 continue;
1081 }
1082
1083 let file_path = work_tree.join(path);
1084
1085 let wt_meta = match fs::symlink_metadata(&file_path) {
1086 Ok(m) => Some(m),
1087 Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
1088 Err(e) => return Err(Error::Io(e)),
1089 };
1090
1091 match (tree_entry, wt_meta) {
1092 (Some(te), Some(ref meta)) => {
1093 let wt_mode = mode_from_metadata(meta);
1094 let Some(ie) = index_entries.get(path.as_bytes()) else {
1095 continue;
1096 };
1097
1098 let index_matches_tree = ie.oid == te.oid && ie.mode == te.mode;
1099
1100 if index_matches_tree && wt_mode == te.mode && stat_matches(ie, meta) {
1102 continue;
1103 }
1104
1105 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1106 let idx_ent = index_entries.get(path.as_bytes()).copied();
1107
1108 if ie.oid == te.oid && ie.mode != te.mode {
1110 result.push(DiffEntry {
1111 status: DiffStatus::Modified,
1112 old_path: Some(path.clone()),
1113 new_path: Some(path.clone()),
1114 old_mode: format_mode(te.mode),
1115 new_mode: format_mode(ie.mode),
1116 old_oid: te.oid,
1117 new_oid: te.oid,
1118 score: None,
1119 });
1120 continue;
1121 }
1122
1123 if index_matches_tree {
1126 let wt_oid = hash_worktree_file(
1127 odb,
1128 &file_path,
1129 meta,
1130 &conv,
1131 &file_attrs,
1132 path,
1133 idx_ent,
1134 )?;
1135 let mut eff_oid = wt_oid;
1136 if eff_oid != te.oid {
1137 if let Ok(raw) = fs::read(&file_path) {
1138 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1139 if raw_oid == te.oid {
1140 eff_oid = te.oid;
1141 }
1142 }
1143 }
1144 if eff_oid != te.oid {
1145 result.push(DiffEntry {
1146 status: DiffStatus::Modified,
1147 old_path: Some(path.clone()),
1148 new_path: Some(path.clone()),
1149 old_mode: format_mode(te.mode),
1150 new_mode: format_mode(wt_mode),
1151 old_oid: te.oid,
1152 new_oid: eff_oid,
1153 score: None,
1154 });
1155 } else if wt_mode != te.mode {
1156 result.push(DiffEntry {
1157 status: DiffStatus::Modified,
1158 old_path: Some(path.clone()),
1159 new_path: Some(path.clone()),
1160 old_mode: format_mode(te.mode),
1161 new_mode: format_mode(wt_mode),
1162 old_oid: te.oid,
1163 new_oid: te.oid,
1164 score: None,
1165 });
1166 }
1167 continue;
1168 }
1169
1170 let wt_oid =
1172 hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path, idx_ent)?;
1173 let mut eff_oid = wt_oid;
1174 if eff_oid != te.oid {
1175 if let Ok(raw) = fs::read(&file_path) {
1176 let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
1177 if raw_oid == te.oid {
1178 eff_oid = te.oid;
1179 }
1180 }
1181 }
1182 if eff_oid != te.oid || wt_mode != te.mode {
1183 result.push(DiffEntry {
1184 status: DiffStatus::Modified,
1185 old_path: Some(path.clone()),
1186 new_path: Some(path.clone()),
1187 old_mode: format_mode(te.mode),
1188 new_mode: format_mode(wt_mode),
1189 old_oid: te.oid,
1190 new_oid: eff_oid,
1191 score: None,
1192 });
1193 }
1194 }
1195 (Some(te), None) => {
1196 result.push(DiffEntry {
1198 status: DiffStatus::Deleted,
1199 old_path: Some(path.clone()),
1200 new_path: None,
1201 old_mode: format_mode(te.mode),
1202 new_mode: "000000".to_owned(),
1203 old_oid: te.oid,
1204 new_oid: zero_oid(),
1205 score: None,
1206 });
1207 }
1208 (None, Some(ref meta)) => {
1209 let file_attrs = crlf::get_file_attrs(&attrs, path, false, &config);
1211 let wt_oid = hash_worktree_file(
1212 odb,
1213 &file_path,
1214 meta,
1215 &conv,
1216 &file_attrs,
1217 path,
1218 index_entries.get(path.as_bytes()).copied(),
1219 )?;
1220 let wt_mode = mode_from_metadata(meta);
1221 result.push(DiffEntry {
1222 status: DiffStatus::Added,
1223 old_path: None,
1224 new_path: Some(path.clone()),
1225 old_mode: "000000".to_owned(),
1226 new_mode: format_mode(wt_mode),
1227 old_oid: zero_oid(),
1228 new_oid: wt_oid,
1229 score: None,
1230 });
1231 }
1232 (None, None) => {
1233 }
1235 }
1236 }
1237
1238 result.sort_by(|a, b| a.path().cmp(b.path()));
1239 Ok(result)
1240}
1241
1242pub fn detect_renames(odb: &Odb, entries: Vec<DiffEntry>, threshold: u32) -> Vec<DiffEntry> {
1252 let mut deleted: Vec<DiffEntry> = Vec::new();
1254 let mut added: Vec<DiffEntry> = Vec::new();
1255 let mut others: Vec<DiffEntry> = Vec::new();
1256
1257 for entry in entries {
1258 match entry.status {
1259 DiffStatus::Deleted => deleted.push(entry),
1260 DiffStatus::Added => added.push(entry),
1261 _ => others.push(entry),
1262 }
1263 }
1264
1265 if deleted.is_empty() || added.is_empty() {
1266 let mut result = others;
1268 result.extend(deleted);
1269 result.extend(added);
1270 result.sort_by(|a, b| a.path().cmp(b.path()));
1271 return result;
1272 }
1273
1274 let deleted_contents: Vec<Option<Vec<u8>>> = deleted
1276 .iter()
1277 .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
1278 .collect();
1279
1280 let added_contents: Vec<Option<Vec<u8>>> = added
1282 .iter()
1283 .map(|a| odb.read(&a.new_oid).ok().map(|obj| obj.data))
1284 .collect();
1285
1286 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1289
1290 for (di, del) in deleted.iter().enumerate() {
1291 for (ai, add) in added.iter().enumerate() {
1292 if del.old_oid == add.new_oid {
1294 scores.push((100, di, ai));
1295 continue;
1296 }
1297
1298 let score = match (&deleted_contents[di], &added_contents[ai]) {
1299 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
1300 _ => 0,
1301 };
1302
1303 if score >= threshold {
1304 scores.push((score, di, ai));
1305 }
1306 }
1307 }
1308
1309 scores.sort_by(|a, b| {
1312 let a_same = same_basename(&deleted[a.1], &added[a.2]);
1313 let b_same = same_basename(&deleted[b.1], &added[b.2]);
1314 b_same.cmp(&a_same).then_with(|| b.0.cmp(&a.0))
1315 });
1316
1317 let mut used_deleted = vec![false; deleted.len()];
1318 let mut used_added = vec![false; added.len()];
1319 let mut renames: Vec<DiffEntry> = Vec::new();
1320
1321 for (score, di, ai) in &scores {
1322 if used_deleted[*di] || used_added[*ai] {
1323 continue;
1324 }
1325 used_deleted[*di] = true;
1326 used_added[*ai] = true;
1327
1328 let del = &deleted[*di];
1329 let add = &added[*ai];
1330
1331 renames.push(DiffEntry {
1332 status: DiffStatus::Renamed,
1333 old_path: del.old_path.clone(),
1334 new_path: add.new_path.clone(),
1335 old_mode: del.old_mode.clone(),
1336 new_mode: add.new_mode.clone(),
1337 old_oid: del.old_oid,
1338 new_oid: add.new_oid,
1339 score: Some(*score),
1340 });
1341 }
1342
1343 let mut result = others;
1345 result.extend(renames);
1346 for (i, entry) in deleted.into_iter().enumerate() {
1347 if !used_deleted[i] {
1348 result.push(entry);
1349 }
1350 }
1351 for (i, entry) in added.into_iter().enumerate() {
1352 if !used_added[i] {
1353 result.push(entry);
1354 }
1355 }
1356
1357 result.sort_by(|a, b| a.path().cmp(b.path()));
1358 result
1359}
1360
1361pub fn detect_copies(
1372 odb: &Odb,
1373 entries: Vec<DiffEntry>,
1374 threshold: u32,
1375 find_copies_harder: bool,
1376 source_tree_entries: &[(String, String, ObjectId)],
1377) -> Vec<DiffEntry> {
1378 use std::collections::{HashMap, HashSet};
1379
1380 let mut deleted: Vec<DiffEntry> = Vec::new();
1382 let mut added: Vec<DiffEntry> = Vec::new();
1383 let mut others: Vec<DiffEntry> = Vec::new();
1384
1385 for entry in entries {
1386 match entry.status {
1387 DiffStatus::Deleted => deleted.push(entry),
1388 DiffStatus::Added => added.push(entry),
1389 _ => others.push(entry),
1390 }
1391 }
1392
1393 if added.is_empty() {
1394 let mut result = others;
1395 result.extend(deleted);
1396 result.sort_by(|a, b| a.path().cmp(b.path()));
1397 return result;
1398 }
1399
1400 let mut sources: Vec<(String, ObjectId, bool)> = Vec::new(); let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
1404
1405 for entry in &deleted {
1406 if let Some(ref path) = entry.old_path {
1407 deleted_source_idx.insert(path.clone(), sources.len());
1408 sources.push((path.clone(), entry.old_oid, true));
1409 }
1410 }
1411
1412 for entry in &others {
1414 if entry.status == DiffStatus::Modified {
1415 if let Some(ref old_path) = entry.old_path {
1416 if !sources.iter().any(|(p, _, _)| p == old_path) {
1417 sources.push((old_path.clone(), entry.old_oid, false));
1418 }
1419 }
1420 }
1421 }
1422
1423 if find_copies_harder {
1425 for (path, _mode, oid) in source_tree_entries {
1426 if !sources.iter().any(|(p, _, _)| p == path) {
1427 sources.push((path.clone(), *oid, false));
1428 }
1429 }
1430 }
1431
1432 if sources.is_empty() {
1433 let mut result = others;
1434 result.extend(deleted);
1435 result.extend(added);
1436 result.sort_by(|a, b| a.path().cmp(b.path()));
1437 return result;
1438 }
1439
1440 let source_contents: Vec<Option<Vec<u8>>> = sources
1442 .iter()
1443 .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
1444 .collect();
1445
1446 let added_contents: Vec<Option<Vec<u8>>> = added
1448 .iter()
1449 .map(|a| odb.read(&a.new_oid).ok().map(|obj| obj.data))
1450 .collect();
1451
1452 let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1454 for (si, (src_path, src_oid, _)) in sources.iter().enumerate() {
1455 for (ai, add) in added.iter().enumerate() {
1456 if add.new_path.as_deref() == Some(src_path.as_str()) {
1459 continue;
1460 }
1461 if *src_oid == add.new_oid {
1462 scores.push((100, si, ai));
1463 continue;
1464 }
1465 let score = match (&source_contents[si], &added_contents[ai]) {
1466 (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
1467 _ => 0,
1468 };
1469 if score >= threshold {
1470 scores.push((score, si, ai));
1471 }
1472 }
1473 }
1474
1475 scores.sort_by(|a, b| b.0.cmp(&a.0));
1477
1478 let mut used_added = vec![false; added.len()];
1480 let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
1481 for &(score, si, ai) in &scores {
1482 if used_added[ai] {
1483 continue;
1484 }
1485 used_added[ai] = true;
1486 source_to_added.entry(si).or_default().push((ai, score));
1487 }
1488
1489 let mut used_added2 = vec![false; added.len()];
1493 let mut result_entries: Vec<DiffEntry> = Vec::new();
1494
1495 let mut renamed_deleted: HashSet<usize> = HashSet::new();
1497
1498 for (&si, assignments_for_src) in &source_to_added {
1500 let (_, _, is_deleted) = &sources[si];
1501 if *is_deleted && !assignments_for_src.is_empty() {
1502 let rename_ai = assignments_for_src
1505 .iter()
1506 .max_by_key(|(ai, _score)| added[*ai].path().to_string())
1507 .map(|(ai, _)| *ai);
1508
1509 for &(ai, score) in assignments_for_src {
1510 let (ref src_path, _, _) = sources[si];
1511 let add = &added[ai];
1512 let src_mode = source_tree_entries
1513 .iter()
1514 .find(|(p, _, _)| p == src_path)
1515 .map(|(_, m, _)| m.clone())
1516 .unwrap_or_else(|| add.old_mode.clone());
1517
1518 let is_rename = Some(ai) == rename_ai;
1519 result_entries.push(DiffEntry {
1520 status: if is_rename {
1521 DiffStatus::Renamed
1522 } else {
1523 DiffStatus::Copied
1524 },
1525 old_path: Some(src_path.clone()),
1526 new_path: add.new_path.clone(),
1527 old_mode: src_mode,
1528 new_mode: add.new_mode.clone(),
1529 old_oid: sources[si].1,
1530 new_oid: add.new_oid,
1531 score: Some(score),
1532 });
1533 used_added2[ai] = true;
1534 }
1535 renamed_deleted.insert(si);
1536 } else {
1537 for &(ai, score) in assignments_for_src {
1539 let (ref src_path, _, _) = sources[si];
1540 let add = &added[ai];
1541 let src_mode = source_tree_entries
1542 .iter()
1543 .find(|(p, _, _)| p == src_path)
1544 .map(|(_, m, _)| m.clone())
1545 .unwrap_or_else(|| add.old_mode.clone());
1546
1547 result_entries.push(DiffEntry {
1548 status: DiffStatus::Copied,
1549 old_path: Some(src_path.clone()),
1550 new_path: add.new_path.clone(),
1551 old_mode: src_mode,
1552 new_mode: add.new_mode.clone(),
1553 old_oid: sources[si].1,
1554 new_oid: add.new_oid,
1555 score: Some(score),
1556 });
1557 used_added2[ai] = true;
1558 }
1559 }
1560 }
1561
1562 for entry in deleted.into_iter() {
1564 if let Some(ref path) = entry.old_path {
1565 if let Some(&si) = deleted_source_idx.get(path) {
1566 if renamed_deleted.contains(&si) {
1567 continue;
1569 }
1570 }
1571 }
1572 result_entries.push(entry);
1573 }
1574
1575 let mut result = others;
1576 result.extend(result_entries);
1577 for (i, entry) in added.into_iter().enumerate() {
1579 if !used_added2[i] {
1580 result.push(entry);
1581 }
1582 }
1583
1584 result.sort_by(|a, b| a.path().cmp(b.path()));
1585 result
1586}
1587
1588pub fn status_apply_rename_copy_detection(
1598 odb: &Odb,
1599 unstaged_raw: Vec<DiffEntry>,
1600 threshold: u32,
1601 copies: bool,
1602 head_tree: Option<&ObjectId>,
1603) -> Result<Vec<DiffEntry>> {
1604 let after_renames = detect_renames(odb, unstaged_raw, threshold);
1605 if !copies {
1606 return Ok(after_renames);
1607 }
1608 let source_tree_entries: Vec<(String, String, ObjectId)> = match head_tree {
1609 Some(oid) => flatten_tree(odb, oid, "")?
1610 .into_iter()
1611 .map(|e| (e.path, format_mode(e.mode), e.oid))
1612 .collect(),
1613 None => Vec::new(),
1614 };
1615 Ok(detect_copies(
1616 odb,
1617 after_renames,
1618 threshold,
1619 false,
1620 &source_tree_entries,
1621 ))
1622}
1623
1624pub fn format_rename_path(old: &str, new: &str) -> String {
1632 let ob = old.as_bytes();
1633 let nb = new.as_bytes();
1634
1635 let pfx = {
1637 let mut last_sep = 0usize;
1638 let min_len = ob.len().min(nb.len());
1639 for i in 0..min_len {
1640 if ob[i] != nb[i] {
1641 break;
1642 }
1643 if ob[i] == b'/' {
1644 last_sep = i + 1;
1645 }
1646 }
1647 last_sep
1648 };
1649
1650 let mut sfx = {
1652 let mut last_sep = 0usize;
1653 let min_len = ob.len().min(nb.len());
1654 for i in 0..min_len {
1655 let oi = ob.len() - 1 - i;
1656 let ni = nb.len() - 1 - i;
1657 if ob[oi] != nb[ni] {
1658 break;
1659 }
1660 if ob[oi] == b'/' {
1661 last_sep = i + 1;
1662 }
1663 }
1664 last_sep
1665 };
1666
1667 let mut sfx_at_old = ob.len() - sfx;
1669 let mut sfx_at_new = nb.len() - sfx;
1670
1671 while pfx > sfx_at_old && pfx > sfx_at_new && sfx > 0 {
1674 let suffix_bytes = &ob[sfx_at_old..];
1676 let mut new_sfx = 0;
1677 for i in 1..suffix_bytes.len() {
1679 if suffix_bytes[i] == b'/' {
1680 new_sfx = sfx - i;
1681 break;
1682 }
1683 }
1684 if new_sfx == 0 || new_sfx >= sfx {
1685 sfx_at_old = ob.len();
1686 sfx_at_new = nb.len();
1687 break;
1688 }
1689 sfx = new_sfx;
1690 sfx_at_old = ob.len() - sfx;
1691 sfx_at_new = nb.len() - sfx;
1692 }
1693
1694 let prefix = &old[..pfx];
1701 let suffix = &old[sfx_at_old..];
1702 let old_mid = if pfx <= sfx_at_old {
1703 &old[pfx..sfx_at_old]
1704 } else {
1705 ""
1706 };
1707 let new_mid = if pfx <= sfx_at_new {
1708 &new[pfx..sfx_at_new]
1709 } else {
1710 ""
1711 };
1712
1713 if prefix.is_empty() && suffix.is_empty() {
1714 return format!("{old} => {new}");
1715 }
1716
1717 format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
1718}
1719
1720fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
1722 let old = del.old_path.as_deref().unwrap_or("");
1723 let new = add.new_path.as_deref().unwrap_or("");
1724 let old_base = old.rsplit('/').next().unwrap_or(old);
1725 let new_base = new.rsplit('/').next().unwrap_or(new);
1726 old_base == new_base && !old_base.is_empty()
1727}
1728
1729fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
1734 let old_norm = crate::crlf::crlf_to_lf(old);
1737 let new_norm = crate::crlf::crlf_to_lf(new);
1738
1739 let src_size = old_norm.len();
1740 let dst_size = new_norm.len();
1741
1742 if src_size == 0 && dst_size == 0 {
1743 return 100;
1744 }
1745 let total = src_size + dst_size;
1746 if total == 0 {
1747 return 100;
1748 }
1749
1750 use similar::{ChangeTag, TextDiff};
1752 let old_str = String::from_utf8_lossy(&old_norm);
1753 let new_str = String::from_utf8_lossy(&new_norm);
1754 let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
1755
1756 let mut shared_bytes = 0usize;
1757 for change in diff.iter_all_changes() {
1758 if change.tag() == ChangeTag::Equal {
1759 shared_bytes += change.value().len();
1761 }
1762 }
1763
1764 let max_size = src_size.max(dst_size);
1767
1768 ((shared_bytes * 100) / max_size).min(100) as u32
1769}
1770
1771#[must_use]
1775pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
1776 compute_similarity(old, new)
1777}
1778
1779pub fn format_raw(entry: &DiffEntry) -> String {
1785 let path = match entry.status {
1786 DiffStatus::Renamed | DiffStatus::Copied => {
1787 format!(
1788 "{}\t{}",
1789 entry.old_path.as_deref().unwrap_or(""),
1790 entry.new_path.as_deref().unwrap_or("")
1791 )
1792 }
1793 _ => entry.path().to_owned(),
1794 };
1795
1796 let status_str = match (entry.status, entry.score) {
1797 (DiffStatus::Renamed, Some(s)) => format!("R{:03}", s),
1798 (DiffStatus::Copied, Some(s)) => format!("C{:03}", s),
1799 _ => entry.status.letter().to_string(),
1800 };
1801
1802 format!(
1803 ":{} {} {} {} {}\t{}",
1804 entry.old_mode, entry.new_mode, entry.old_oid, entry.new_oid, status_str, path
1805 )
1806}
1807
1808pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
1810 let ellipsis = if std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes") {
1811 "..."
1812 } else {
1813 ""
1814 };
1815 let old_hex = format!("{}", entry.old_oid);
1816 let new_hex = format!("{}", entry.new_oid);
1817 let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
1818 let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
1819
1820 let path = entry.path();
1821
1822 format!(
1823 ":{} {} {}{} {}{} {}\t{}",
1824 entry.old_mode,
1825 entry.new_mode,
1826 old_abbrev,
1827 ellipsis,
1828 new_abbrev,
1829 ellipsis,
1830 entry.status.letter(),
1831 path
1832 )
1833}
1834
1835pub fn unified_diff(
1850 old_content: &str,
1851 new_content: &str,
1852 old_path: &str,
1853 new_path: &str,
1854 context_lines: usize,
1855) -> String {
1856 unified_diff_with_prefix(
1857 old_content,
1858 new_content,
1859 old_path,
1860 new_path,
1861 context_lines,
1862 0,
1863 "a/",
1864 "b/",
1865 )
1866}
1867
1868pub fn unified_diff_with_prefix(
1873 old_content: &str,
1874 new_content: &str,
1875 old_path: &str,
1876 new_path: &str,
1877 context_lines: usize,
1878 inter_hunk_context: usize,
1879 src_prefix: &str,
1880 dst_prefix: &str,
1881) -> String {
1882 unified_diff_with_prefix_and_funcname(
1883 old_content,
1884 new_content,
1885 old_path,
1886 new_path,
1887 context_lines,
1888 inter_hunk_context,
1889 src_prefix,
1890 dst_prefix,
1891 None,
1892 )
1893}
1894
1895pub fn unified_diff_with_prefix_and_funcname(
1898 old_content: &str,
1899 new_content: &str,
1900 old_path: &str,
1901 new_path: &str,
1902 context_lines: usize,
1903 inter_hunk_context: usize,
1904 src_prefix: &str,
1905 dst_prefix: &str,
1906 funcname_matcher: Option<&FuncnameMatcher>,
1907) -> String {
1908 unified_diff_with_prefix_and_funcname_and_algorithm(
1909 old_content,
1910 new_content,
1911 old_path,
1912 new_path,
1913 context_lines,
1914 inter_hunk_context,
1915 src_prefix,
1916 dst_prefix,
1917 funcname_matcher,
1918 similar::Algorithm::Myers,
1919 )
1920}
1921
1922pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
1925 old_content: &str,
1926 new_content: &str,
1927 old_path: &str,
1928 new_path: &str,
1929 context_lines: usize,
1930 inter_hunk_context: usize,
1931 src_prefix: &str,
1932 dst_prefix: &str,
1933 funcname_matcher: Option<&FuncnameMatcher>,
1934 algorithm: similar::Algorithm,
1935) -> String {
1936 use similar::{group_diff_ops, udiff::UnifiedDiffHunk, TextDiff};
1937
1938 let diff = TextDiff::configure()
1939 .algorithm(algorithm)
1940 .diff_lines(old_content, new_content);
1941
1942 let mut output = String::new();
1943 if old_path == "/dev/null" {
1944 output.push_str("--- /dev/null\n");
1945 } else {
1946 output.push_str(&format!("--- {src_prefix}{old_path}\n"));
1947 }
1948 if new_path == "/dev/null" {
1949 output.push_str("+++ /dev/null\n");
1950 } else {
1951 output.push_str(&format!("+++ {dst_prefix}{new_path}\n"));
1952 }
1953
1954 let old_lines: Vec<&str> = old_content.lines().collect();
1955
1956 let group_radius = context_lines
1957 .saturating_mul(2)
1958 .saturating_add(inter_hunk_context);
1959 let op_groups = group_diff_ops(diff.ops().to_vec(), group_radius);
1960
1961 for ops in op_groups {
1962 if ops.is_empty() {
1963 continue;
1964 }
1965 let hunk = UnifiedDiffHunk::new(ops, &diff, true);
1966 let hunk_str = format!("{hunk}");
1967 if let Some(first_newline) = hunk_str.find('\n') {
1971 let header_line = &hunk_str[..first_newline];
1972 let rest = &hunk_str[first_newline..];
1973
1974 if let Some(func_ctx) =
1976 extract_function_context(header_line, &old_lines, funcname_matcher)
1977 {
1978 output.push_str(header_line);
1979 output.push(' ');
1980 output.push_str(&func_ctx);
1981 output.push_str(rest);
1982 } else {
1983 output.push_str(&hunk_str);
1984 }
1985 } else {
1986 output.push_str(&hunk_str);
1987 }
1988 }
1989
1990 output
1991}
1992
1993pub fn anchored_unified_diff(
2002 old_content: &str,
2003 new_content: &str,
2004 old_path: &str,
2005 new_path: &str,
2006 context_lines: usize,
2007 anchors: &[String],
2008 algorithm: similar::Algorithm,
2009) -> String {
2010 use similar::TextDiff;
2011
2012 let old_lines: Vec<&str> = old_content.lines().collect();
2013 let new_lines: Vec<&str> = new_content.lines().collect();
2014
2015 let mut anchor_pairs: Vec<(usize, usize)> = Vec::new(); for anchor in anchors {
2019 let anchor_str = anchor.as_str();
2020
2021 let old_positions: Vec<usize> = old_lines
2023 .iter()
2024 .enumerate()
2025 .filter(|(_, l)| l.trim_end() == anchor_str)
2026 .map(|(i, _)| i)
2027 .collect();
2028
2029 let new_positions: Vec<usize> = new_lines
2031 .iter()
2032 .enumerate()
2033 .filter(|(_, l)| l.trim_end() == anchor_str)
2034 .map(|(i, _)| i)
2035 .collect();
2036
2037 if old_positions.len() == 1 && new_positions.len() == 1 {
2039 anchor_pairs.push((old_positions[0], new_positions[0]));
2040 }
2041 }
2042
2043 if anchor_pairs.is_empty() {
2045 return unified_diff_with_prefix_and_funcname_and_algorithm(
2046 old_content,
2047 new_content,
2048 old_path,
2049 new_path,
2050 context_lines,
2051 0,
2052 "a/",
2053 "b/",
2054 None,
2055 algorithm,
2056 );
2057 }
2058
2059 anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
2061
2062 let mut filtered: Vec<(usize, usize)> = Vec::new();
2065 for &pair in &anchor_pairs {
2066 if filtered.is_empty() || pair.1 > filtered.last().unwrap().1 {
2067 filtered.push(pair);
2068 }
2069 }
2070 let anchor_pairs = filtered;
2071
2072 struct DiffOp {
2081 tag: char, line: String,
2083 }
2084
2085 let mut ops: Vec<DiffOp> = Vec::new();
2086 let mut old_pos = 0usize;
2087 let mut new_pos = 0usize;
2088
2089 for &(old_anchor, new_anchor) in &anchor_pairs {
2090 let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
2092 let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
2093
2094 let old_seg_text = old_segment.join("\n");
2095 let new_seg_text = new_segment.join("\n");
2096
2097 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
2098 let old_seg_input = if old_seg_text.is_empty() {
2099 String::new()
2100 } else {
2101 format!("{}\n", old_seg_text)
2102 };
2103 let new_seg_input = if new_seg_text.is_empty() {
2104 String::new()
2105 } else {
2106 format!("{}\n", new_seg_text)
2107 };
2108 let seg_diff = TextDiff::configure()
2109 .algorithm(algorithm)
2110 .diff_lines(&old_seg_input, &new_seg_input);
2111 for change in seg_diff.iter_all_changes() {
2112 let tag = match change.tag() {
2113 similar::ChangeTag::Equal => ' ',
2114 similar::ChangeTag::Delete => '-',
2115 similar::ChangeTag::Insert => '+',
2116 };
2117 ops.push(DiffOp {
2118 tag,
2119 line: change.value().trim_end_matches('\n').to_string(),
2120 });
2121 }
2122 }
2123
2124 ops.push(DiffOp {
2126 tag: ' ',
2127 line: old_lines[old_anchor].to_string(),
2128 });
2129
2130 old_pos = old_anchor + 1;
2131 new_pos = new_anchor + 1;
2132 }
2133
2134 let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
2136 let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
2137 let old_seg_text = old_segment.join("\n");
2138 let new_seg_text = new_segment.join("\n");
2139
2140 if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
2141 let old_seg_input = if old_seg_text.is_empty() {
2142 String::new()
2143 } else {
2144 format!("{}\n", old_seg_text)
2145 };
2146 let new_seg_input = if new_seg_text.is_empty() {
2147 String::new()
2148 } else {
2149 format!("{}\n", new_seg_text)
2150 };
2151 let seg_diff = TextDiff::configure()
2152 .algorithm(algorithm)
2153 .diff_lines(&old_seg_input, &new_seg_input);
2154 for change in seg_diff.iter_all_changes() {
2155 let tag = match change.tag() {
2156 similar::ChangeTag::Equal => ' ',
2157 similar::ChangeTag::Delete => '-',
2158 similar::ChangeTag::Insert => '+',
2159 };
2160 ops.push(DiffOp {
2161 tag,
2162 line: change.value().trim_end_matches('\n').to_string(),
2163 });
2164 }
2165 }
2166
2167 let mut output = String::new();
2169 if old_path == "/dev/null" {
2170 output.push_str("--- /dev/null\n");
2171 } else {
2172 output.push_str(&format!("--- a/{old_path}\n"));
2173 }
2174 if new_path == "/dev/null" {
2175 output.push_str("+++ /dev/null\n");
2176 } else {
2177 output.push_str(&format!("+++ b/{new_path}\n"));
2178 }
2179
2180 let total_ops = ops.len();
2182 if total_ops == 0 {
2183 return output;
2184 }
2185
2186 let mut hunks: Vec<(usize, usize)> = Vec::new(); let mut i = 0;
2189 while i < total_ops {
2190 if ops[i].tag != ' ' {
2191 let start = i.saturating_sub(context_lines);
2192 let mut end = i;
2193 while end < total_ops {
2195 if ops[end].tag != ' ' {
2196 end += 1;
2197 continue;
2198 }
2199 let mut next_change = end;
2201 while next_change < total_ops && ops[next_change].tag == ' ' {
2202 next_change += 1;
2203 }
2204 if next_change < total_ops && next_change - end <= context_lines * 2 {
2205 end = next_change + 1;
2206 } else {
2207 end = (end + context_lines).min(total_ops);
2208 break;
2209 }
2210 }
2211 if let Some(last) = hunks.last_mut() {
2213 if start <= last.1 {
2214 last.1 = end;
2215 } else {
2216 hunks.push((start, end));
2217 }
2218 } else {
2219 hunks.push((start, end));
2220 }
2221 i = end;
2222 } else {
2223 i += 1;
2224 }
2225 }
2226
2227 for (start, end) in hunks {
2229 let mut old_start = 1usize;
2231 let mut new_start = 1usize;
2232 for op in &ops[..start] {
2234 match op.tag {
2235 ' ' => {
2236 old_start += 1;
2237 new_start += 1;
2238 }
2239 '-' => {
2240 old_start += 1;
2241 }
2242 '+' => {
2243 new_start += 1;
2244 }
2245 _ => {}
2246 }
2247 }
2248 let mut old_count = 0usize;
2249 let mut new_count = 0usize;
2250 for op in &ops[start..end] {
2251 match op.tag {
2252 ' ' => {
2253 old_count += 1;
2254 new_count += 1;
2255 }
2256 '-' => {
2257 old_count += 1;
2258 }
2259 '+' => {
2260 new_count += 1;
2261 }
2262 _ => {}
2263 }
2264 }
2265
2266 output.push_str(&format!(
2267 "@@ -{},{} +{},{} @@\n",
2268 old_start, old_count, new_start, new_count
2269 ));
2270 for op in &ops[start..end] {
2271 output.push(op.tag);
2272 output.push_str(&op.line);
2273 output.push('\n');
2274 }
2275 }
2276
2277 output
2278}
2279
2280fn extract_function_context(
2286 header: &str,
2287 old_lines: &[&str],
2288 funcname_matcher: Option<&FuncnameMatcher>,
2289) -> Option<String> {
2290 let at_pos = header.find("-")?;
2292 let rest = &header[at_pos + 1..];
2293 let comma_or_space = rest.find([',', ' '])?;
2294 let start_str = &rest[..comma_or_space];
2295 let start_line: usize = start_str.parse().ok()?;
2296
2297 if start_line <= 1 {
2298 return None;
2299 }
2300
2301 let search_end = (start_line - 1).min(old_lines.len());
2306 let truncate = |text: &str| {
2307 if text.len() > 80 {
2308 let mut end = 80;
2309 while end > 0 && !text.is_char_boundary(end) {
2310 end -= 1;
2311 }
2312 text[..end].to_owned()
2313 } else {
2314 text.to_owned()
2315 }
2316 };
2317
2318 for i in (0..search_end).rev() {
2319 let line = old_lines[i];
2320 if line.is_empty() {
2321 continue;
2322 }
2323 if let Some(matcher) = funcname_matcher {
2324 if let Some(matched) = matcher.match_line(line) {
2325 return Some(truncate(&matched));
2326 }
2327 continue;
2328 }
2329
2330 let first = line.as_bytes()[0];
2331 if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
2332 return Some(truncate(line.trim_end_matches(char::is_whitespace)));
2333 }
2334 }
2335 None
2336}
2337
2338pub fn format_stat_line(
2342 path: &str,
2343 insertions: usize,
2344 deletions: usize,
2345 max_path_len: usize,
2346) -> String {
2347 format_stat_line_width(path, insertions, deletions, max_path_len, 0)
2348}
2349
2350pub fn format_stat_line_width(
2351 path: &str,
2352 insertions: usize,
2353 deletions: usize,
2354 max_path_len: usize,
2355 count_width: usize,
2356) -> String {
2357 let total = insertions + deletions;
2358 let plus = "+".repeat(insertions.min(50));
2359 let minus = "-".repeat(deletions.min(50));
2360 let cw = if count_width > 0 {
2361 count_width
2362 } else {
2363 format!("{}", total).len()
2364 };
2365 let bar = format!("{}{}", plus, minus);
2366 if bar.is_empty() {
2367 format!(
2368 " {:<width$} | {:>cw$}",
2369 path,
2370 total,
2371 width = max_path_len,
2372 cw = cw
2373 )
2374 } else {
2375 format!(
2376 " {:<width$} | {:>cw$} {}",
2377 path,
2378 total,
2379 bar,
2380 width = max_path_len,
2381 cw = cw
2382 )
2383 }
2384}
2385
2386pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
2390 count_changes_with_algorithm(old_content, new_content, similar::Algorithm::Myers)
2391}
2392
2393#[must_use]
2398pub fn count_changes_with_algorithm(
2399 old_content: &str,
2400 new_content: &str,
2401 algorithm: similar::Algorithm,
2402) -> (usize, usize) {
2403 use similar::{ChangeTag, TextDiff};
2404
2405 let diff = TextDiff::configure()
2406 .algorithm(algorithm)
2407 .diff_lines(old_content, new_content);
2408 let mut ins = 0;
2409 let mut del = 0;
2410
2411 for change in diff.iter_all_changes() {
2412 match change.tag() {
2413 ChangeTag::Insert => ins += 1,
2414 ChangeTag::Delete => del += 1,
2415 ChangeTag::Equal => {}
2416 }
2417 }
2418
2419 (ins, del)
2420}
2421
2422#[must_use]
2427pub fn count_git_lines(data: &[u8]) -> usize {
2428 if data.is_empty() {
2429 return 0;
2430 }
2431 let mut count = 0usize;
2432 let mut nl_just_seen = false;
2433 for &ch in data {
2434 if ch == b'\n' {
2435 count += 1;
2436 nl_just_seen = true;
2437 } else {
2438 nl_just_seen = false;
2439 }
2440 }
2441 if !nl_just_seen {
2442 count += 1;
2443 }
2444 count
2445}
2446
2447const DIFF_MAX_SCORE: u64 = 60_000;
2448const DIFF_MINIMUM_BREAK_SIZE: usize = 400;
2449const DIFF_DEFAULT_BREAK_SCORE: u64 = 30_000;
2450const DIFF_HASHBASE: u32 = 107_927;
2451
2452#[derive(Clone, Copy, Default)]
2453struct SpanSlot {
2454 hashval: u32,
2455 cnt: u32,
2456}
2457
2458struct SpanHashTop {
2459 alloc_log2: u8,
2460 free_slots: i32,
2461 data: Vec<SpanSlot>,
2462}
2463
2464impl SpanHashTop {
2465 fn new(initial_log2: u8) -> Self {
2466 let cap = 1usize << initial_log2;
2467 Self {
2468 alloc_log2: initial_log2,
2469 free_slots: initial_free(initial_log2),
2470 data: vec![SpanSlot::default(); cap],
2471 }
2472 }
2473
2474 fn len(&self) -> usize {
2475 1usize << self.alloc_log2
2476 }
2477
2478 fn add_span(&mut self, hashval: u32, cnt: u32) {
2479 loop {
2480 let lim = self.len();
2481 let mut bucket = (hashval as usize) & (lim - 1);
2482 loop {
2483 let h = &mut self.data[bucket];
2484 if h.cnt == 0 {
2485 h.hashval = hashval;
2486 h.cnt = cnt;
2487 self.free_slots -= 1;
2488 if self.free_slots < 0 {
2489 self.rehash();
2490 break;
2491 }
2492 return;
2493 }
2494 if h.hashval == hashval {
2495 h.cnt = h.cnt.saturating_add(cnt);
2496 return;
2497 }
2498 bucket += 1;
2499 if bucket >= lim {
2500 bucket = 0;
2501 }
2502 }
2503 }
2504 }
2505
2506 fn rehash(&mut self) {
2507 let old = std::mem::take(&mut self.data);
2508 let old_log = self.alloc_log2;
2509 self.alloc_log2 = old_log.saturating_add(1);
2510 let new_len = 1usize << self.alloc_log2;
2511 self.free_slots = initial_free(self.alloc_log2);
2512 self.data = vec![SpanSlot::default(); new_len];
2513 let old_sz = 1usize << old_log;
2514 for o in old.iter().take(old_sz) {
2515 let o = *o;
2516 if o.cnt == 0 {
2517 continue;
2518 }
2519 self.add_span_after_rehash(o.hashval, o.cnt);
2520 }
2521 }
2522
2523 fn add_span_after_rehash(&mut self, hashval: u32, cnt: u32) {
2524 loop {
2525 let lim = self.len();
2526 let mut bucket = (hashval as usize) & (lim - 1);
2527 loop {
2528 let h = &mut self.data[bucket];
2529 if h.cnt == 0 {
2530 h.hashval = hashval;
2531 h.cnt = cnt;
2532 self.free_slots -= 1;
2533 if self.free_slots < 0 {
2534 self.rehash();
2535 break;
2536 }
2537 return;
2538 }
2539 if h.hashval == hashval {
2540 h.cnt = h.cnt.saturating_add(cnt);
2541 return;
2542 }
2543 bucket += 1;
2544 if bucket >= lim {
2545 bucket = 0;
2546 }
2547 }
2548 }
2549 }
2550
2551 fn sort_by_hashval(&mut self) {
2552 let sz = self.len();
2553 self.data[..sz].sort_by(|a, b| {
2554 if a.cnt == 0 {
2555 return std::cmp::Ordering::Greater;
2556 }
2557 if b.cnt == 0 {
2558 return std::cmp::Ordering::Less;
2559 }
2560 a.hashval.cmp(&b.hashval)
2561 });
2562 }
2563}
2564
2565fn initial_free(sz_log2: u8) -> i32 {
2566 let sz = sz_log2 as i32;
2567 ((1i32 << sz_log2) * (sz - 3) / sz).max(0)
2568}
2569
2570fn hash_blob_spans(buf: &[u8], is_text: bool) -> SpanHashTop {
2571 let mut hash = SpanHashTop::new(9);
2572 let mut n = 0u32;
2573 let mut accum1: u32 = 0;
2574 let mut accum2: u32 = 0;
2575 let mut i = 0usize;
2576 while i < buf.len() {
2577 let c = buf[i] as u32;
2578 let old_1 = accum1;
2579 i += 1;
2580
2581 if is_text && c == b'\r' as u32 && i < buf.len() && buf[i] == b'\n' {
2582 continue;
2583 }
2584
2585 accum1 = accum1.wrapping_shl(7) ^ accum2.wrapping_shr(25);
2586 accum2 = accum2.wrapping_shl(7) ^ old_1.wrapping_shr(25);
2587 accum1 = accum1.wrapping_add(c);
2588 n += 1;
2589 if n < 64 && c != b'\n' as u32 {
2590 continue;
2591 }
2592 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
2593 hash.add_span(hashval, n);
2594 n = 0;
2595 accum1 = 0;
2596 accum2 = 0;
2597 }
2598 if n > 0 {
2599 let hashval = (accum1.wrapping_add(accum2.wrapping_mul(0x61))) % DIFF_HASHBASE;
2600 hash.add_span(hashval, n);
2601 }
2602 hash.sort_by_hashval();
2603 hash
2604}
2605
2606#[must_use]
2611pub fn diffcore_count_changes(old: &[u8], new: &[u8]) -> (u64, u64) {
2612 let src_is_text = !crate::merge_file::is_binary(old);
2613 let dst_is_text = !crate::merge_file::is_binary(new);
2614 let src_count = hash_blob_spans(old, src_is_text);
2615 let dst_count = hash_blob_spans(new, dst_is_text);
2616 let mut sc: u64 = 0;
2617 let mut la: u64 = 0;
2618 let mut si = 0usize;
2619 let mut di = 0usize;
2620 let src_len = src_count.len();
2621 let dst_len = dst_count.len();
2622 loop {
2623 if si >= src_len || src_count.data[si].cnt == 0 {
2624 break;
2625 }
2626 let s_hash = src_count.data[si].hashval;
2627 let s_cnt = u64::from(src_count.data[si].cnt);
2628 while di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval < s_hash {
2629 la += u64::from(dst_count.data[di].cnt);
2630 di += 1;
2631 }
2632 let mut dst_cnt = 0u64;
2633 if di < dst_len && dst_count.data[di].cnt != 0 && dst_count.data[di].hashval == s_hash {
2634 dst_cnt = u64::from(dst_count.data[di].cnt);
2635 di += 1;
2636 }
2637 if s_cnt < dst_cnt {
2638 la += dst_cnt - s_cnt;
2639 sc += s_cnt;
2640 } else {
2641 sc += dst_cnt;
2642 }
2643 si += 1;
2644 }
2645 while di < dst_len && dst_count.data[di].cnt != 0 {
2646 la += u64::from(dst_count.data[di].cnt);
2647 di += 1;
2648 }
2649 (sc, la)
2650}
2651
2652#[must_use]
2655pub fn should_break_rewrite_for_stat(old: &[u8], new: &[u8]) -> bool {
2656 should_break_rewrite_inner(old, new, DIFF_DEFAULT_BREAK_SCORE)
2657}
2658
2659#[must_use]
2662pub fn rewrite_merge_score(old: &[u8], new: &[u8]) -> Option<u64> {
2663 if old.is_empty() {
2664 return None;
2665 }
2666 let max_size = old.len().max(new.len());
2667 if max_size < DIFF_MINIMUM_BREAK_SIZE {
2668 return None;
2669 }
2670 let (src_copied, _) = diffcore_count_changes(old, new);
2671 let src_copied = src_copied.min(old.len() as u64);
2672 let src_removed = (old.len() as u64).saturating_sub(src_copied);
2673 Some(src_removed * DIFF_MAX_SCORE / old.len() as u64)
2674}
2675
2676#[must_use]
2678pub fn rewrite_dissimilarity_index_percent(old: &[u8], new: &[u8]) -> Option<u32> {
2679 let score = rewrite_merge_score(old, new)?;
2680 Some((score * 100 / DIFF_MAX_SCORE).min(100) as u32)
2681}
2682
2683fn should_break_rewrite_inner(src: &[u8], dst: &[u8], break_score: u64) -> bool {
2684 if src.is_empty() {
2685 return false;
2686 }
2687 let max_size = src.len().max(dst.len());
2688 if max_size < DIFF_MINIMUM_BREAK_SIZE {
2689 return false;
2690 }
2691 let (src_copied, literal_added) = diffcore_count_changes(src, dst);
2692 let src_copied = src_copied.min(src.len() as u64);
2693 let mut literal_added = literal_added;
2694 let dst_len = dst.len() as u64;
2695 if src_copied < dst_len && literal_added + src_copied > dst_len {
2696 literal_added = dst_len.saturating_sub(src_copied);
2697 }
2698 let src_removed = (src.len() as u64).saturating_sub(src_copied);
2699 let merge_score = src_removed * DIFF_MAX_SCORE / src.len() as u64;
2700 if merge_score > break_score {
2701 return true;
2702 }
2703 let delta_size = src_removed.saturating_add(literal_added);
2704 if delta_size * DIFF_MAX_SCORE / (max_size as u64) < break_score {
2705 return false;
2706 }
2707 let s = src.len() as u64;
2708 if (s * break_score < src_removed * DIFF_MAX_SCORE)
2709 && (literal_added * 20 < src_removed)
2710 && (literal_added * 20 < src_copied)
2711 {
2712 return false;
2713 }
2714 true
2715}
2716
2717struct FlatEntry {
2721 path: String,
2722 mode: u32,
2723 oid: ObjectId,
2724}
2725
2726fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
2727 let entries = read_tree(odb, tree_oid)?;
2728 let mut result = Vec::new();
2729
2730 for entry in entries {
2731 let name_str = String::from_utf8_lossy(&entry.name);
2732 let path = format_path(prefix, &name_str);
2733 if is_tree_mode(entry.mode) {
2734 let nested = flatten_tree(odb, &entry.oid, &path)?;
2735 result.extend(nested);
2736 } else {
2737 result.push(FlatEntry {
2738 path,
2739 mode: entry.mode,
2740 oid: entry.oid,
2741 });
2742 }
2743 }
2744
2745 Ok(result)
2746}
2747
2748pub fn head_path_states(
2750 odb: &Odb,
2751 head_tree: Option<&ObjectId>,
2752) -> Result<std::collections::BTreeMap<String, (u32, ObjectId)>> {
2753 let mut m = std::collections::BTreeMap::new();
2754 let Some(t) = head_tree else {
2755 return Ok(m);
2756 };
2757 for fe in flatten_tree(odb, t, "")? {
2758 m.insert(fe.path, (fe.mode, fe.oid));
2759 }
2760 Ok(m)
2761}
2762
2763fn is_tree_mode(mode: u32) -> bool {
2765 mode == 0o040000
2766}
2767
2768fn format_path(prefix: &str, name: &str) -> String {
2770 if prefix.is_empty() {
2771 name.to_owned()
2772 } else {
2773 format!("{prefix}/{name}")
2774 }
2775}
2776
2777pub fn format_mode(mode: u32) -> String {
2779 format!("{mode:06o}")
2780}
2781
2782#[must_use]
2786pub fn read_submodule_head_for_checkout(sub_dir: &Path) -> Option<ObjectId> {
2787 read_submodule_head(sub_dir)
2788}
2789
2790fn submodule_worktree_is_unpopulated_placeholder(sub_dir: &Path) -> bool {
2793 match fs::read_dir(sub_dir) {
2794 Ok(mut it) => it.next().is_none(),
2795 Err(e) if e.kind() == std::io::ErrorKind::NotFound => true,
2796 Err(_) => false,
2797 }
2798}
2799
2800fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
2801 read_submodule_head_oid(sub_dir)
2802}
2803
2804#[must_use]
2806pub fn submodule_embedded_git_dir(sub_dir: &Path) -> Option<PathBuf> {
2807 let gitfile = sub_dir.join(".git");
2808 if gitfile.is_file() {
2809 let content = fs::read_to_string(&gitfile).ok()?;
2810 let gitdir = content
2811 .lines()
2812 .find_map(|l| l.strip_prefix("gitdir: "))?
2813 .trim();
2814 Some(if Path::new(gitdir).is_absolute() {
2815 PathBuf::from(gitdir)
2816 } else {
2817 sub_dir.join(gitdir)
2818 })
2819 } else if gitfile.is_dir() {
2820 Some(gitfile)
2821 } else {
2822 None
2823 }
2824}
2825
2826fn find_superproject_git(sub_dir: &Path) -> Option<(PathBuf, PathBuf)> {
2828 let mut cur = sub_dir.parent()?;
2829 loop {
2830 let git_path = cur.join(".git");
2831 if git_path.exists() {
2832 let gd = if git_path.is_file() {
2833 let content = fs::read_to_string(&git_path).ok()?;
2834 let line = content
2835 .lines()
2836 .find_map(|l| l.strip_prefix("gitdir: "))?
2837 .trim();
2838 if Path::new(line).is_absolute() {
2839 PathBuf::from(line)
2840 } else {
2841 cur.join(line)
2842 }
2843 } else {
2844 git_path
2845 };
2846 return Some((cur.to_path_buf(), gd));
2847 }
2848 cur = cur.parent()?;
2849 }
2850}
2851
2852pub fn read_submodule_head_oid(sub_dir: &Path) -> Option<ObjectId> {
2858 let mut git_dir = submodule_embedded_git_dir(sub_dir)?;
2861 if let Some((super_wt, super_git_dir)) = find_superproject_git(sub_dir) {
2862 let rel = sub_dir.strip_prefix(&super_wt).ok()?;
2863 let rel_str = rel.to_string_lossy().replace('\\', "/");
2864 let local_mod = super_git_dir
2865 .join("modules")
2866 .join(rel_str.trim_start_matches('/'));
2867 if local_mod.join("HEAD").exists() {
2868 let sg = super_git_dir.canonicalize().unwrap_or(super_git_dir);
2869 let cur = git_dir.canonicalize().unwrap_or_else(|_| git_dir.clone());
2870 if !cur.starts_with(&sg) {
2871 git_dir = local_mod;
2872 }
2873 }
2874 }
2875 let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
2876 let head_content = head_content.trim();
2877 if let Some(refname) = head_content.strip_prefix("ref: ") {
2878 let ref_path = git_dir.join(refname);
2880 let oid_hex = fs::read_to_string(&ref_path).ok()?;
2881 ObjectId::from_hex(oid_hex.trim()).ok()
2882 } else {
2883 ObjectId::from_hex(head_content).ok()
2885 }
2886}
2887
2888#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
2890pub struct SubmodulePorcelainFlags {
2891 pub new_commits: bool,
2893 pub modified: bool,
2895 pub untracked: bool,
2897}
2898
2899pub fn submodule_porcelain_flags(
2905 super_worktree: &Path,
2906 rel_path: &str,
2907 recorded_oid: ObjectId,
2908) -> SubmodulePorcelainFlags {
2909 let sub_dir = super_worktree.join(rel_path);
2910 let Some(sub_git_dir) = submodule_embedded_git_dir(&sub_dir) else {
2911 return SubmodulePorcelainFlags::default();
2912 };
2913 let Some(sub_head) = read_submodule_head_oid(&sub_dir) else {
2914 return SubmodulePorcelainFlags::default();
2915 };
2916
2917 let new_commits = sub_head != recorded_oid;
2918
2919 let index_path = sub_git_dir.join("index");
2920 let sub_index = match crate::index::Index::load(&index_path) {
2921 Ok(ix) => ix,
2922 Err(_) => {
2923 return SubmodulePorcelainFlags {
2924 new_commits,
2925 ..Default::default()
2926 }
2927 }
2928 };
2929
2930 let tracked: std::collections::BTreeSet<String> = sub_index
2931 .entries
2932 .iter()
2933 .filter(|e| e.stage() == 0)
2934 .map(|e| String::from_utf8_lossy(&e.path).into_owned())
2935 .collect();
2936 let untracked = submodule_dir_has_untracked_inner(&sub_dir, &sub_dir, &tracked);
2937
2938 let objects_dir = sub_git_dir.join("objects");
2939 let odb = Odb::new(&objects_dir);
2940
2941 let sub_head_tree = (|| -> Option<ObjectId> {
2942 let h = fs::read_to_string(sub_git_dir.join("HEAD")).ok()?;
2943 let h_str = h.trim();
2944 let commit_oid = if let Some(r) = h_str.strip_prefix("ref: ") {
2945 let oid_hex = fs::read_to_string(sub_git_dir.join(r)).ok()?;
2946 ObjectId::from_hex(oid_hex.trim()).ok()?
2947 } else {
2948 ObjectId::from_hex(h_str).ok()?
2949 };
2950 let obj = odb.read(&commit_oid).ok()?;
2951 let commit = parse_commit(&obj.data).ok()?;
2952 Some(commit.tree)
2953 })();
2954
2955 let staged_dirty = sub_head_tree
2956 .as_ref()
2957 .map(|t| diff_index_to_tree(&odb, &sub_index, Some(t)).map(|v| !v.is_empty()))
2958 .unwrap_or(Ok(false));
2959 let staged_dirty = staged_dirty.unwrap_or(false);
2960
2961 let unstaged_dirty = diff_index_to_worktree(&odb, &sub_index, &sub_dir)
2962 .map(|v| !v.is_empty())
2963 .unwrap_or(false);
2964
2965 let modified = staged_dirty || unstaged_dirty;
2966
2967 SubmodulePorcelainFlags {
2968 new_commits,
2969 modified,
2970 untracked,
2971 }
2972}
2973
2974fn submodule_dir_has_untracked_inner(
2975 dir: &Path,
2976 root: &Path,
2977 tracked: &std::collections::BTreeSet<String>,
2978) -> bool {
2979 let entries = match fs::read_dir(dir) {
2980 Ok(e) => e,
2981 Err(_) => return false,
2982 };
2983 let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
2984 sorted.sort_by_key(|e| e.file_name());
2985
2986 for entry in sorted {
2987 let name = entry.file_name().to_string_lossy().to_string();
2988 if name == ".git" {
2989 continue;
2990 }
2991 let path = entry.path();
2992 let rel = path
2993 .strip_prefix(root)
2994 .map(|p| p.to_string_lossy().to_string())
2995 .unwrap_or_else(|_| name.clone());
2996
2997 let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
2998 if is_dir {
2999 if submodule_dir_has_untracked_inner(&path, root, tracked) {
3000 return true;
3001 }
3002 } else if !tracked.contains(&rel) {
3003 return true;
3004 }
3005 }
3006 false
3007}