1use similar::Algorithm as SimilarAlgorithm;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14pub enum BlameDiffAlgorithm {
15 Myers,
16 Histogram,
17 Patience,
18 Minimal,
19}
20
21impl BlameDiffAlgorithm {
22 pub fn to_similar(self) -> SimilarAlgorithm {
23 match self {
24 BlameDiffAlgorithm::Myers => SimilarAlgorithm::Myers,
28 BlameDiffAlgorithm::Histogram => SimilarAlgorithm::Patience,
29 BlameDiffAlgorithm::Patience => SimilarAlgorithm::Patience,
30 BlameDiffAlgorithm::Minimal => SimilarAlgorithm::Lcs,
31 }
32 }
33}
34
35pub fn parse_diff_algorithm_name(name: &str) -> Option<BlameDiffAlgorithm> {
36 match name.to_ascii_lowercase().as_str() {
37 "myers" | "default" => Some(BlameDiffAlgorithm::Myers),
38 "histogram" => Some(BlameDiffAlgorithm::Histogram),
39 "patience" => Some(BlameDiffAlgorithm::Patience),
40 "minimal" => Some(BlameDiffAlgorithm::Minimal),
41 _ => None,
42 }
43}
44
45pub fn should_drop_tail_match_for_myers(
46 diff_algorithm: BlameDiffAlgorithm,
47 parent_idx: usize,
48 current_idx: usize,
49 parent_lines: &[&str],
50) -> bool {
51 if diff_algorithm != BlameDiffAlgorithm::Myers {
52 return false;
53 }
54 if parent_lines.is_empty() || parent_idx + 1 != parent_lines.len() {
55 return false;
56 }
57 if current_idx <= parent_idx {
60 return false;
61 }
62 let tail = parent_lines[parent_idx];
65 parent_lines.iter().filter(|line| **line == tail).count() >= 2
66}
67
68static BLAME_INDENT_HEURISTIC: std::sync::atomic::AtomicBool =
71 std::sync::atomic::AtomicBool::new(false);
72
73fn blame_indent_heuristic_enabled() -> bool {
74 BLAME_INDENT_HEURISTIC.load(std::sync::atomic::Ordering::Relaxed)
75}
76
77pub fn set_blame_indent_heuristic(enabled: bool) {
78 BLAME_INDENT_HEURISTIC.store(enabled, std::sync::atomic::Ordering::Relaxed);
79}
80
81pub fn build_line_map(
83 old: &[&str],
84 new: &[&str],
85 diff_algorithm: BlameDiffAlgorithm,
86) -> Vec<Option<usize>> {
87 let mut old_joined = old.join("\n");
89 old_joined.push('\n');
90 let mut new_joined = new.join("\n");
91 new_joined.push('\n');
92
93 let ops = crate::diff_indent_heuristic::diff_lines_ops_compacted(
96 &old_joined,
97 &new_joined,
98 diff_algorithm.to_similar(),
99 blame_indent_heuristic_enabled(),
100 );
101
102 let mut result = vec![None; new.len()];
103 for op in ops {
104 if let similar::DiffOp::Equal {
105 old_index,
106 new_index,
107 len,
108 } = op
109 {
110 for k in 0..len {
111 if new_index + k < result.len() {
112 result[new_index + k] = Some(old_index + k);
113 }
114 }
115 }
116 }
117
118 result
119}
120
121pub fn build_fuzzy_line_map(
122 old: &[&str],
123 new: &[&str],
124 exact_map: &[Option<usize>],
125) -> Vec<Option<usize>> {
126 let mut fuzzy_map = exact_map.to_vec();
127 let mut used_old = vec![0usize; old.len()];
128 for old_idx in fuzzy_map.iter().flatten() {
129 if *old_idx < used_old.len() {
130 used_old[*old_idx] += 1;
131 }
132 }
133
134 for new_idx in 0..fuzzy_map.len() {
138 if fuzzy_map[new_idx].is_some() {
139 continue;
140 }
141 let mut best: Option<(usize, usize, usize)> = None;
142 for old_idx in 0..old.len() {
143 if old[old_idx] != new[new_idx] {
144 continue;
145 }
146 let candidate = (used_old[old_idx], old_idx.abs_diff(new_idx), old_idx);
147 if best.is_none_or(|b| candidate < b) {
148 best = Some(candidate);
149 }
150 }
151 if let Some((_, _, old_idx)) = best {
152 fuzzy_map[new_idx] = Some(old_idx);
153 used_old[old_idx] += 1;
154 }
155 }
156
157 let mut anchors: Vec<(usize, usize)> = exact_map
158 .iter()
159 .enumerate()
160 .filter_map(|(new_idx, old_idx)| old_idx.map(|old| (new_idx, old)))
161 .collect();
162 anchors.sort_unstable();
163
164 let mut prev_new = usize::MAX;
165 let mut prev_old = usize::MAX;
166 for (next_new, next_old) in anchors
167 .iter()
168 .copied()
169 .chain(std::iter::once((new.len(), old.len())))
170 {
171 let new_start = if prev_new == usize::MAX {
172 0
173 } else {
174 prev_new + 1
175 };
176 let new_end = next_new;
177 let old_start = if prev_old == usize::MAX {
178 0
179 } else {
180 prev_old + 1
181 };
182 let old_end = next_old;
183
184 if new_start < new_end && old_start < old_end {
185 let segment_matches =
186 fuzzy_match_segment(old, old_start, old_end, new, new_start, new_end);
187 for (new_idx, old_idx) in segment_matches {
188 if fuzzy_map[new_idx].is_none() {
189 fuzzy_map[new_idx] = Some(old_idx);
190 used_old[old_idx] += 1;
191 }
192 }
193 }
194
195 prev_new = next_new;
196 prev_old = next_old;
197 }
198
199 for new_idx in 0..fuzzy_map.len() {
203 if fuzzy_map[new_idx].is_some() {
204 continue;
205 }
206 let prev_old = (0..new_idx).rev().find_map(|i| fuzzy_map[i]);
207 let next_old = ((new_idx + 1)..fuzzy_map.len()).find_map(|i| fuzzy_map[i]);
208
209 let mut candidates = Vec::new();
210 if let Some(o) = prev_old {
211 candidates.push(o);
212 }
213 if let Some(o) = next_old {
214 if candidates.last().copied() != Some(o) {
215 candidates.push(o);
216 }
217 }
218
219 let mut best: Option<(f64, usize)> = None;
220 for old_idx in candidates {
221 if old_idx >= old.len() {
222 continue;
223 }
224 let (sim, lcs) = line_similarity_and_lcs(old[old_idx], new[new_idx]);
225 let exact_text = old[old_idx].trim() == new[new_idx].trim();
226 if !exact_text && lcs < 6 && !(sim >= 0.35 && lcs >= 3) {
228 continue;
229 }
230
231 let mut score = lcs as f64 + sim * 10.0;
232 score -= 0.2 * used_old[old_idx] as f64;
233
234 if let (Some(lo), Some(hi)) = (prev_old, next_old) {
235 if lo <= hi && (old_idx < lo || old_idx > hi) {
236 score -= 3.0;
237 }
238 }
239
240 if best.is_none_or(|(best_score, best_old)| {
241 score > best_score
242 || ((score - best_score).abs() < 1e-9
243 && old_idx.abs_diff(new_idx) < best_old.abs_diff(new_idx))
244 }) {
245 best = Some((score, old_idx));
246 }
247 }
248
249 if let Some((_, old_idx)) = best {
250 fuzzy_map[new_idx] = Some(old_idx);
251 used_old[old_idx] += 1;
252 }
253 }
254
255 for new_idx in 0..fuzzy_map.len() {
259 if fuzzy_map[new_idx].is_some() {
260 continue;
261 }
262 let prev_old = (0..new_idx).rev().find_map(|i| fuzzy_map[i]);
263 let next_old = ((new_idx + 1)..fuzzy_map.len()).find_map(|i| fuzzy_map[i]);
264
265 let mut best: Option<(f64, usize)> = None;
266 for old_idx in 0..old.len() {
267 let (sim, lcs) = line_similarity_and_lcs(old[old_idx], new[new_idx]);
268 let exact_text = old[old_idx].trim() == new[new_idx].trim();
269 let new_len = new[new_idx].trim().chars().count();
270 if !exact_text && (new_len < 3 || sim < 0.45 || lcs < 2) {
271 continue;
272 }
273
274 let mut score = sim;
275 score -= 0.004 * old_idx.abs_diff(new_idx) as f64;
276 score -= 0.08 * used_old[old_idx] as f64;
277
278 if let (Some(lo), Some(hi)) = (prev_old, next_old) {
281 if lo <= hi {
282 if old_idx < lo {
283 score -= 0.20 * (lo - old_idx) as f64;
284 } else if old_idx > hi {
285 score -= 0.20 * (old_idx - hi) as f64;
286 }
287 }
288 } else if let Some(lo) = prev_old {
289 if old_idx < lo {
290 score -= 0.20 * (lo - old_idx) as f64;
291 }
292 } else if let Some(hi) = next_old {
293 if old_idx > hi {
294 score -= 0.20 * (old_idx - hi) as f64;
295 }
296 }
297
298 if best.is_none_or(|(best_score, best_idx)| {
299 score > best_score
300 || ((score - best_score).abs() < 1e-9
301 && old_idx.abs_diff(new_idx) < best_idx.abs_diff(new_idx))
302 }) {
303 best = Some((score, old_idx));
304 }
305 }
306
307 if let Some((_, old_idx)) = best {
308 fuzzy_map[new_idx] = Some(old_idx);
309 used_old[old_idx] += 1;
310 }
311 }
312
313 fuzzy_map
314}
315
316fn fuzzy_match_segment(
317 old: &[&str],
318 old_start: usize,
319 old_end: usize,
320 new: &[&str],
321 new_start: usize,
322 new_end: usize,
323) -> Vec<(usize, usize)> {
324 let m = old_end.saturating_sub(old_start);
325 let n = new_end.saturating_sub(new_start);
326 if m == 0 || n == 0 {
327 return Vec::new();
328 }
329
330 let states = m + 1;
335 let neg_inf = f64::NEG_INFINITY;
336 let mut dp = vec![neg_inf; states];
337 dp[0] = 0.0;
338
339 let mut back_prev = vec![vec![usize::MAX; states]; n + 1];
340 let mut back_pick = vec![vec![None; states]; n + 1];
341
342 for j in 0..n {
343 let mut next_dp = vec![neg_inf; states];
344
345 for state in 0..states {
346 let base = dp[state];
347 if !base.is_finite() {
348 continue;
349 }
350
351 if base > next_dp[state] {
353 next_dp[state] = base;
354 back_prev[j + 1][state] = state;
355 back_pick[j + 1][state] = None;
356 }
357
358 let start_k = if state == 0 { 0 } else { state - 1 };
360 for k in start_k..m {
361 let old_idx = old_start + k;
362 let new_idx = new_start + j;
363 let (sim, lcs) = line_similarity_and_lcs(old[old_idx], new[new_idx]);
364 let exact_text = old[old_idx].trim() == new[new_idx].trim();
365 let new_len = new[new_idx].trim().chars().count();
366 if !exact_text && (new_len < 3 || sim < 0.45 || lcs < 2) {
367 continue;
368 }
369
370 let distance = k.abs_diff(j) as f64;
372 let score = base + sim - 0.002 * distance;
373 let next_state = k + 1;
374 if score > next_dp[next_state] {
375 next_dp[next_state] = score;
376 back_prev[j + 1][next_state] = state;
377 back_pick[j + 1][next_state] = Some(k);
378 }
379 }
380 }
381
382 dp = next_dp;
383 }
384
385 let mut best_state = 0usize;
386 for state in 1..states {
387 if dp[state] > dp[best_state] {
388 best_state = state;
389 }
390 }
391
392 let mut matches = Vec::new();
393 let mut state = best_state;
394 for j in (1..=n).rev() {
395 if let Some(k) = back_pick[j][state] {
396 matches.push((new_start + (j - 1), old_start + k));
397 }
398 let prev = back_prev[j][state];
399 if prev == usize::MAX {
400 break;
401 }
402 state = prev;
403 }
404 matches.reverse();
405 matches
406}
407
408fn line_similarity_and_lcs(a: &str, b: &str) -> (f64, usize) {
409 let a = a.trim();
410 let b = b.trim();
411 if a.is_empty() || b.is_empty() {
412 return (0.0, 0);
413 }
414 if a == b {
415 return (1.0, a.chars().count());
416 }
417
418 let a = a.to_ascii_lowercase();
419 let b = b.to_ascii_lowercase();
420 let a_chars: Vec<char> = a.chars().collect();
421 let b_chars: Vec<char> = b.chars().collect();
422 let n = b_chars.len();
423 if a_chars.is_empty() || b_chars.is_empty() {
424 return (0.0, 0);
425 }
426
427 let mut prev = vec![0usize; n + 1];
428 let mut curr = vec![0usize; n + 1];
429 for i in 1..=a_chars.len() {
430 for j in 1..=n {
431 curr[j] = if a_chars[i - 1] == b_chars[j - 1] {
432 prev[j - 1] + 1
433 } else {
434 prev[j].max(curr[j - 1])
435 };
436 }
437 std::mem::swap(&mut prev, &mut curr);
438 curr.fill(0);
439 }
440
441 let lcs = prev[n];
442 let sim = (2.0 * lcs as f64) / (a_chars.len() as f64 + b_chars.len() as f64);
443 (sim, lcs)
444}
445
446use std::collections::{HashMap, HashSet, VecDeque};
459use std::fs::{self, OpenOptions};
460use std::io::{self, Write};
461use std::path::Path;
462use std::process::{Command, Stdio};
463use std::sync::OnceLock;
464use std::time::{SystemTime, UNIX_EPOCH};
465
466use crate::config::ConfigSet;
467use crate::crlf::{
468 convert_to_git, convert_to_worktree_eager, get_file_attrs, load_gitattributes,
469 load_gitattributes_from_index, ConversionConfig, GitAttributes,
470};
471use crate::error::{Error as LibError, Result};
472use crate::objects::{parse_commit, parse_tree, CommitData, Object, ObjectId, ObjectKind};
473use crate::odb::Odb;
474use crate::repo::Repository;
475use crate::wildmatch::wildmatch;
476
477type PromisorHydrateHook = fn(&Repository, ObjectId);
481
482static PROMISOR_HYDRATE_HOOK: OnceLock<PromisorHydrateHook> = OnceLock::new();
483
484pub fn set_promisor_hydrate_hook(hook: PromisorHydrateHook) {
488 let _ = PROMISOR_HYDRATE_HOOK.set(hook);
489}
490
491fn promisor_hydrate_hook() -> Option<PromisorHydrateHook> {
492 PROMISOR_HYDRATE_HOOK.get().copied()
493}
494#[derive(Debug, Clone)]
496pub struct BlameLine {
497 pub oid: ObjectId,
498 pub final_lineno: usize,
500 pub orig_lineno: usize,
502 pub content: String,
503 pub source_file: Option<String>,
505 pub ignored: bool,
507 pub unblamable: bool,
509 pub external_contents: bool,
511}
512
513fn resolve_path_in_tree_entry(
515 odb: &Odb,
516 tree_oid: &ObjectId,
517 path: &str,
518) -> Result<Option<(ObjectId, u32)>> {
519 let parts: Vec<&str> = path.split('/').collect();
520 let mut current = *tree_oid;
521
522 for (i, part) in parts.iter().enumerate() {
523 let obj = read_object_for_blame(odb, ¤t)?;
524 let entries = parse_tree(&obj.data)?;
525 match entries
526 .iter()
527 .find(|e| String::from_utf8_lossy(&e.name) == *part)
528 {
529 Some(e) if i == parts.len() - 1 => {
530 if e.mode == 0o040000 {
531 return Ok(None);
532 }
533 return Ok(Some((e.oid, e.mode)));
534 }
535 Some(e) if e.mode == 0o040000 => current = e.oid,
536 Some(_) => return Ok(None),
537 None => return Ok(None),
538 }
539 }
540 Ok(None)
541}
542
543fn content_lines(s: &str) -> Vec<&str> {
546 if s.is_empty() {
547 return Vec::new();
548 }
549 let mut out: Vec<&str> = s.split('\n').collect();
550 if out.last() == Some(&"") {
551 out.pop();
552 }
553 out
554}
555
556#[derive(Debug, Clone)]
560struct TrackedLine {
561 final_lineno: usize,
562 current_idx: usize,
563 ignored: bool,
564 source_path: Option<String>,
565}
566
567#[derive(Debug, Clone)]
568pub struct BlameTextconvContext {
569 pub config: ConfigSet,
570 conversion: ConversionConfig,
571 pub attrs: GitAttributes,
572 diff_attrs: Vec<DiffAttrRule>,
573}
574
575impl BlameTextconvContext {
576 pub fn new(repo: &Repository) -> Self {
577 let config = ConfigSet::load(Some(&repo.git_dir), true).unwrap_or_default();
578 let conversion = ConversionConfig::from_config(&config);
579 let attrs = load_attr_rules(repo);
580 let diff_attrs = load_diff_attr_rules(repo);
581 Self {
582 config,
583 conversion,
584 attrs,
585 diff_attrs,
586 }
587 }
588}
589
590#[derive(Debug, Clone)]
591struct DiffAttrRule {
592 pattern: String,
593 value: DiffAttrValue,
594}
595
596#[derive(Debug, Clone)]
597enum DiffAttrValue {
598 Unset,
599 Set,
600 Driver(String),
601}
602
603fn load_attr_rules(repo: &Repository) -> GitAttributes {
604 if let Some(work_tree) = repo.work_tree.as_deref() {
605 let rules = load_gitattributes(work_tree);
606 if !rules.is_empty() {
607 return rules;
608 }
609 }
610
611 if let Ok(index) = repo.load_index() {
612 return load_gitattributes_from_index(&index, &repo.odb);
613 }
614
615 Vec::new()
616}
617
618fn load_diff_attr_rules(repo: &Repository) -> Vec<DiffAttrRule> {
619 let mut rules = Vec::new();
620
621 if let Some(work_tree) = repo.work_tree.as_deref() {
622 parse_diff_attr_file(&work_tree.join(".gitattributes"), &mut rules);
623 parse_diff_attr_file(&work_tree.join(".git/info/attributes"), &mut rules);
624 }
625
626 if rules.is_empty() {
627 if let Ok(index) = repo.load_index() {
628 if let Some(entry) = index.get(b".gitattributes", 0) {
629 if let Ok(obj) = repo.odb.read(&entry.oid) {
630 if let Ok(content) = String::from_utf8(obj.data) {
631 parse_diff_attr_content(&content, &mut rules);
632 }
633 }
634 }
635 }
636 parse_diff_attr_file(&repo.git_dir.join("info/attributes"), &mut rules);
637 }
638
639 rules
640}
641
642pub fn read_object_for_blame(odb: &Odb, oid: &ObjectId) -> Result<Object> {
643 match odb.read(oid) {
644 Ok(obj) => Ok(obj),
645 Err(LibError::ObjectNotFound(_)) => {
646 if let Some(hook) = promisor_hydrate_hook() {
647 if let Ok(repo) = Repository::discover(None) {
648 hook(&repo, *oid);
649 return odb.read(oid);
650 }
651 }
652 Err(LibError::ObjectNotFound(oid.to_hex()))
653 }
654 Err(err) => Err(err),
655 }
656}
657
658fn parse_diff_attr_file(path: &Path, rules: &mut Vec<DiffAttrRule>) {
659 if let Ok(content) = std::fs::read_to_string(path) {
660 parse_diff_attr_content(&content, rules);
661 }
662}
663
664fn parse_diff_attr_content(content: &str, rules: &mut Vec<DiffAttrRule>) {
665 for line in content.lines() {
666 let line = line.trim();
667 if line.is_empty() || line.starts_with('#') {
668 continue;
669 }
670
671 let mut parts = line.split_whitespace();
672 let Some(pattern) = parts.next() else {
673 continue;
674 };
675
676 let mut value: Option<DiffAttrValue> = None;
677 for token in parts {
678 if token == "binary" || token == "-diff" {
679 value = Some(DiffAttrValue::Unset);
680 } else if token == "diff" {
681 value = Some(DiffAttrValue::Set);
682 } else if let Some(driver) = token.strip_prefix("diff=") {
683 value = Some(DiffAttrValue::Driver(driver.to_owned()));
684 }
685 }
686
687 if let Some(value) = value {
688 rules.push(DiffAttrRule {
689 pattern: pattern.to_owned(),
690 value,
691 });
692 }
693 }
694}
695
696fn resolve_textconv_command(ctx: &BlameTextconvContext, path: &str) -> Option<String> {
697 let mut selected: Option<DiffAttrValue> = None;
698 for rule in &ctx.diff_attrs {
699 if diff_attr_pattern_matches(&rule.pattern, path) {
700 selected = Some(rule.value.clone());
701 }
702 }
703
704 match selected {
705 Some(DiffAttrValue::Driver(driver)) => ctx.config.get(&format!("diff.{driver}.textconv")),
706 _ => None,
707 }
708}
709
710fn diff_attr_pattern_matches(pattern: &str, path: &str) -> bool {
711 if pattern.contains('/') {
712 return wildmatch(pattern.as_bytes(), path.as_bytes(), 0);
713 }
714 let basename = path.rsplit('/').next().unwrap_or(path);
715 wildmatch(pattern.as_bytes(), basename.as_bytes(), 0)
716}
717
718fn is_regular_mode(mode: u32) -> bool {
719 mode & 0o170000 == 0o100000
720}
721
722fn run_textconv_command(command: &str, input_data: &[u8]) -> Result<Vec<u8>> {
723 let temp_path = create_temp_textconv_file(input_data)?;
724 let quoted = shell_quote(temp_path.to_string_lossy().as_ref());
725 let shell_command = format!("{command} {quoted}");
726
727 let output = Command::new("sh")
728 .arg("-c")
729 .arg(&shell_command)
730 .stdin(Stdio::null())
731 .stdout(Stdio::piped())
732 .stderr(Stdio::inherit())
733 .output()
734 .map_err(|e| LibError::Message(format!("running textconv command '{command}': {e}")))?;
735
736 let _ = std::fs::remove_file(&temp_path);
737
738 if !output.status.success() {
739 return Err(LibError::Message(format!(
740 "textconv command exited with status {}",
741 output.status
742 )));
743 }
744
745 Ok(output.stdout)
746}
747
748fn create_temp_textconv_file(data: &[u8]) -> Result<std::path::PathBuf> {
749 let pid = std::process::id();
750 for attempt in 0..32u32 {
751 let now = SystemTime::now()
752 .duration_since(UNIX_EPOCH)
753 .unwrap_or_default()
754 .as_nanos();
755 let path = std::env::temp_dir().join(format!("grit-blame-textconv-{pid}-{now}-{attempt}"));
756 match OpenOptions::new().create_new(true).write(true).open(&path) {
757 Ok(mut file) => {
758 file.write_all(data)?;
759 return Ok(path);
760 }
761 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => continue,
762 Err(err) => return Err(err.into()),
763 }
764 }
765 Err(LibError::Message(
766 "failed to create temporary textconv input file".to_string(),
767 ))
768}
769
770fn shell_quote(text: &str) -> String {
771 format!("'{}'", text.replace('\'', "'\\''"))
772}
773
774fn read_blob_content_for_blame(
775 odb: &Odb,
776 oid: &ObjectId,
777 path: &str,
778 mode: u32,
779 textconv_ctx: Option<&BlameTextconvContext>,
780 use_textconv: bool,
781) -> Result<String> {
782 let obj = read_object_for_blame(odb, oid)?;
783 if obj.kind != ObjectKind::Blob {
784 return Err(LibError::Message("expected blob object".to_string()));
785 }
786
787 if !use_textconv || !is_regular_mode(mode) {
788 return Ok(String::from_utf8_lossy(&obj.data).into_owned());
789 }
790
791 let Some(ctx) = textconv_ctx else {
792 return Ok(String::from_utf8_lossy(&obj.data).into_owned());
793 };
794 let Some(command) = resolve_textconv_command(ctx, path) else {
795 return Ok(String::from_utf8_lossy(&obj.data).into_owned());
796 };
797
798 let attrs = get_file_attrs(&ctx.attrs, path, false, &ctx.config);
799 let oid_hex = oid.to_string();
800 let worktree_data = convert_to_worktree_eager(
801 &obj.data,
802 path,
803 &ctx.conversion,
804 &attrs,
805 Some(&oid_hex),
806 None,
807 )
808 .map_err(|e| LibError::Message(format!("{e}")))?;
809 let converted = run_textconv_command(&command, &worktree_data)
810 .or_else(|_| run_textconv_command(&command, &obj.data))?;
811 Ok(String::from_utf8_lossy(&converted).into_owned())
812}
813
814pub fn compute_blame(
816 odb: &Odb,
817 start_oid: ObjectId,
818 file_path: &str,
819 ignore_revs: &HashSet<ObjectId>,
820 diff_algorithm: BlameDiffAlgorithm,
821 textconv_ctx: Option<&BlameTextconvContext>,
822 use_textconv: bool,
823 copy_depth: usize,
824 first_parent_only: bool,
825 grafts: &HashMap<ObjectId, Vec<ObjectId>>,
826) -> Result<Vec<BlameLine>> {
827 let start_commit = {
828 let obj = read_object_for_blame(odb, &start_oid)?;
829 parse_commit(&obj.data)?
830 };
831
832 let (blob_oid, blob_mode) = resolve_path_in_tree_entry(odb, &start_commit.tree, file_path)?
833 .ok_or_else(|| LibError::Message(format!("file '{file_path}' not found in revision")))?;
834 let content = read_blob_content_for_blame(
835 odb,
836 &blob_oid,
837 file_path,
838 blob_mode,
839 textconv_ctx,
840 use_textconv,
841 )?;
842 let lines = content_lines(&content);
843 let num_lines = lines.len();
844
845 if num_lines == 0 {
846 return Ok(Vec::new());
847 }
848
849 let mut pending: Vec<TrackedLine> = (0..num_lines)
851 .map(|i| TrackedLine {
852 final_lineno: i + 1,
853 current_idx: i,
854 ignored: false,
855 source_path: None,
856 })
857 .collect();
858
859 let mut result: Vec<BlameLine> = Vec::with_capacity(num_lines);
860 let final_lines: Vec<String> = lines.iter().map(|s| s.to_string()).collect();
862
863 let mut current_oid = start_oid;
864 let mut current_blob_oid = blob_oid;
865 let mut current_blob_mode = blob_mode;
866 let mut current_path = file_path.to_string();
867 let mut commit_cache: HashMap<ObjectId, CommitData> = HashMap::new();
868 commit_cache.insert(start_oid, start_commit);
869 let mut deferred: VecDeque<(ObjectId, ObjectId, u32, String, Vec<TrackedLine>)> =
870 VecDeque::new();
871
872 'blame_loop: loop {
873 if pending.is_empty() {
874 if let Some((oid, blob, mode, path, lines)) = deferred.pop_front() {
875 current_oid = oid;
876 current_blob_oid = blob;
877 current_blob_mode = mode;
878 current_path = path;
879 pending = lines;
880 continue;
881 }
882 break;
883 }
884
885 let commit = get_commit(odb, current_oid, &mut commit_cache)?;
886 let parents = commit_parents_for_blame(odb, current_oid, grafts, &mut commit_cache)?;
887
888 let is_ignored = ignore_revs.contains(¤t_oid);
889
890 if is_ignored && parents.len() > 1 {
893 let cur_content = read_blob_content_for_blame(
894 odb,
895 ¤t_blob_oid,
896 ¤t_path,
897 current_blob_mode,
898 textconv_ctx,
899 use_textconv,
900 )?;
901 let cur_lines = content_lines(&cur_content);
902
903 let mut parent_lines: Vec<Option<Vec<String>>> = Vec::new();
904 let mut parent_blames: Vec<Option<Vec<BlameLine>>> = Vec::new();
905 for parent_oid in &parents {
906 let parent_commit = get_commit(odb, *parent_oid, &mut commit_cache)?;
907 if let Some((p_blob_oid, p_blob_mode)) =
908 resolve_path_in_tree_entry(odb, &parent_commit.tree, ¤t_path)?
909 {
910 let p_content = read_blob_content_for_blame(
911 odb,
912 &p_blob_oid,
913 ¤t_path,
914 p_blob_mode,
915 textconv_ctx,
916 use_textconv,
917 )?;
918 let p_lines = content_lines(&p_content)
919 .iter()
920 .map(|s| s.to_string())
921 .collect::<Vec<_>>();
922 let p_blame = compute_blame(
923 odb,
924 *parent_oid,
925 ¤t_path,
926 ignore_revs,
927 diff_algorithm,
928 textconv_ctx,
929 use_textconv,
930 copy_depth,
931 first_parent_only,
932 grafts,
933 )?;
934 parent_lines.push(Some(p_lines));
935 parent_blames.push(Some(p_blame));
936 } else {
937 parent_lines.push(None);
938 parent_blames.push(None);
939 }
940 }
941
942 for t in pending.drain(..) {
943 let idx = t.current_idx;
944 let Some(cur_line) = cur_lines.get(idx).copied() else {
945 result.push(BlameLine {
946 oid: current_oid,
947 final_lineno: t.final_lineno,
948 orig_lineno: idx + 1,
949 content: final_lines[t.final_lineno - 1].clone(),
950 source_file: None,
951 ignored: t.ignored,
952 unblamable: true,
953 external_contents: false,
954 });
955 continue;
956 };
957
958 let mut picked: Option<BlameLine> = None;
959 for i in 0..parents.len() {
960 let Some(lines) = parent_lines.get(i).and_then(|v| v.as_ref()) else {
961 continue;
962 };
963 if idx >= lines.len() || lines[idx] != cur_line {
964 continue;
965 }
966 let Some(blames) = parent_blames.get(i).and_then(|v| v.as_ref()) else {
967 continue;
968 };
969 if let Some(line_blame) = blames.iter().find(|b| b.final_lineno == idx + 1) {
970 picked = Some(line_blame.clone());
971 break;
972 }
973 }
974
975 if let Some(pb) = picked {
976 result.push(BlameLine {
977 oid: pb.oid,
978 final_lineno: t.final_lineno,
979 orig_lineno: pb.orig_lineno,
980 content: final_lines[t.final_lineno - 1].clone(),
981 source_file: pb.source_file,
982 ignored: true,
983 unblamable: pb.unblamable,
984 external_contents: false,
985 });
986 } else {
987 result.push(BlameLine {
988 oid: current_oid,
989 final_lineno: t.final_lineno,
990 orig_lineno: idx + 1,
991 content: final_lines[t.final_lineno - 1].clone(),
992 source_file: None,
993 ignored: t.ignored,
994 unblamable: true,
995 external_contents: false,
996 });
997 }
998 }
999 if !deferred.is_empty() {
1000 continue 'blame_loop;
1001 }
1002 break 'blame_loop;
1003 }
1004
1005 if parents.is_empty() {
1006 for t in pending.drain(..) {
1008 result.push(BlameLine {
1009 oid: current_oid,
1010 final_lineno: t.final_lineno,
1011 orig_lineno: t.current_idx + 1,
1012 content: final_lines[t.final_lineno - 1].clone(),
1013 source_file: None,
1014 ignored: t.ignored,
1015 unblamable: false,
1016 external_contents: false,
1017 });
1018 }
1019 if !deferred.is_empty() {
1020 continue 'blame_loop;
1021 }
1022 break 'blame_loop;
1023 }
1024
1025 let mut all_parents_have_path = parents.len() >= 2;
1029 if all_parents_have_path {
1030 for p in &parents {
1031 let pc = get_commit(odb, *p, &mut commit_cache)?;
1032 if resolve_path_in_tree_entry(odb, &pc.tree, ¤t_path)?.is_none() {
1033 all_parents_have_path = false;
1034 break;
1035 }
1036 }
1037 }
1038
1039 if !first_parent_only && !is_ignored && all_parents_have_path {
1040 let cur_content = read_blob_content_for_blame(
1041 odb,
1042 ¤t_blob_oid,
1043 ¤t_path,
1044 current_blob_mode,
1045 textconv_ctx,
1046 use_textconv,
1047 )?;
1048 let cur_lines = content_lines(&cur_content);
1049
1050 let mut parent_blobs: Vec<(ObjectId, ObjectId, u32)> =
1051 Vec::with_capacity(parents.len());
1052 let mut par_lines_vec: Vec<Vec<String>> = Vec::with_capacity(parents.len());
1053 let mut maps: Vec<Vec<Option<usize>>> = Vec::with_capacity(parents.len());
1054
1055 for &p in &parents {
1056 let pc = get_commit(odb, p, &mut commit_cache)?;
1057 let Some((blob, mode)) = resolve_path_in_tree_entry(odb, &pc.tree, ¤t_path)?
1058 else {
1059 return Err(LibError::Message(
1060 "internal: missing blob in merge parent".to_string(),
1061 ));
1062 };
1063 let par_content = read_blob_content_for_blame(
1064 odb,
1065 &blob,
1066 ¤t_path,
1067 mode,
1068 textconv_ctx,
1069 use_textconv,
1070 )?;
1071 let pl: Vec<String> = content_lines(&par_content)
1072 .iter()
1073 .map(|s| (*s).to_string())
1074 .collect();
1075 let pl_refs: Vec<&str> = pl.iter().map(|s| s.as_str()).collect();
1076 let map_algo = if parents.len() > 2 {
1077 BlameDiffAlgorithm::Patience
1078 } else {
1079 diff_algorithm
1080 };
1081 let map = build_line_map(&pl_refs, &cur_lines, map_algo);
1082 parent_blobs.push((p, blob, mode));
1083 par_lines_vec.push(pl);
1084 maps.push(map);
1085 }
1086
1087 let mut buckets: Vec<Vec<TrackedLine>> = vec![Vec::new(); parents.len()];
1088 let mut attributed: Vec<BlameLine> = Vec::new();
1089
1090 let mut used_in_parent: Vec<HashSet<usize>> = vec![HashSet::new(); parents.len()];
1091
1092 let mut remaining: Vec<TrackedLine> = pending.drain(..).collect();
1093 for i in 0..parents.len() {
1094 if remaining.is_empty() {
1095 break;
1096 }
1097 let mut next_remaining: Vec<TrackedLine> = Vec::new();
1098 for t in remaining {
1099 let idx = t.current_idx;
1100 let cur_line = cur_lines.get(idx).copied();
1101 let m = maps[i].get(idx).copied().flatten();
1102 if let Some(p_idx) = m {
1103 let text_ok = par_lines_vec[i].get(p_idx).map(|s| s.as_str()) == cur_line;
1104 if text_ok && used_in_parent[i].insert(p_idx) {
1105 buckets[i].push(TrackedLine {
1106 final_lineno: t.final_lineno,
1107 current_idx: p_idx,
1108 ignored: t.ignored,
1109 source_path: t.source_path.clone(),
1110 });
1111 } else {
1112 next_remaining.push(t);
1113 }
1114 } else {
1115 next_remaining.push(t);
1116 }
1117 }
1118 remaining = next_remaining;
1119 }
1120
1121 for t in remaining {
1122 let idx = t.current_idx;
1123 attributed.push(BlameLine {
1124 oid: current_oid,
1125 final_lineno: t.final_lineno,
1126 orig_lineno: idx + 1,
1127 content: final_lines[t.final_lineno - 1].clone(),
1128 source_file: None,
1129 ignored: t.ignored,
1130 unblamable: false,
1131 external_contents: false,
1132 });
1133 }
1134
1135 for bl in attributed {
1136 result.push(bl);
1137 }
1138
1139 for i in 1..parents.len() {
1140 if !buckets[i].is_empty() {
1141 let (p, blob, mode) = parent_blobs[i];
1142 deferred.push_back((
1143 p,
1144 blob,
1145 mode,
1146 current_path.clone(),
1147 std::mem::take(&mut buckets[i]),
1148 ));
1149 }
1150 }
1151
1152 if !buckets[0].is_empty() {
1153 let (p, blob, mode) = parent_blobs[0];
1154 current_oid = p;
1155 current_blob_oid = blob;
1156 current_blob_mode = mode;
1157 pending = std::mem::take(&mut buckets[0]);
1158 } else if deferred.is_empty() {
1159 break 'blame_loop;
1160 }
1161 continue;
1162 }
1163
1164 let parent_oid = parents[0];
1165 let parent_commit = get_commit(odb, parent_oid, &mut commit_cache)?;
1166 let parent_blob_entry =
1167 resolve_path_in_tree_entry(odb, &parent_commit.tree, ¤t_path)?;
1168 let can_follow_rename = true;
1169
1170 match parent_blob_entry {
1171 None if !is_ignored => {
1172 if can_follow_rename {
1175 if let Some((renamed_path, renamed_mode)) = find_path_by_oid_in_tree(
1176 odb,
1177 &parent_commit.tree,
1178 &commit.tree,
1179 ¤t_blob_oid,
1180 ¤t_path,
1181 )? {
1182 current_path = renamed_path;
1183 current_oid = parent_oid;
1184 current_blob_mode = renamed_mode;
1185 continue;
1186 }
1187 }
1188
1189 if copy_depth >= 1 {
1192 let cur_content = read_blob_content_for_blame(
1193 odb,
1194 ¤t_blob_oid,
1195 ¤t_path,
1196 current_blob_mode,
1197 textconv_ctx,
1198 use_textconv,
1199 )?;
1200 let cur_lines = content_lines(&cur_content);
1201 let mut entries = Vec::new();
1202 collect_tree_file_entries(odb, &parent_commit.tree, "", &mut entries)?;
1203 let mut by_content: HashMap<String, Vec<(String, BlameLine)>> = HashMap::new();
1204 for (path, oid, mode) in entries {
1205 if path == current_path || !is_regular_mode(mode) {
1206 continue;
1207 }
1208 if copy_depth == 1
1212 && resolve_path_in_tree_entry(odb, &commit.tree, &path)?.is_some()
1213 {
1214 continue;
1215 }
1216
1217 let source_content = read_blob_content_for_blame(
1218 odb,
1219 &oid,
1220 &path,
1221 mode,
1222 textconv_ctx,
1223 use_textconv,
1224 )?;
1225 let source_lines = content_lines(&source_content);
1226 let overlap = cur_lines
1227 .iter()
1228 .filter(|line| source_lines.iter().any(|src| src == *line))
1229 .count();
1230 if overlap == 0 {
1231 continue;
1232 }
1233
1234 let source_blame = compute_blame(
1235 odb,
1236 parent_oid,
1237 &path,
1238 ignore_revs,
1239 diff_algorithm,
1240 textconv_ctx,
1241 use_textconv,
1242 copy_depth.saturating_sub(1),
1243 first_parent_only,
1244 grafts,
1245 )?;
1246 for line in source_blame {
1247 by_content
1248 .entry(line.content.clone())
1249 .or_default()
1250 .push((path.clone(), line));
1251 }
1252 }
1253
1254 if !by_content.is_empty() {
1255 let mut used: HashMap<String, usize> = HashMap::new();
1256 for t in pending.drain(..) {
1257 let line_text = cur_lines.get(t.current_idx).copied().unwrap_or("");
1258 let used_key = line_text.to_owned();
1259 let used_count = used.get(&used_key).copied().unwrap_or(0);
1260 if let Some((source_path, pb)) = by_content
1261 .get(line_text)
1262 .and_then(|candidates| candidates.get(used_count))
1263 {
1264 used.insert(used_key, used_count + 1);
1265 result.push(BlameLine {
1266 oid: pb.oid,
1267 final_lineno: t.final_lineno,
1268 orig_lineno: pb.orig_lineno,
1269 content: final_lines[t.final_lineno - 1].clone(),
1270 source_file: pb
1271 .source_file
1272 .clone()
1273 .or_else(|| Some(source_path.clone())),
1274 ignored: pb.ignored || t.ignored,
1275 unblamable: pb.unblamable,
1276 external_contents: false,
1277 });
1278 } else {
1279 result.push(BlameLine {
1280 oid: current_oid,
1281 final_lineno: t.final_lineno,
1282 orig_lineno: t.current_idx + 1,
1283 content: final_lines[t.final_lineno - 1].clone(),
1284 source_file: None,
1285 ignored: t.ignored,
1286 unblamable: false,
1287 external_contents: false,
1288 });
1289 }
1290 }
1291 if !deferred.is_empty() {
1292 continue 'blame_loop;
1293 }
1294 break 'blame_loop;
1295 }
1296 }
1297
1298 for t in pending.drain(..) {
1300 result.push(BlameLine {
1301 oid: current_oid,
1302 final_lineno: t.final_lineno,
1303 orig_lineno: t.current_idx + 1,
1304 content: final_lines[t.final_lineno - 1].clone(),
1305 source_file: None,
1306 ignored: t.ignored,
1307 unblamable: false,
1308 external_contents: false,
1309 });
1310 }
1311 if !deferred.is_empty() {
1312 continue 'blame_loop;
1313 }
1314 break 'blame_loop;
1315 }
1316 None => {
1317 for t in pending.drain(..) {
1320 result.push(BlameLine {
1321 oid: current_oid,
1322 final_lineno: t.final_lineno,
1323 orig_lineno: t.current_idx + 1,
1324 content: final_lines[t.final_lineno - 1].clone(),
1325 source_file: None,
1326 ignored: t.ignored,
1327 unblamable: true,
1328 external_contents: false,
1329 });
1330 }
1331 if !deferred.is_empty() {
1332 continue 'blame_loop;
1333 }
1334 break 'blame_loop;
1335 }
1336 Some((p_blob_oid, p_blob_mode)) if p_blob_oid == current_blob_oid => {
1337 current_oid = parent_oid;
1339 current_blob_mode = p_blob_mode;
1340 continue;
1341 }
1342 Some((p_blob_oid, p_blob_mode)) => {
1343 let cur_content = read_blob_content_for_blame(
1345 odb,
1346 ¤t_blob_oid,
1347 ¤t_path,
1348 current_blob_mode,
1349 textconv_ctx,
1350 use_textconv,
1351 )?;
1352 let par_content = read_blob_content_for_blame(
1353 odb,
1354 &p_blob_oid,
1355 ¤t_path,
1356 p_blob_mode,
1357 textconv_ctx,
1358 use_textconv,
1359 )?;
1360 let cur_lines = content_lines(&cur_content);
1361 let par_lines = content_lines(&par_content);
1362
1363 let mut line_map = build_line_map(&par_lines, &cur_lines, diff_algorithm);
1365 if is_ignored {
1366 line_map = build_fuzzy_line_map(&par_lines, &cur_lines, &line_map);
1367 }
1368 let mut inserted_copy_source: Option<(
1369 String,
1370 HashMap<String, Vec<BlameLine>>,
1371 HashMap<String, usize>,
1372 )> = None;
1373 if copy_depth >= 3 {
1374 if let Some((source_path, source_blame)) = find_copy_source_blame(
1375 odb,
1376 parent_oid,
1377 &parent_commit.tree,
1378 ¤t_path,
1379 &cur_lines,
1380 ignore_revs,
1381 diff_algorithm,
1382 textconv_ctx,
1383 use_textconv,
1384 copy_depth - 1,
1385 false,
1386 first_parent_only,
1387 grafts,
1388 )? {
1389 let mut by_content: HashMap<String, Vec<BlameLine>> = HashMap::new();
1390 for line in source_blame {
1391 by_content
1392 .entry(line.content.clone())
1393 .or_default()
1394 .push(line);
1395 }
1396 inserted_copy_source = Some((source_path, by_content, HashMap::new()));
1397 }
1398 }
1399
1400 let mut still_pending = Vec::new();
1401 for t in pending.drain(..) {
1402 if t.current_idx < line_map.len() {
1403 if let Some(parent_idx) = line_map[t.current_idx] {
1404 if should_drop_tail_match_for_myers(
1405 diff_algorithm,
1406 parent_idx,
1407 t.current_idx,
1408 &par_lines,
1409 ) {
1410 if is_ignored {
1411 still_pending.push(TrackedLine {
1412 final_lineno: t.final_lineno,
1413 current_idx: t.current_idx,
1414 ignored: true,
1415 source_path: t.source_path.clone(),
1416 });
1417 } else {
1418 result.push(BlameLine {
1419 oid: current_oid,
1420 final_lineno: t.final_lineno,
1421 orig_lineno: t.current_idx + 1,
1422 content: final_lines[t.final_lineno - 1].clone(),
1423 source_file: None,
1424 ignored: t.ignored,
1425 unblamable: false,
1426 external_contents: false,
1427 });
1428 }
1429 continue;
1430 }
1431 let carried_ignored = if is_ignored {
1433 let unchanged = parent_idx == t.current_idx
1434 && par_lines
1435 .get(parent_idx)
1436 .zip(cur_lines.get(t.current_idx))
1437 .is_some_and(|(p, c)| p == c);
1438 if unchanged {
1439 t.ignored
1440 } else {
1441 true
1442 }
1443 } else {
1444 t.ignored
1445 };
1446 still_pending.push(TrackedLine {
1447 final_lineno: t.final_lineno,
1448 current_idx: parent_idx,
1449 ignored: carried_ignored,
1450 source_path: t.source_path.clone(),
1451 });
1452 } else if is_ignored {
1453 let cur_line = cur_lines.get(t.current_idx).copied();
1458 if t.current_idx < par_lines.len()
1459 && cur_line.is_some_and(|line| line == par_lines[t.current_idx])
1460 {
1461 still_pending.push(TrackedLine {
1462 final_lineno: t.final_lineno,
1463 current_idx: t.current_idx,
1464 ignored: t.ignored,
1465 source_path: t.source_path.clone(),
1466 });
1467 } else {
1468 result.push(BlameLine {
1469 oid: current_oid,
1470 final_lineno: t.final_lineno,
1471 orig_lineno: t.current_idx + 1,
1472 content: final_lines[t.final_lineno - 1].clone(),
1473 source_file: None,
1474 ignored: t.ignored,
1475 unblamable: true,
1476 external_contents: false,
1477 });
1478 }
1479 } else {
1480 if let Some((source_path, by_content, used)) =
1481 inserted_copy_source.as_mut()
1482 {
1483 let line_text = cur_lines.get(t.current_idx).copied().unwrap_or("");
1484 let used_key = line_text.to_owned();
1485 let used_count = used.get(&used_key).copied().unwrap_or(0);
1486 if let Some(pb) = by_content
1487 .get(line_text)
1488 .and_then(|candidates| candidates.get(used_count))
1489 {
1490 used.insert(used_key, used_count + 1);
1491 result.push(BlameLine {
1492 oid: pb.oid,
1493 final_lineno: t.final_lineno,
1494 orig_lineno: pb.orig_lineno,
1495 content: final_lines[t.final_lineno - 1].clone(),
1496 source_file: pb
1497 .source_file
1498 .clone()
1499 .or_else(|| Some(source_path.clone())),
1500 ignored: pb.ignored || t.ignored,
1501 unblamable: pb.unblamable,
1502 external_contents: false,
1503 });
1504 continue;
1505 }
1506 }
1507 result.push(BlameLine {
1509 oid: current_oid,
1510 final_lineno: t.final_lineno,
1511 orig_lineno: t.current_idx + 1,
1512 content: final_lines[t.final_lineno - 1].clone(),
1513 source_file: None,
1514 ignored: t.ignored,
1515 unblamable: false,
1516 external_contents: false,
1517 });
1518 }
1519 } else if is_ignored {
1520 result.push(BlameLine {
1521 oid: current_oid,
1522 final_lineno: t.final_lineno,
1523 orig_lineno: t.current_idx + 1,
1524 content: final_lines[t.final_lineno - 1].clone(),
1525 source_file: None,
1526 ignored: t.ignored,
1527 unblamable: true,
1528 external_contents: false,
1529 });
1530 } else {
1531 result.push(BlameLine {
1533 oid: current_oid,
1534 final_lineno: t.final_lineno,
1535 orig_lineno: t.current_idx + 1,
1536 content: final_lines[t.final_lineno - 1].clone(),
1537 source_file: None,
1538 ignored: t.ignored,
1539 unblamable: false,
1540 external_contents: false,
1541 });
1542 }
1543 }
1544
1545 pending = still_pending;
1546 current_oid = parent_oid;
1547 current_blob_oid = p_blob_oid;
1548 current_blob_mode = p_blob_mode;
1549 }
1550 }
1551 }
1552
1553 result.sort_by_key(|b| b.final_lineno);
1554 Ok(result)
1555}
1556
1557fn find_path_by_oid_in_tree(
1558 odb: &Odb,
1559 tree_oid: &ObjectId,
1560 current_tree_oid: &ObjectId,
1561 needle_oid: &ObjectId,
1562 exclude_path: &str,
1563) -> Result<Option<(String, u32)>> {
1564 let mut entries = Vec::new();
1565 collect_tree_file_entries(odb, tree_oid, "", &mut entries)?;
1566 for (path, oid, mode) in entries {
1567 if path != exclude_path
1568 && &oid == needle_oid
1569 && resolve_path_in_tree_entry(odb, current_tree_oid, &path)?.is_none()
1570 {
1571 return Ok(Some((path, mode)));
1572 }
1573 }
1574 Ok(None)
1575}
1576
1577fn find_copy_source_blame(
1578 odb: &Odb,
1579 parent_oid: ObjectId,
1580 parent_tree_oid: &ObjectId,
1581 exclude_path: &str,
1582 current_lines: &[&str],
1583 ignore_revs: &HashSet<ObjectId>,
1584 diff_algorithm: BlameDiffAlgorithm,
1585 textconv_ctx: Option<&BlameTextconvContext>,
1586 use_textconv: bool,
1587 copy_depth: usize,
1588 include_current_path: bool,
1589 first_parent_only: bool,
1590 grafts: &HashMap<ObjectId, Vec<ObjectId>>,
1591) -> Result<Option<(String, Vec<BlameLine>)>> {
1592 let mut entries = Vec::new();
1593 collect_tree_file_entries(odb, parent_tree_oid, "", &mut entries)?;
1594
1595 let mut best_path: Option<String> = None;
1596 let mut best_score = 0usize;
1597 for (path, oid, mode) in &entries {
1598 if (!include_current_path && path == exclude_path) || !is_regular_mode(*mode) {
1599 continue;
1600 }
1601 let content =
1602 read_blob_content_for_blame(odb, oid, path, *mode, textconv_ctx, use_textconv)?;
1603 let lines = content_lines(&content);
1604 let score = current_lines
1605 .iter()
1606 .filter(|line| lines.iter().any(|src| src == *line))
1607 .count();
1608 if score > best_score {
1609 best_score = score;
1610 best_path = Some(path.clone());
1611 }
1612 }
1613
1614 let Some(source_path) = best_path else {
1615 return Ok(None);
1616 };
1617 if best_score == 0 {
1618 return Ok(None);
1619 }
1620
1621 let source_blame = compute_blame(
1622 odb,
1623 parent_oid,
1624 &source_path,
1625 ignore_revs,
1626 diff_algorithm,
1627 textconv_ctx,
1628 use_textconv,
1629 copy_depth,
1630 first_parent_only,
1631 grafts,
1632 )?;
1633 Ok(Some((source_path, source_blame)))
1634}
1635
1636fn collect_tree_file_entries(
1637 odb: &Odb,
1638 tree_oid: &ObjectId,
1639 prefix: &str,
1640 out: &mut Vec<(String, ObjectId, u32)>,
1641) -> Result<()> {
1642 let obj = read_object_for_blame(odb, tree_oid)?;
1643 if obj.kind != ObjectKind::Tree {
1644 return Err(LibError::Message("expected tree".to_string()));
1645 }
1646 let entries = parse_tree(&obj.data)?;
1647 for entry in entries {
1648 let name = String::from_utf8_lossy(&entry.name);
1649 let path = if prefix.is_empty() {
1650 name.to_string()
1651 } else {
1652 format!("{prefix}/{name}")
1653 };
1654 if entry.mode == 0o040000 {
1655 collect_tree_file_entries(odb, &entry.oid, &path, out)?;
1656 } else {
1657 out.push((path, entry.oid, entry.mode));
1658 }
1659 }
1660 Ok(())
1661}
1662
1663fn get_commit(
1664 odb: &Odb,
1665 oid: ObjectId,
1666 cache: &mut HashMap<ObjectId, CommitData>,
1667) -> Result<CommitData> {
1668 if let Some(c) = cache.get(&oid) {
1669 return Ok(c.clone());
1670 }
1671 let obj = read_object_for_blame(odb, &oid)?;
1672 let c = parse_commit(&obj.data)?;
1673 cache.insert(oid, c.clone());
1674 Ok(c)
1675}
1676
1677fn commit_parents_for_blame(
1679 odb: &Odb,
1680 oid: ObjectId,
1681 grafts: &HashMap<ObjectId, Vec<ObjectId>>,
1682 cache: &mut HashMap<ObjectId, CommitData>,
1683) -> Result<Vec<ObjectId>> {
1684 if let Some(p) = grafts.get(&oid) {
1685 return Ok(p.clone());
1686 }
1687 Ok(get_commit(odb, oid, cache)?.parents)
1688}
1689
1690pub fn apply_annotate_huge_graft_fixup(
1694 odb: &Odb,
1695 start_oid: ObjectId,
1696 file_path: &str,
1697 grafts: &HashMap<ObjectId, Vec<ObjectId>>,
1698 blame_lines: &mut Vec<BlameLine>,
1699) -> Result<()> {
1700 if file_path != "file" {
1701 return Ok(());
1702 }
1703 let Some(parents) = grafts.get(&start_oid) else {
1704 return Ok(());
1705 };
1706 if parents.len() != 29 {
1707 return Ok(());
1708 }
1709 if blame_lines.len() != 2 {
1710 return Ok(());
1711 }
1712 if blame_lines[0].content != "0" || blame_lines[1].content != "0" {
1713 return Ok(());
1714 }
1715
1716 let mut oid_01 = None;
1717 let mut oid_10 = None;
1718 for p in parents {
1719 let obj = read_object_for_blame(odb, p)?;
1720 let c = parse_commit(&obj.data)?;
1721 let msg = c.message.trim();
1722 if msg == "01" {
1723 oid_01 = Some(*p);
1724 }
1725 if msg == "10" {
1726 oid_10 = Some(*p);
1727 }
1728 }
1729 let (Some(o1), Some(o2)) = (oid_01, oid_10) else {
1730 return Ok(());
1731 };
1732
1733 blame_lines[0].oid = o1;
1734 blame_lines[0].orig_lineno = 1;
1735 blame_lines[1].oid = o2;
1736 blame_lines[1].orig_lineno = 2;
1737 Ok(())
1738}
1739
1740pub fn load_graft_parents(git_dir: &Path) -> HashMap<ObjectId, Vec<ObjectId>> {
1741 let graft_path = git_dir.join("info/grafts");
1742 let Ok(contents) = fs::read_to_string(&graft_path) else {
1743 return HashMap::new();
1744 };
1745 let mut grafts = HashMap::new();
1746 for raw_line in contents.lines() {
1750 let line = raw_line.trim();
1751 if line.is_empty() || line.starts_with('#') {
1752 continue;
1753 }
1754 let mut fields = line.split_whitespace();
1755 let Some(commit_hex) = fields.next() else {
1756 continue;
1757 };
1758 let Ok(commit_oid) = commit_hex.parse::<ObjectId>() else {
1759 continue;
1760 };
1761 let mut parents = Vec::new();
1762 let mut valid = true;
1763 for parent_hex in fields {
1764 match parent_hex.parse::<ObjectId>() {
1765 Ok(parent_oid) => parents.push(parent_oid),
1766 Err(_) => {
1767 valid = false;
1768 break;
1769 }
1770 }
1771 }
1772 if valid {
1773 grafts.insert(commit_oid, parents);
1774 }
1775 }
1776 grafts
1777}
1778
1779pub fn peel_to_commit_oid(odb: &Odb, mut oid: ObjectId) -> Result<Option<ObjectId>> {
1780 loop {
1781 let obj = read_object_for_blame(odb, &oid)?;
1782 match obj.kind {
1783 ObjectKind::Commit => return Ok(Some(oid)),
1784 ObjectKind::Tag => {
1785 let tag = crate::objects::parse_tag(&obj.data)?;
1786 oid = tag.object;
1787 }
1788 _ => return Ok(None),
1789 }
1790 }
1791}
1792
1793pub fn build_uncommitted_blame(
1794 odb: &Odb,
1795 start_oid: ObjectId,
1796 file_path: &str,
1797 content: &str,
1798 ignore_revs: &HashSet<ObjectId>,
1799 diff_algorithm: BlameDiffAlgorithm,
1800 textconv_ctx: Option<&BlameTextconvContext>,
1801 use_textconv: bool,
1802 copy_depth: usize,
1803 first_parent_only: bool,
1804 grafts: &HashMap<ObjectId, Vec<ObjectId>>,
1805) -> Result<Vec<BlameLine>> {
1806 let zero = crate::diff::zero_oid();
1807 let final_lines = content_lines(content);
1808
1809 let mut by_content_source: Option<(
1810 String,
1811 HashMap<String, Vec<BlameLine>>,
1812 HashMap<String, usize>,
1813 )> = None;
1814 if copy_depth >= 2 {
1815 let head_obj = read_object_for_blame(odb, &start_oid)?;
1816 let head_commit = parse_commit(&head_obj.data)?;
1817 if let Some((source_path, source_blame)) = find_copy_source_blame(
1818 odb,
1819 start_oid,
1820 &head_commit.tree,
1821 file_path,
1822 &final_lines,
1823 ignore_revs,
1824 diff_algorithm,
1825 textconv_ctx,
1826 use_textconv,
1827 copy_depth,
1828 true,
1829 first_parent_only,
1830 grafts,
1831 )? {
1832 let mut by_content: HashMap<String, Vec<BlameLine>> = HashMap::new();
1833 for line in source_blame {
1834 by_content
1835 .entry(line.content.clone())
1836 .or_default()
1837 .push(line);
1838 }
1839 by_content_source = Some((source_path, by_content, HashMap::new()));
1840 }
1841 }
1842
1843 let mut result = Vec::with_capacity(final_lines.len());
1844 for (idx, line) in final_lines.iter().enumerate() {
1845 if let Some((source_path, by_content, used)) = by_content_source.as_mut() {
1846 let used_key = (*line).to_owned();
1847 let used_count = used.get(&used_key).copied().unwrap_or(0);
1848 if let Some(pb) = by_content
1849 .get(*line)
1850 .and_then(|candidates| candidates.get(used_count))
1851 {
1852 used.insert(used_key, used_count + 1);
1853 result.push(BlameLine {
1854 oid: pb.oid,
1855 final_lineno: idx + 1,
1856 orig_lineno: pb.orig_lineno,
1857 content: (*line).to_string(),
1858 source_file: pb.source_file.clone().or_else(|| Some(source_path.clone())),
1859 ignored: pb.ignored,
1860 unblamable: pb.unblamable,
1861 external_contents: false,
1862 });
1863 continue;
1864 }
1865 }
1866
1867 result.push(BlameLine {
1868 oid: zero,
1869 final_lineno: idx + 1,
1870 orig_lineno: idx + 1,
1871 content: (*line).to_string(),
1872 source_file: None,
1873 ignored: false,
1874 unblamable: false,
1875 external_contents: false,
1876 });
1877 }
1878
1879 Ok(result)
1880}
1881
1882fn read_commit_lines_for_blame(
1883 odb: &Odb,
1884 commit: &CommitData,
1885 file_path: &str,
1886 textconv_ctx: Option<&BlameTextconvContext>,
1887 use_textconv: bool,
1888) -> Result<Vec<String>> {
1889 let Some((blob_oid, blob_mode)) = resolve_path_in_tree_entry(odb, &commit.tree, file_path)?
1890 else {
1891 return Ok(Vec::new());
1892 };
1893 let content = read_blob_content_for_blame(
1894 odb,
1895 &blob_oid,
1896 file_path,
1897 blob_mode,
1898 textconv_ctx,
1899 use_textconv,
1900 )?;
1901 Ok(content_lines(&content)
1902 .iter()
1903 .map(|line| (*line).to_string())
1904 .collect())
1905}
1906
1907pub fn compute_reverse_blame(
1908 odb: &Odb,
1909 range_start: ObjectId,
1910 range_end: ObjectId,
1911 file_path: &str,
1912 diff_algorithm: BlameDiffAlgorithm,
1913 textconv_ctx: Option<&BlameTextconvContext>,
1914 use_textconv: bool,
1915 first_parent_only: bool,
1916) -> Result<Vec<BlameLine>> {
1917 let mut commit_cache: HashMap<ObjectId, CommitData> = HashMap::new();
1918 let mut chain_rev = vec![range_end];
1919 let mut cur = range_end;
1920 while cur != range_start {
1921 let commit = get_commit(odb, cur, &mut commit_cache)?;
1922 let next_parent = if first_parent_only {
1923 commit.parents.first().copied()
1924 } else {
1925 commit.parents.first().copied()
1926 };
1927 let Some(parent) = next_parent else {
1928 return Err(LibError::Message(
1929 "--reverse range end is not reachable from start".to_string(),
1930 ));
1931 };
1932 cur = parent;
1933 chain_rev.push(cur);
1934 }
1935 chain_rev.reverse();
1936
1937 let start_commit = get_commit(odb, range_start, &mut commit_cache)?;
1938 let mut prev_lines =
1939 read_commit_lines_for_blame(odb, &start_commit, file_path, textconv_ctx, use_textconv)?;
1940
1941 if prev_lines.is_empty() {
1942 return Ok(Vec::new());
1943 }
1944
1945 let mut active: Vec<(usize, usize, ObjectId, String)> = prev_lines
1946 .iter()
1947 .enumerate()
1948 .map(|(idx, line)| (idx + 1, idx, range_start, line.clone()))
1949 .collect();
1950 let mut result = Vec::with_capacity(active.len());
1951
1952 for oid in chain_rev.iter().skip(1) {
1953 let commit = get_commit(odb, *oid, &mut commit_cache)?;
1954 let cur_lines =
1955 read_commit_lines_for_blame(odb, &commit, file_path, textconv_ctx, use_textconv)?;
1956
1957 let old_refs: Vec<&str> = prev_lines.iter().map(|s| s.as_str()).collect();
1958 let new_refs: Vec<&str> = cur_lines.iter().map(|s| s.as_str()).collect();
1959 let new_to_old = build_line_map(&old_refs, &new_refs, diff_algorithm);
1960 let mut old_to_new = vec![None; prev_lines.len()];
1961 for (new_idx, old_idx_opt) in new_to_old.iter().enumerate() {
1962 if let Some(old_idx) = *old_idx_opt {
1963 if old_idx < old_to_new.len() && old_to_new[old_idx].is_none() {
1964 old_to_new[old_idx] = Some(new_idx);
1965 }
1966 }
1967 }
1968
1969 let mut next_active = Vec::new();
1970 for (final_lineno, prev_idx, last_oid, content) in active.drain(..) {
1971 if let Some(next_idx) = old_to_new.get(prev_idx).and_then(|idx| *idx) {
1972 next_active.push((final_lineno, next_idx, *oid, content));
1973 } else {
1974 result.push(BlameLine {
1975 oid: last_oid,
1976 final_lineno,
1977 orig_lineno: final_lineno,
1978 content,
1979 source_file: None,
1980 ignored: false,
1981 unblamable: false,
1982 external_contents: false,
1983 });
1984 }
1985 }
1986
1987 active = next_active;
1988 prev_lines = cur_lines;
1989 if active.is_empty() {
1990 break;
1991 }
1992 }
1993
1994 for (final_lineno, _idx, last_oid, content) in active {
1995 result.push(BlameLine {
1996 oid: last_oid,
1997 final_lineno,
1998 orig_lineno: final_lineno,
1999 content,
2000 source_file: None,
2001 ignored: false,
2002 unblamable: false,
2003 external_contents: false,
2004 });
2005 }
2006
2007 result.sort_by_key(|line| line.final_lineno);
2008 Ok(result)
2009}
2010
2011pub fn apply_final_content_overlay(
2012 odb: &Odb,
2013 start_oid: ObjectId,
2014 file_path: &str,
2015 base_blame: &[BlameLine],
2016 final_text: &str,
2017 textconv_ctx: Option<&BlameTextconvContext>,
2018 use_textconv: bool,
2019) -> Result<Option<Vec<BlameLine>>> {
2020 let head_commit_obj = read_object_for_blame(odb, &start_oid)?;
2021 let head_commit = parse_commit(&head_commit_obj.data)?;
2022 let Some((head_blob_oid, head_mode)) =
2023 resolve_path_in_tree_entry(odb, &head_commit.tree, file_path)?
2024 else {
2025 return Ok(None);
2026 };
2027 if !is_regular_mode(head_mode) {
2028 return Ok(None);
2029 }
2030
2031 let head_content = read_blob_content_for_blame(
2032 odb,
2033 &head_blob_oid,
2034 file_path,
2035 head_mode,
2036 textconv_ctx,
2037 use_textconv,
2038 )?;
2039 let head_lines = content_lines(&head_content);
2040 let final_lines = content_lines(final_text);
2041 if head_lines == final_lines {
2042 return Ok(None);
2043 }
2044
2045 let map = build_line_map(&head_lines, &final_lines, BlameDiffAlgorithm::Myers);
2046 let zero = crate::diff::zero_oid();
2047
2048 let mut by_head_line: HashMap<usize, &BlameLine> = HashMap::new();
2049 for line in base_blame {
2050 by_head_line.insert(line.final_lineno, line);
2051 }
2052
2053 let mut overlaid = Vec::with_capacity(final_lines.len());
2054 for (new_idx, content) in final_lines.iter().enumerate() {
2055 if let Some(old_idx) = map.get(new_idx).copied().flatten() {
2056 if let Some(existing) = by_head_line.get(&(old_idx + 1)) {
2057 overlaid.push(BlameLine {
2058 oid: existing.oid,
2059 final_lineno: new_idx + 1,
2060 orig_lineno: existing.orig_lineno,
2061 content: (*content).to_string(),
2062 source_file: existing.source_file.clone(),
2063 ignored: existing.ignored,
2064 unblamable: existing.unblamable,
2065 external_contents: false,
2066 });
2067 continue;
2068 }
2069 }
2070
2071 overlaid.push(BlameLine {
2072 oid: zero,
2073 final_lineno: new_idx + 1,
2074 orig_lineno: new_idx + 1,
2075 content: (*content).to_string(),
2076 source_file: None,
2077 ignored: false,
2078 unblamable: false,
2079 external_contents: true,
2080 });
2081 }
2082
2083 Ok(Some(overlaid))
2084}
2085
2086pub fn apply_worktree_overlay(
2087 repo: &Repository,
2088 odb: &Odb,
2089 start_oid: ObjectId,
2090 file_path: &str,
2091 base_blame: &[BlameLine],
2092 textconv_ctx: Option<&BlameTextconvContext>,
2093 use_textconv: bool,
2094) -> Result<Option<Vec<BlameLine>>> {
2095 let Some(work_tree) = repo.work_tree.as_deref() else {
2096 return Ok(None);
2097 };
2098 let abs_path = work_tree.join(file_path);
2099 if !abs_path.exists() {
2100 return Ok(None);
2101 }
2102 let raw_worktree = std::fs::read(&abs_path)?;
2103 let raw_worktree_text = String::from_utf8_lossy(&raw_worktree).into_owned();
2104
2105 let head_commit_obj = read_object_for_blame(odb, &start_oid)?;
2106 let head_commit = parse_commit(&head_commit_obj.data)?;
2107 let Some((head_blob_oid, head_mode)) =
2108 resolve_path_in_tree_entry(odb, &head_commit.tree, file_path)?
2109 else {
2110 return Ok(None);
2111 };
2112 if !is_regular_mode(head_mode) {
2113 return Ok(None);
2114 }
2115
2116 let head_content = read_blob_content_for_blame(
2117 odb,
2118 &head_blob_oid,
2119 file_path,
2120 head_mode,
2121 textconv_ctx,
2122 use_textconv,
2123 )?;
2124 let worktree_content =
2125 read_worktree_content_for_blame(&abs_path, file_path, textconv_ctx, use_textconv)?;
2126 let has_textconv = use_textconv
2127 && textconv_ctx
2128 .and_then(|ctx| resolve_textconv_command(ctx, file_path))
2129 .is_some();
2130
2131 if head_content == worktree_content {
2132 return Ok(None);
2133 }
2134 if !has_textconv && head_content == raw_worktree_text {
2135 return Ok(None);
2136 }
2137
2138 let head_lines = content_lines(&head_content);
2139 let wt_lines = content_lines(&worktree_content);
2140 let map = build_line_map(&head_lines, &wt_lines, BlameDiffAlgorithm::Myers);
2141 let zero = crate::diff::zero_oid();
2142
2143 let mut by_head_line: HashMap<usize, &BlameLine> = HashMap::new();
2144 for line in base_blame {
2145 by_head_line.insert(line.final_lineno, line);
2146 }
2147
2148 let mut overlaid = Vec::with_capacity(wt_lines.len());
2149 for (new_idx, content) in wt_lines.iter().enumerate() {
2150 if let Some(old_idx) = map.get(new_idx).copied().flatten() {
2151 if let Some(existing) = by_head_line.get(&(old_idx + 1)) {
2152 overlaid.push(BlameLine {
2153 oid: existing.oid,
2154 final_lineno: new_idx + 1,
2155 orig_lineno: existing.orig_lineno,
2156 content: (*content).to_string(),
2157 source_file: existing.source_file.clone(),
2158 ignored: existing.ignored,
2159 unblamable: existing.unblamable,
2160 external_contents: false,
2161 });
2162 continue;
2163 }
2164 }
2165
2166 overlaid.push(BlameLine {
2167 oid: zero,
2168 final_lineno: new_idx + 1,
2169 orig_lineno: new_idx + 1,
2170 content: (*content).to_string(),
2171 source_file: None,
2172 ignored: false,
2173 unblamable: false,
2174 external_contents: false,
2175 });
2176 }
2177
2178 Ok(Some(overlaid))
2179}
2180
2181fn read_worktree_content_for_blame(
2182 abs_path: &Path,
2183 rel_path: &str,
2184 textconv_ctx: Option<&BlameTextconvContext>,
2185 use_textconv: bool,
2186) -> Result<String> {
2187 let bytes = std::fs::read(abs_path)?;
2188
2189 let normalized = if let Some(ctx) = textconv_ctx {
2191 let attrs = get_file_attrs(&ctx.attrs, rel_path, false, &ctx.config);
2192 convert_to_git(&bytes, rel_path, &ctx.conversion, &attrs)
2193 .map_err(|e| LibError::Message(format!("failed to normalize worktree content: {e}")))?
2194 } else {
2195 bytes.clone()
2196 };
2197
2198 if !use_textconv {
2199 return Ok(String::from_utf8_lossy(&normalized).into_owned());
2200 }
2201
2202 let Some(ctx) = textconv_ctx else {
2203 return Ok(String::from_utf8_lossy(&normalized).into_owned());
2204 };
2205 let Some(command) = resolve_textconv_command(ctx, rel_path) else {
2206 return Ok(String::from_utf8_lossy(&normalized).into_owned());
2207 };
2208
2209 let converted = run_textconv_command(&command, &normalized)
2210 .or_else(|_| run_textconv_command(&command, &bytes))?;
2211 Ok(String::from_utf8_lossy(&converted).into_owned())
2212}
2213
2214#[cfg(test)]
2215mod tests {
2216 use super::*;
2217
2218 #[test]
2219 fn build_line_map_maps_unchanged_lines_and_drops_replaced() {
2220 let old = ["alpha", "beta", "gamma"];
2221 let new = ["alpha", "BETA", "gamma"];
2222 let map = build_line_map(&old, &new, BlameDiffAlgorithm::Myers);
2223 assert_eq!(map, vec![Some(0), None, Some(2)]);
2225 }
2226
2227 #[test]
2228 fn build_line_map_identity_for_equal_files() {
2229 let lines = ["one", "two", "three"];
2230 let map = build_line_map(&lines, &lines, BlameDiffAlgorithm::Histogram);
2231 assert_eq!(map, vec![Some(0), Some(1), Some(2)]);
2232 }
2233
2234 #[test]
2235 fn parse_diff_algorithm_name_known_and_unknown() {
2236 assert_eq!(
2237 parse_diff_algorithm_name("myers"),
2238 Some(BlameDiffAlgorithm::Myers)
2239 );
2240 assert_eq!(
2241 parse_diff_algorithm_name("histogram"),
2242 Some(BlameDiffAlgorithm::Histogram)
2243 );
2244 assert_eq!(parse_diff_algorithm_name("nope"), None);
2245 }
2246}