1use super::{process_changes, Change, UnblamedHunk};
2use crate::{BlameEntry, Error, Options, Outcome, Statistics};
3use gix_diff::blob::intern::TokenSource;
4use gix_diff::tree::Visit;
5use gix_hash::ObjectId;
6use gix_object::{
7 bstr::{BStr, BString},
8 FindExt,
9};
10use gix_traverse::commit::find as find_commit;
11use smallvec::SmallVec;
12use std::num::NonZeroU32;
13use std::ops::Range;
14
15pub fn file(
65 odb: impl gix_object::Find + gix_object::FindHeader,
66 suspect: ObjectId,
67 cache: Option<gix_commitgraph::Graph>,
68 resource_cache: &mut gix_diff::blob::Platform,
69 file_path: &BStr,
70 options: Options,
71) -> Result<Outcome, Error> {
72 let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);
73
74 let mut stats = Statistics::default();
75 let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
76 let blamed_file_entry_id = find_path_entry_in_commit(
77 &odb,
78 &suspect,
79 file_path,
80 cache.as_ref(),
81 &mut buf,
82 &mut buf2,
83 &mut stats,
84 )?
85 .ok_or_else(|| Error::FileMissing {
86 file_path: file_path.to_owned(),
87 commit_id: suspect,
88 })?;
89 let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
90 let num_lines_in_blamed = tokens_for_diffing(&blamed_file_blob).tokenize().count() as u32;
91
92 if num_lines_in_blamed == 0 {
94 return Ok(Outcome::default());
95 }
96
97 let range_in_blamed_file = one_based_inclusive_to_zero_based_exclusive_range(options.range, num_lines_in_blamed)?;
98 let mut hunks_to_blame = vec![UnblamedHunk {
99 range_in_blamed_file: range_in_blamed_file.clone(),
100 suspects: [(suspect, range_in_blamed_file)].into(),
101 }];
102
103 let (mut buf, mut buf2) = (Vec::new(), Vec::new());
104 let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
105 let mut queue: gix_revwalk::PriorityQueue<CommitTime, ObjectId> = gix_revwalk::PriorityQueue::new();
106 queue.insert(commit_time(commit)?, suspect);
107
108 let mut out = Vec::new();
109 let mut diff_state = gix_diff::tree::State::default();
110 let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
111 'outer: while let Some(suspect) = queue.pop_value() {
112 stats.commits_traversed += 1;
113 if hunks_to_blame.is_empty() {
114 break;
115 }
116
117 let is_still_suspect = hunks_to_blame.iter().any(|hunk| hunk.suspects.contains_key(&suspect));
118 if !is_still_suspect {
119 continue 'outer;
122 }
123
124 let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
125 let commit_time = commit_time(commit)?;
126
127 if let Some(since) = options.since {
128 if commit_time < since.seconds {
129 if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
130 break 'outer;
131 }
132
133 continue;
134 }
135 }
136
137 let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref(), &mut buf2)?;
138
139 if parent_ids.is_empty() {
140 if queue.is_empty() {
141 if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
147 break 'outer;
148 }
149 }
150 continue;
152 }
153
154 let mut entry = previous_entry
155 .take()
156 .filter(|(id, _)| *id == suspect)
157 .map(|(_, entry)| entry);
158 if entry.is_none() {
159 entry = find_path_entry_in_commit(
160 &odb,
161 &suspect,
162 file_path,
163 cache.as_ref(),
164 &mut buf,
165 &mut buf2,
166 &mut stats,
167 )?;
168 }
169
170 let Some(entry_id) = entry else {
171 continue;
172 };
173
174 #[cfg(debug_assertions)]
177 {
178 let source_blob = odb.find_blob(&entry_id, &mut buf)?.data.to_vec();
179 let mut source_interner = gix_diff::blob::intern::Interner::new(source_blob.len() / 100);
180 let source_lines_as_tokens: Vec<_> = tokens_for_diffing(&source_blob)
181 .tokenize()
182 .map(|token| source_interner.intern(token))
183 .collect();
184
185 let mut blamed_interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100);
186 let blamed_lines_as_tokens: Vec<_> = tokens_for_diffing(&blamed_file_blob)
187 .tokenize()
188 .map(|token| blamed_interner.intern(token))
189 .collect();
190
191 for hunk in hunks_to_blame.iter() {
192 if let Some(range_in_suspect) = hunk.suspects.get(&suspect) {
193 let range_in_blamed_file = hunk.range_in_blamed_file.clone();
194
195 for (blamed_line_number, source_line_number) in range_in_blamed_file.zip(range_in_suspect.clone()) {
196 let source_token = source_lines_as_tokens[source_line_number as usize];
197 let blame_token = blamed_lines_as_tokens[blamed_line_number as usize];
198
199 let source_line = BString::new(source_interner[source_token].into());
200 let blamed_line = BString::new(blamed_interner[blame_token].into());
201
202 assert_eq!(source_line, blamed_line);
203 }
204 }
205 }
206 }
207
208 for (pid, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() {
209 if let Some(parent_entry_id) = find_path_entry_in_commit(
210 &odb,
211 parent_id,
212 file_path,
213 cache.as_ref(),
214 &mut buf,
215 &mut buf2,
216 &mut stats,
217 )? {
218 let no_change_in_entry = entry_id == parent_entry_id;
219 if pid == 0 {
220 previous_entry = Some((*parent_id, parent_entry_id));
221 }
222 if no_change_in_entry {
223 pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
224 queue.insert(*parent_commit_time, *parent_id);
225 continue 'outer;
226 }
227 }
228 }
229
230 let more_than_one_parent = parent_ids.len() > 1;
231 for (parent_id, parent_commit_time) in parent_ids {
232 queue.insert(parent_commit_time, parent_id);
233 let changes_for_file_path = tree_diff_at_file_path(
234 &odb,
235 file_path,
236 suspect,
237 parent_id,
238 cache.as_ref(),
239 &mut stats,
240 &mut diff_state,
241 &mut buf,
242 &mut buf2,
243 &mut buf3,
244 )?;
245 let Some(modification) = changes_for_file_path else {
246 if more_than_one_parent {
247 for unblamed_hunk in &mut hunks_to_blame {
250 unblamed_hunk.clone_blame(suspect, parent_id);
251 }
252 } else {
253 pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
254 }
255 continue;
256 };
257
258 match modification {
259 gix_diff::tree::recorder::Change::Addition { .. } => {
260 if more_than_one_parent {
261 } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
265 break 'outer;
266 }
267 }
268 gix_diff::tree::recorder::Change::Deletion { .. } => {
269 unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted")
270 }
271 gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
272 let changes = blob_changes(
273 &odb,
274 resource_cache,
275 oid,
276 previous_oid,
277 file_path,
278 options.diff_algorithm,
279 &mut stats,
280 )?;
281 hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
282 }
283 }
284 }
285
286 hunks_to_blame.retain_mut(|unblamed_hunk| {
287 if unblamed_hunk.suspects.len() == 1 {
288 if let Some(entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) {
289 out.push(entry);
294 return false;
295 }
296 }
297 unblamed_hunk.remove_blame(suspect);
298 true
299 });
300
301 #[cfg(debug_assertions)]
305 {
306 let ranges = hunks_to_blame.iter().fold(
307 std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
308 |mut acc, hunk| {
309 for (suspect, range) in hunk.suspects.clone() {
310 acc.entry(suspect).or_default().push(range);
311 }
312
313 acc
314 },
315 );
316
317 for (_, mut ranges) in ranges {
318 ranges.sort_by(|a, b| a.start.cmp(&b.start));
319
320 for window in ranges.windows(2) {
321 if let [a, b] = window {
322 assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
323 }
324 }
325 }
326 }
327 }
328
329 debug_assert_eq!(
330 hunks_to_blame,
331 vec![],
332 "only if there is no portion of the file left we have completed the blame"
333 );
334
335 out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
338 Ok(Outcome {
339 entries: coalesce_blame_entries(out),
340 blob: blamed_file_blob,
341 statistics: stats,
342 })
343}
344
345fn one_based_inclusive_to_zero_based_exclusive_range(
349 range: Option<Range<u32>>,
350 max_lines: u32,
351) -> Result<Range<u32>, Error> {
352 let Some(range) = range else { return Ok(0..max_lines) };
353 if range.start == 0 {
354 return Err(Error::InvalidLineRange);
355 }
356 let start = range.start - 1;
357 let end = range.end;
358 if start >= max_lines || end > max_lines || start == end {
359 return Err(Error::InvalidLineRange);
360 }
361 Ok(start..end)
362}
363
364fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<UnblamedHunk>) {
368 for unblamed_hunk in hunks_to_blame {
369 unblamed_hunk.pass_blame(from, to);
370 }
371}
372
373fn unblamed_to_out_is_done(
377 hunks_to_blame: &mut Vec<UnblamedHunk>,
378 out: &mut Vec<BlameEntry>,
379 suspect: ObjectId,
380) -> bool {
381 let mut without_suspect = Vec::new();
382 out.extend(hunks_to_blame.drain(..).filter_map(|hunk| {
383 BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| {
384 without_suspect.push(hunk);
385 None
386 })
387 }));
388 *hunks_to_blame = without_suspect;
389 hunks_to_blame.is_empty()
390}
391
392fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
402 let len = lines_blamed.len();
403 lines_blamed
404 .into_iter()
405 .fold(Vec::with_capacity(len), |mut acc, entry| {
406 let previous_entry = acc.last();
407
408 if let Some(previous_entry) = previous_entry {
409 let previous_blamed_range = previous_entry.range_in_blamed_file();
410 let current_blamed_range = entry.range_in_blamed_file();
411 let previous_source_range = previous_entry.range_in_source_file();
412 let current_source_range = entry.range_in_source_file();
413 if previous_entry.commit_id == entry.commit_id
414 && previous_blamed_range.end == current_blamed_range.start
415 && previous_source_range.end == current_source_range.start
417 {
418 let coalesced_entry = BlameEntry {
420 start_in_blamed_file: previous_blamed_range.start as u32,
421 start_in_source_file: previous_source_range.start as u32,
422 len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32)
423 .expect("BUG: hunks are never zero-sized"),
424 commit_id: previous_entry.commit_id,
425 };
426
427 acc.pop();
428 acc.push(coalesced_entry);
429 } else {
430 acc.push(entry);
431 }
432
433 acc
434 } else {
435 acc.push(entry);
436
437 acc
438 }
439 })
440}
441
442#[allow(clippy::too_many_arguments)]
443fn tree_diff_at_file_path(
444 odb: impl gix_object::Find + gix_object::FindHeader,
445 file_path: &BStr,
446 id: ObjectId,
447 parent_id: ObjectId,
448 cache: Option<&gix_commitgraph::Graph>,
449 stats: &mut Statistics,
450 state: &mut gix_diff::tree::State,
451 commit_buf: &mut Vec<u8>,
452 lhs_tree_buf: &mut Vec<u8>,
453 rhs_tree_buf: &mut Vec<u8>,
454) -> Result<Option<gix_diff::tree::recorder::Change>, Error> {
455 let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?;
456
457 let parent_tree_iter = odb.find_tree_iter(&parent_tree_id, lhs_tree_buf)?;
458 stats.trees_decoded += 1;
459
460 let tree_id = find_commit(cache, &odb, &id, commit_buf)?.tree_id()?;
461
462 let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?;
463 stats.trees_decoded += 1;
464
465 struct FindChangeToPath {
466 inner: gix_diff::tree::Recorder,
467 interesting_path: BString,
468 change: Option<gix_diff::tree::recorder::Change>,
469 }
470
471 impl FindChangeToPath {
472 fn new(interesting_path: BString) -> Self {
473 let inner =
474 gix_diff::tree::Recorder::default().track_location(Some(gix_diff::tree::recorder::Location::Path));
475
476 FindChangeToPath {
477 inner,
478 interesting_path,
479 change: None,
480 }
481 }
482 }
483
484 impl Visit for FindChangeToPath {
485 fn pop_front_tracked_path_and_set_current(&mut self) {
486 self.inner.pop_front_tracked_path_and_set_current();
487 }
488
489 fn push_back_tracked_path_component(&mut self, component: &BStr) {
490 self.inner.push_back_tracked_path_component(component);
491 }
492
493 fn push_path_component(&mut self, component: &BStr) {
494 self.inner.push_path_component(component);
495 }
496
497 fn pop_path_component(&mut self) {
498 self.inner.pop_path_component();
499 }
500
501 fn visit(&mut self, change: gix_diff::tree::visit::Change) -> gix_diff::tree::visit::Action {
502 use gix_diff::tree::visit;
503 use gix_diff::tree::visit::Change::*;
504
505 if self.inner.path() == self.interesting_path {
506 self.change = Some(match change {
507 Deletion {
508 entry_mode,
509 oid,
510 relation,
511 } => gix_diff::tree::recorder::Change::Deletion {
512 entry_mode,
513 oid,
514 path: self.inner.path_clone(),
515 relation,
516 },
517 Addition {
518 entry_mode,
519 oid,
520 relation,
521 } => gix_diff::tree::recorder::Change::Addition {
522 entry_mode,
523 oid,
524 path: self.inner.path_clone(),
525 relation,
526 },
527 Modification {
528 previous_entry_mode,
529 previous_oid,
530 entry_mode,
531 oid,
532 } => gix_diff::tree::recorder::Change::Modification {
533 previous_entry_mode,
534 previous_oid,
535 entry_mode,
536 oid,
537 path: self.inner.path_clone(),
538 },
539 });
540
541 visit::Action::Cancel
542 } else {
543 visit::Action::Continue
544 }
545 }
546 }
547
548 let mut recorder = FindChangeToPath::new(file_path.into());
549 let result = gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder);
550 stats.trees_diffed += 1;
551
552 match result {
553 Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change),
554 Err(error) => Err(Error::DiffTree(error)),
555 }
556}
557
558fn blob_changes(
559 odb: impl gix_object::Find + gix_object::FindHeader,
560 resource_cache: &mut gix_diff::blob::Platform,
561 oid: ObjectId,
562 previous_oid: ObjectId,
563 file_path: &BStr,
564 diff_algorithm: gix_diff::blob::Algorithm,
565 stats: &mut Statistics,
566) -> Result<Vec<Change>, Error> {
567 struct ChangeRecorder {
569 last_seen_after_end: u32,
570 hunks: Vec<Change>,
571 total_number_of_lines: u32,
572 }
573
574 impl ChangeRecorder {
575 fn new(total_number_of_lines: u32) -> Self {
578 ChangeRecorder {
579 last_seen_after_end: 0,
580 hunks: Vec::new(),
581 total_number_of_lines,
582 }
583 }
584 }
585
586 impl gix_diff::blob::Sink for ChangeRecorder {
587 type Out = Vec<Change>;
588
589 fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
590 if after.start > self.last_seen_after_end {
592 self.hunks
593 .push(Change::Unchanged(self.last_seen_after_end..after.start));
594 }
595
596 match (!before.is_empty(), !after.is_empty()) {
597 (_, true) => {
598 self.hunks.push(Change::AddedOrReplaced(
599 after.start..after.end,
600 before.end - before.start,
601 ));
602 }
603 (true, false) => {
604 self.hunks.push(Change::Deleted(after.start, before.end - before.start));
605 }
606 (false, false) => unreachable!("BUG: imara-diff provided a non-change"),
607 }
608 self.last_seen_after_end = after.end;
609 }
610
611 fn finish(mut self) -> Self::Out {
612 if self.total_number_of_lines > self.last_seen_after_end {
613 self.hunks
614 .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines));
615 }
616 self.hunks
617 }
618 }
619
620 resource_cache.set_resource(
621 previous_oid,
622 gix_object::tree::EntryKind::Blob,
623 file_path,
624 gix_diff::blob::ResourceKind::OldOrSource,
625 &odb,
626 )?;
627 resource_cache.set_resource(
628 oid,
629 gix_object::tree::EntryKind::Blob,
630 file_path,
631 gix_diff::blob::ResourceKind::NewOrDestination,
632 &odb,
633 )?;
634
635 let outcome = resource_cache.prepare_diff()?;
636 let input = gix_diff::blob::intern::InternedInput::new(
637 tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()),
638 tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()),
639 );
640 let number_of_lines_in_destination = input.after.len();
641 let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32);
642
643 let res = gix_diff::blob::diff(diff_algorithm, &input, change_recorder);
644 stats.blobs_diffed += 1;
645 Ok(res)
646}
647
648fn find_path_entry_in_commit(
649 odb: &impl gix_object::Find,
650 commit: &gix_hash::oid,
651 file_path: &BStr,
652 cache: Option<&gix_commitgraph::Graph>,
653 buf: &mut Vec<u8>,
654 buf2: &mut Vec<u8>,
655 stats: &mut Statistics,
656) -> Result<Option<ObjectId>, Error> {
657 let tree_id = find_commit(cache, odb, commit, buf)?.tree_id()?;
658 let tree_iter = odb.find_tree_iter(&tree_id, buf)?;
659 stats.trees_decoded += 1;
660
661 let res = tree_iter.lookup_entry(
662 odb,
663 buf2,
664 file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
665 )?;
666 stats.trees_decoded -= 1;
667 Ok(res.map(|e| e.oid))
668}
669
670type CommitTime = i64;
671
672fn commit_time(commit: gix_traverse::commit::Either<'_, '_>) -> Result<CommitTime, gix_object::decode::Error> {
673 match commit {
674 gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => {
675 commit_ref_iter.committer().map(|c| c.time.seconds)
676 }
677 gix_traverse::commit::Either::CachedCommit(commit) => Ok(commit.committer_timestamp() as i64),
678 }
679}
680
681type ParentIds = SmallVec<[(gix_hash::ObjectId, i64); 2]>;
682
683fn collect_parents(
684 commit: gix_traverse::commit::Either<'_, '_>,
685 odb: &impl gix_object::Find,
686 cache: Option<&gix_commitgraph::Graph>,
687 buf: &mut Vec<u8>,
688) -> Result<ParentIds, Error> {
689 let mut parent_ids: ParentIds = Default::default();
690 match commit {
691 gix_traverse::commit::Either::CachedCommit(commit) => {
692 let cache = cache
693 .as_ref()
694 .expect("find returned a cached commit, so we expect cache to be present");
695 for parent_pos in commit.iter_parents() {
696 let parent = cache.commit_at(parent_pos?);
697 parent_ids.push((parent.id().to_owned(), parent.committer_timestamp() as i64));
698 }
699 }
700 gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => {
701 for id in commit_ref_iter.parent_ids() {
702 let parent = odb.find_commit_iter(id.as_ref(), buf).ok();
703 let parent_commit_time = parent
704 .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
705 .unwrap_or_default();
706 parent_ids.push((id, parent_commit_time));
707 }
708 }
709 }
710 Ok(parent_ids)
711}
712
713pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
716 gix_diff::blob::sources::byte_lines_with_terminator(data)
717}