1use super::{process_changes, Change, UnblamedHunk};
2use crate::{BlameEntry, Error, Outcome, Statistics};
3use gix_diff::blob::intern::TokenSource;
4use gix_diff::tree::Visit;
5use gix_hash::ObjectId;
6use gix_object::{
7 bstr::{BStr, BString},
8 FindExt,
9};
10use std::num::NonZeroU32;
11use std::ops::Range;
12
13pub fn file<E>(
64 odb: impl gix_object::Find + gix_object::FindHeader,
65 traverse: impl IntoIterator<Item = Result<gix_traverse::commit::Info, E>>,
66 resource_cache: &mut gix_diff::blob::Platform,
67 file_path: &BStr,
68 range: Option<Range<u32>>,
69) -> Result<Outcome, Error>
70where
71 E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
72{
73 let mut traverse = traverse.into_iter().peekable();
74 let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else {
75 return Err(Error::EmptyTraversal);
76 };
77 let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);
78
79 let mut stats = Statistics::default();
80 let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
81 let blamed_file_entry_id = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?
82 .ok_or_else(|| Error::FileMissing {
83 file_path: file_path.to_owned(),
84 commit_id: suspect,
85 })?;
86 let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
87 let num_lines_in_blamed = {
88 let mut interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100);
89 tokens_for_diffing(&blamed_file_blob)
90 .tokenize()
91 .map(|token| interner.intern(token))
92 .count()
93 } as u32;
94
95 if num_lines_in_blamed == 0 {
97 return Ok(Outcome::default());
98 }
99
100 let range_in_blamed_file = one_based_inclusive_to_zero_based_exclusive_range(range, num_lines_in_blamed)?;
101 let mut hunks_to_blame = vec![UnblamedHunk {
102 range_in_blamed_file: range_in_blamed_file.clone(),
103 suspects: [(suspect, range_in_blamed_file)].into(),
104 }];
105
106 let mut out = Vec::new();
107 let mut diff_state = gix_diff::tree::State::default();
108 let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
109 'outer: while let Some(item) = traverse.next() {
110 if hunks_to_blame.is_empty() {
111 break;
112 }
113 let commit = item.map_err(|err| Error::Traverse(err.into()))?;
114 let suspect = commit.id;
115 stats.commits_traversed += 1;
116
117 let parent_ids = commit.parent_ids;
118 if parent_ids.is_empty() {
119 if traverse.peek().is_none() {
120 if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
126 break 'outer;
127 }
128 }
129
130 continue;
132 }
133
134 let mut entry = previous_entry
135 .take()
136 .filter(|(id, _)| *id == suspect)
137 .map(|(_, entry)| entry);
138 if entry.is_none() {
139 entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?;
140 }
141
142 let Some(entry_id) = entry else {
143 continue;
144 };
145
146 for (pid, parent_id) in parent_ids.iter().enumerate() {
147 if let Some(parent_entry_id) =
148 find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)?
149 {
150 let no_change_in_entry = entry_id == parent_entry_id;
151 if pid == 0 {
152 previous_entry = Some((*parent_id, parent_entry_id));
153 }
154 if no_change_in_entry {
155 pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
156 continue 'outer;
157 }
158 }
159 }
160
161 let more_than_one_parent = parent_ids.len() > 1;
162 for parent_id in parent_ids {
163 let changes_for_file_path = tree_diff_at_file_path(
164 &odb,
165 file_path,
166 commit.id,
167 parent_id,
168 &mut stats,
169 &mut diff_state,
170 &mut buf,
171 &mut buf2,
172 &mut buf3,
173 )?;
174 let Some(modification) = changes_for_file_path else {
175 if more_than_one_parent {
176 for unblamed_hunk in &mut hunks_to_blame {
179 unblamed_hunk.clone_blame(suspect, parent_id);
180 }
181 } else {
182 pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
183 }
184 continue;
185 };
186
187 match modification {
188 gix_diff::tree::recorder::Change::Addition { .. } => {
189 if more_than_one_parent {
190 } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
194 break 'outer;
195 }
196 }
197 gix_diff::tree::recorder::Change::Deletion { .. } => {
198 unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted")
199 }
200 gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
201 let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?;
202 hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
203 }
204 }
205 }
206
207 hunks_to_blame.retain_mut(|unblamed_hunk| {
208 if unblamed_hunk.suspects.len() == 1 {
209 if let Some(entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) {
210 out.push(entry);
215 return false;
216 }
217 }
218 unblamed_hunk.remove_blame(suspect);
219 true
220 });
221
222 #[cfg(debug_assertions)]
226 {
227 let ranges = hunks_to_blame.iter().fold(
228 std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
229 |mut acc, hunk| {
230 for (suspect, range) in hunk.suspects.clone() {
231 acc.entry(suspect).or_default().push(range);
232 }
233
234 acc
235 },
236 );
237
238 for (_, mut ranges) in ranges {
239 ranges.sort_by(|a, b| a.start.cmp(&b.start));
240
241 for window in ranges.windows(2) {
242 if let [a, b] = window {
243 assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
244 }
245 }
246 }
247 }
248 }
249
250 debug_assert_eq!(
251 hunks_to_blame,
252 vec![],
253 "only if there is no portion of the file left we have completed the blame"
254 );
255
256 out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
259 Ok(Outcome {
260 entries: coalesce_blame_entries(out),
261 blob: blamed_file_blob,
262 statistics: stats,
263 })
264}
265
266fn one_based_inclusive_to_zero_based_exclusive_range(
270 range: Option<Range<u32>>,
271 max_lines: u32,
272) -> Result<Range<u32>, Error> {
273 let Some(range) = range else { return Ok(0..max_lines) };
274 if range.start == 0 {
275 return Err(Error::InvalidLineRange);
276 }
277 let start = range.start - 1;
278 let end = range.end;
279 if start >= max_lines || end > max_lines || start == end {
280 return Err(Error::InvalidLineRange);
281 }
282 Ok(start..end)
283}
284
285fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<UnblamedHunk>) {
289 for unblamed_hunk in hunks_to_blame {
290 unblamed_hunk.pass_blame(from, to);
291 }
292}
293
294fn unblamed_to_out_is_done(
298 hunks_to_blame: &mut Vec<UnblamedHunk>,
299 out: &mut Vec<BlameEntry>,
300 suspect: ObjectId,
301) -> bool {
302 let mut without_suspect = Vec::new();
303 out.extend(hunks_to_blame.drain(..).filter_map(|hunk| {
304 BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| {
305 without_suspect.push(hunk);
306 None
307 })
308 }));
309 *hunks_to_blame = without_suspect;
310 hunks_to_blame.is_empty()
311}
312
313fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
323 let len = lines_blamed.len();
324 lines_blamed
325 .into_iter()
326 .fold(Vec::with_capacity(len), |mut acc, entry| {
327 let previous_entry = acc.last();
328
329 if let Some(previous_entry) = previous_entry {
330 let previous_blamed_range = previous_entry.range_in_blamed_file();
331 let current_blamed_range = entry.range_in_blamed_file();
332 let previous_source_range = previous_entry.range_in_source_file();
333 let current_source_range = entry.range_in_source_file();
334 if previous_entry.commit_id == entry.commit_id
335 && previous_blamed_range.end == current_blamed_range.start
336 && previous_source_range.end == current_source_range.start
338 {
339 let coalesced_entry = BlameEntry {
341 start_in_blamed_file: previous_blamed_range.start as u32,
342 start_in_source_file: previous_source_range.start as u32,
343 len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32)
344 .expect("BUG: hunks are never zero-sized"),
345 commit_id: previous_entry.commit_id,
346 };
347
348 acc.pop();
349 acc.push(coalesced_entry);
350 } else {
351 acc.push(entry);
352 }
353
354 acc
355 } else {
356 acc.push(entry);
357
358 acc
359 }
360 })
361}
362
363#[allow(clippy::too_many_arguments)]
364fn tree_diff_at_file_path(
365 odb: impl gix_object::Find + gix_object::FindHeader,
366 file_path: &BStr,
367 id: ObjectId,
368 parent_id: ObjectId,
369 stats: &mut Statistics,
370 state: &mut gix_diff::tree::State,
371 commit_buf: &mut Vec<u8>,
372 lhs_tree_buf: &mut Vec<u8>,
373 rhs_tree_buf: &mut Vec<u8>,
374) -> Result<Option<gix_diff::tree::recorder::Change>, Error> {
375 let parent_tree = odb.find_commit(&parent_id, commit_buf)?.tree();
376 stats.commits_to_tree += 1;
377
378 let parent_tree_iter = odb.find_tree_iter(&parent_tree, lhs_tree_buf)?;
379 stats.trees_decoded += 1;
380
381 let tree_id = odb.find_commit(&id, commit_buf)?.tree();
382 stats.commits_to_tree += 1;
383
384 let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?;
385 stats.trees_decoded += 1;
386
387 struct FindChangeToPath {
388 inner: gix_diff::tree::Recorder,
389 interesting_path: BString,
390 change: Option<gix_diff::tree::recorder::Change>,
391 }
392
393 impl FindChangeToPath {
394 fn new(interesting_path: BString) -> Self {
395 let inner =
396 gix_diff::tree::Recorder::default().track_location(Some(gix_diff::tree::recorder::Location::Path));
397
398 FindChangeToPath {
399 inner,
400 interesting_path,
401 change: None,
402 }
403 }
404 }
405
406 impl Visit for FindChangeToPath {
407 fn pop_front_tracked_path_and_set_current(&mut self) {
408 self.inner.pop_front_tracked_path_and_set_current();
409 }
410
411 fn push_back_tracked_path_component(&mut self, component: &BStr) {
412 self.inner.push_back_tracked_path_component(component);
413 }
414
415 fn push_path_component(&mut self, component: &BStr) {
416 self.inner.push_path_component(component);
417 }
418
419 fn pop_path_component(&mut self) {
420 self.inner.pop_path_component();
421 }
422
423 fn visit(&mut self, change: gix_diff::tree::visit::Change) -> gix_diff::tree::visit::Action {
424 use gix_diff::tree::visit;
425 use gix_diff::tree::visit::Change::*;
426
427 if self.inner.path() == self.interesting_path {
428 self.change = Some(match change {
429 Deletion {
430 entry_mode,
431 oid,
432 relation,
433 } => gix_diff::tree::recorder::Change::Deletion {
434 entry_mode,
435 oid,
436 path: self.inner.path_clone(),
437 relation,
438 },
439 Addition {
440 entry_mode,
441 oid,
442 relation,
443 } => gix_diff::tree::recorder::Change::Addition {
444 entry_mode,
445 oid,
446 path: self.inner.path_clone(),
447 relation,
448 },
449 Modification {
450 previous_entry_mode,
451 previous_oid,
452 entry_mode,
453 oid,
454 } => gix_diff::tree::recorder::Change::Modification {
455 previous_entry_mode,
456 previous_oid,
457 entry_mode,
458 oid,
459 path: self.inner.path_clone(),
460 },
461 });
462
463 visit::Action::Cancel
464 } else {
465 visit::Action::Continue
466 }
467 }
468 }
469
470 let mut recorder = FindChangeToPath::new(file_path.into());
471 let result = gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder);
472 stats.trees_diffed += 1;
473
474 match result {
475 Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change),
476 Err(error) => Err(Error::DiffTree(error)),
477 }
478}
479
480fn blob_changes(
481 odb: impl gix_object::Find + gix_object::FindHeader,
482 resource_cache: &mut gix_diff::blob::Platform,
483 oid: ObjectId,
484 previous_oid: ObjectId,
485 file_path: &BStr,
486 stats: &mut Statistics,
487) -> Result<Vec<Change>, Error> {
488 struct ChangeRecorder {
490 last_seen_after_end: u32,
491 hunks: Vec<Change>,
492 total_number_of_lines: u32,
493 }
494
495 impl ChangeRecorder {
496 fn new(total_number_of_lines: u32) -> Self {
499 ChangeRecorder {
500 last_seen_after_end: 0,
501 hunks: Vec::new(),
502 total_number_of_lines,
503 }
504 }
505 }
506
507 impl gix_diff::blob::Sink for ChangeRecorder {
508 type Out = Vec<Change>;
509
510 fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
511 if after.start > self.last_seen_after_end {
513 self.hunks
514 .push(Change::Unchanged(self.last_seen_after_end..after.start));
515 }
516
517 match (!before.is_empty(), !after.is_empty()) {
518 (_, true) => {
519 self.hunks.push(Change::AddedOrReplaced(
520 after.start..after.end,
521 before.end - before.start,
522 ));
523 }
524 (true, false) => {
525 self.hunks.push(Change::Deleted(after.start, before.end - before.start));
526 }
527 (false, false) => unreachable!("BUG: imara-diff provided a non-change"),
528 }
529 self.last_seen_after_end = after.end;
530 }
531
532 fn finish(mut self) -> Self::Out {
533 if self.total_number_of_lines > self.last_seen_after_end {
534 self.hunks
535 .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines));
536 }
537 self.hunks
538 }
539 }
540
541 resource_cache.set_resource(
542 previous_oid,
543 gix_object::tree::EntryKind::Blob,
544 file_path,
545 gix_diff::blob::ResourceKind::OldOrSource,
546 &odb,
547 )?;
548 resource_cache.set_resource(
549 oid,
550 gix_object::tree::EntryKind::Blob,
551 file_path,
552 gix_diff::blob::ResourceKind::NewOrDestination,
553 &odb,
554 )?;
555
556 let outcome = resource_cache.prepare_diff()?;
557 let input = gix_diff::blob::intern::InternedInput::new(
558 tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()),
559 tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()),
560 );
561 let number_of_lines_in_destination = input.after.len();
562 let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32);
563
564 let res = gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder);
565 stats.blobs_diffed += 1;
566 Ok(res)
567}
568
569fn find_path_entry_in_commit(
570 odb: &impl gix_object::Find,
571 commit: &gix_hash::oid,
572 file_path: &BStr,
573 buf: &mut Vec<u8>,
574 buf2: &mut Vec<u8>,
575 stats: &mut Statistics,
576) -> Result<Option<ObjectId>, Error> {
577 let commit_id = odb.find_commit(commit, buf)?.tree();
578 stats.commits_to_tree += 1;
579 let tree_iter = odb.find_tree_iter(&commit_id, buf)?;
580 stats.trees_decoded += 1;
581
582 let res = tree_iter.lookup_entry(
583 odb,
584 buf2,
585 file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
586 )?;
587 stats.trees_decoded -= 1;
588 Ok(res.map(|e| e.oid))
589}
590
591pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
594 gix_diff::blob::sources::byte_lines_with_terminator(data)
595}