gix_blame/file/
function.rs

1use std::{num::NonZeroU32, ops::Range};
2
3use gix_diff::{blob::intern::TokenSource, tree::Visit};
4use gix_hash::ObjectId;
5use gix_object::{
6    bstr::{BStr, BString},
7    FindExt,
8};
9use gix_traverse::commit::find as find_commit;
10use smallvec::SmallVec;
11
12use super::{process_changes, Change, UnblamedHunk};
13use crate::{types::BlamePathEntry, BlameEntry, Error, Options, Outcome, Statistics};
14
15/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
16/// at `suspect:<file_path>` originated in.
17///
18/// ## Parameters
19///
20/// * `odb`
21///    - Access to database objects, also for used for diffing.
22///    - Should have an object cache for good diff performance.
23/// * `suspect`
24///    - The first commit to be responsible for parts of `file_path`.
25/// * `cache`
26///    - Optionally, the commitgraph cache.
27/// * `file_path`
28///    - A *slash-separated* worktree-relative path to the file to blame.
29/// * `range`
30///    - A 1-based inclusive range, in order to mirror `git`’s behaviour. `Some(20..40)` represents
31///      21 lines, spanning from line 20 up to and including line 40. This will be converted to
32///      `19..40` internally as the algorithm uses 0-based ranges that are exclusive at the end.
33/// * `resource_cache`
34///    - Used for diffing trees.
35///
36/// ## The algorithm
37///
38/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
39/// represent the worktree state.
40///
41/// We begin with one or more *Unblamed Hunks* and a single suspect, usually the `HEAD` commit as the commit containing the
42/// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*.
43/// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`.
44/// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes.
45/// Now the *Unblamed Hunk* is split at the boundaries of each matching change, creating a new *Unblamed Hunk* on each side,
46/// along with a [`BlameEntry`] to represent the match.
47/// This is repeated until there are no non-empty *Unblamed Hunk*s left.
48///
49/// At a high level, what we want to do is the following:
50///
51/// - get the commit
52/// - walk through its parents
53///   - for each parent, do a diff and mark lines that don’t have a suspect yet (this is the term
54///     used in `libgit2`), but that have been changed in this commit
55///
56/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
57/// suspects. It can be visualized as follows:
58///
59/// <---------------------------------------->
60/// <---------------><----------------------->
61/// <---><----------><----------------------->
62/// <---><----------><-------><-----><------->
63/// <---><---><-----><-------><-----><------->
64/// <---><---><-----><-------><-----><-><-><->
65pub fn file(
66    odb: impl gix_object::Find + gix_object::FindHeader,
67    suspect: ObjectId,
68    cache: Option<gix_commitgraph::Graph>,
69    resource_cache: &mut gix_diff::blob::Platform,
70    file_path: &BStr,
71    options: Options,
72) -> Result<Outcome, Error> {
73    let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);
74
75    let mut stats = Statistics::default();
76    let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
77    let blamed_file_entry_id = find_path_entry_in_commit(
78        &odb,
79        &suspect,
80        file_path,
81        cache.as_ref(),
82        &mut buf,
83        &mut buf2,
84        &mut stats,
85    )?
86    .ok_or_else(|| Error::FileMissing {
87        file_path: file_path.to_owned(),
88        commit_id: suspect,
89    })?;
90    let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
91    let num_lines_in_blamed = tokens_for_diffing(&blamed_file_blob).tokenize().count() as u32;
92
93    // Binary or otherwise empty?
94    if num_lines_in_blamed == 0 {
95        return Ok(Outcome::default());
96    }
97
98    let ranges = options.range.to_zero_based_exclusive(num_lines_in_blamed)?;
99    let mut hunks_to_blame = Vec::with_capacity(ranges.len());
100
101    for range in ranges {
102        hunks_to_blame.push(UnblamedHunk {
103            range_in_blamed_file: range.clone(),
104            suspects: [(suspect, range)].into(),
105            source_file_name: None,
106        });
107    }
108
109    let (mut buf, mut buf2) = (Vec::new(), Vec::new());
110    let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
111    let mut queue: gix_revwalk::PriorityQueue<gix_date::SecondsSinceUnixEpoch, ObjectId> =
112        gix_revwalk::PriorityQueue::new();
113    queue.insert(commit.commit_time()?, suspect);
114
115    let mut out = Vec::new();
116    let mut diff_state = gix_diff::tree::State::default();
117    let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
118    let mut blame_path = if options.debug_track_path {
119        Some(Vec::new())
120    } else {
121        None
122    };
123
124    'outer: while let Some(suspect) = queue.pop_value() {
125        stats.commits_traversed += 1;
126        if hunks_to_blame.is_empty() {
127            break;
128        }
129
130        let first_hunk_for_suspect = hunks_to_blame.iter().find(|hunk| hunk.has_suspect(&suspect));
131        let Some(first_hunk_for_suspect) = first_hunk_for_suspect else {
132            // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with
133            // the next one.
134            continue 'outer;
135        };
136
137        let current_file_path = first_hunk_for_suspect
138            .source_file_name
139            .clone()
140            .unwrap_or_else(|| file_path.to_owned());
141
142        let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
143        let commit_time = commit.commit_time()?;
144
145        if let Some(since) = options.since {
146            if commit_time < since.seconds {
147                if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
148                    break 'outer;
149                }
150
151                continue;
152            }
153        }
154
155        let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref(), &mut buf2)?;
156
157        if parent_ids.is_empty() {
158            if queue.is_empty() {
159                // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the
160                // `id` of the last `item` that was yielded by `queue`, so it makes sense to assign
161                // the remaining lines to it, even though we don’t explicitly check whether that is
162                // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
163                // an empty tree to validate this assumption.
164                if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
165                    if let Some(ref mut blame_path) = blame_path {
166                        let entry = previous_entry
167                            .take()
168                            .filter(|(id, _)| *id == suspect)
169                            .map(|(_, entry)| entry);
170
171                        let blame_path_entry = BlamePathEntry {
172                            source_file_path: current_file_path.clone(),
173                            previous_source_file_path: None,
174                            commit_id: suspect,
175                            blob_id: entry.unwrap_or(ObjectId::null(gix_hash::Kind::Sha1)),
176                            previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
177                            parent_index: 0,
178                        };
179                        blame_path.push(blame_path_entry);
180                    }
181
182                    break 'outer;
183                }
184            }
185            // There is more, keep looking.
186            continue;
187        }
188
189        let mut entry = previous_entry
190            .take()
191            .filter(|(id, _)| *id == suspect)
192            .map(|(_, entry)| entry);
193        if entry.is_none() {
194            entry = find_path_entry_in_commit(
195                &odb,
196                &suspect,
197                current_file_path.as_ref(),
198                cache.as_ref(),
199                &mut buf,
200                &mut buf2,
201                &mut stats,
202            )?;
203        }
204
205        let Some(entry_id) = entry else {
206            continue;
207        };
208
209        // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are
210        // identical to the corresponding lines in the *Source File*.
211        #[cfg(debug_assertions)]
212        {
213            let source_blob = odb.find_blob(&entry_id, &mut buf)?.data.to_vec();
214            let mut source_interner = gix_diff::blob::intern::Interner::new(source_blob.len() / 100);
215            let source_lines_as_tokens: Vec<_> = tokens_for_diffing(&source_blob)
216                .tokenize()
217                .map(|token| source_interner.intern(token))
218                .collect();
219
220            let mut blamed_interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100);
221            let blamed_lines_as_tokens: Vec<_> = tokens_for_diffing(&blamed_file_blob)
222                .tokenize()
223                .map(|token| blamed_interner.intern(token))
224                .collect();
225
226            for hunk in hunks_to_blame.iter() {
227                if let Some(range_in_suspect) = hunk.get_range(&suspect) {
228                    let range_in_blamed_file = hunk.range_in_blamed_file.clone();
229
230                    let source_lines = range_in_suspect
231                        .clone()
232                        .map(|i| BString::new(source_interner[source_lines_as_tokens[i as usize]].into()))
233                        .collect::<Vec<_>>();
234                    let blamed_lines = range_in_blamed_file
235                        .clone()
236                        .map(|i| BString::new(blamed_interner[blamed_lines_as_tokens[i as usize]].into()))
237                        .collect::<Vec<_>>();
238
239                    assert_eq!(source_lines, blamed_lines);
240                }
241            }
242        }
243
244        for (pid, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() {
245            if let Some(parent_entry_id) = find_path_entry_in_commit(
246                &odb,
247                parent_id,
248                current_file_path.as_ref(),
249                cache.as_ref(),
250                &mut buf,
251                &mut buf2,
252                &mut stats,
253            )? {
254                let no_change_in_entry = entry_id == parent_entry_id;
255                if pid == 0 {
256                    previous_entry = Some((*parent_id, parent_entry_id));
257                }
258                if no_change_in_entry {
259                    pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
260                    queue.insert(*parent_commit_time, *parent_id);
261                    continue 'outer;
262                }
263            }
264        }
265
266        let more_than_one_parent = parent_ids.len() > 1;
267        for (index, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() {
268            queue.insert(*parent_commit_time, *parent_id);
269            let changes_for_file_path = tree_diff_at_file_path(
270                &odb,
271                current_file_path.as_ref(),
272                suspect,
273                *parent_id,
274                cache.as_ref(),
275                &mut stats,
276                &mut diff_state,
277                resource_cache,
278                &mut buf,
279                &mut buf2,
280                &mut buf3,
281                options.rewrites,
282            )?;
283            let Some(modification) = changes_for_file_path else {
284                if more_than_one_parent {
285                    // None of the changes affected the file we’re currently blaming.
286                    // Copy blame to parent.
287                    for unblamed_hunk in &mut hunks_to_blame {
288                        unblamed_hunk.clone_blame(suspect, *parent_id);
289                    }
290                } else {
291                    pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
292                }
293                continue;
294            };
295
296            match modification {
297                TreeDiffChange::Addition { id } => {
298                    if more_than_one_parent {
299                        // Do nothing under the assumption that this always (or almost always)
300                        // implies that the file comes from a different parent, compared to which
301                        // it was modified, not added.
302                    } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
303                        if let Some(ref mut blame_path) = blame_path {
304                            let blame_path_entry = BlamePathEntry {
305                                source_file_path: current_file_path.clone(),
306                                previous_source_file_path: None,
307                                commit_id: suspect,
308                                blob_id: id,
309                                previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
310                                parent_index: index,
311                            };
312                            blame_path.push(blame_path_entry);
313                        }
314
315                        break 'outer;
316                    }
317                }
318                TreeDiffChange::Deletion => {
319                    unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted")
320                }
321                TreeDiffChange::Modification { previous_id, id } => {
322                    let changes = blob_changes(
323                        &odb,
324                        resource_cache,
325                        id,
326                        previous_id,
327                        file_path,
328                        file_path,
329                        options.diff_algorithm,
330                        &mut stats,
331                    )?;
332                    hunks_to_blame = process_changes(hunks_to_blame, changes.clone(), suspect, *parent_id);
333                    if let Some(ref mut blame_path) = blame_path {
334                        let has_blame_been_passed = hunks_to_blame.iter().any(|hunk| hunk.has_suspect(parent_id));
335
336                        if has_blame_been_passed {
337                            let blame_path_entry = BlamePathEntry {
338                                source_file_path: current_file_path.clone(),
339                                previous_source_file_path: Some(current_file_path.clone()),
340                                commit_id: suspect,
341                                blob_id: id,
342                                previous_blob_id: previous_id,
343                                parent_index: index,
344                            };
345                            blame_path.push(blame_path_entry);
346                        }
347                    }
348                }
349                TreeDiffChange::Rewrite {
350                    source_location,
351                    source_id,
352                    id,
353                } => {
354                    let changes = blob_changes(
355                        &odb,
356                        resource_cache,
357                        id,
358                        source_id,
359                        file_path,
360                        source_location.as_ref(),
361                        options.diff_algorithm,
362                        &mut stats,
363                    )?;
364                    hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, *parent_id);
365
366                    let mut has_blame_been_passed = false;
367
368                    for hunk in hunks_to_blame.iter_mut() {
369                        if hunk.has_suspect(parent_id) {
370                            hunk.source_file_name = Some(source_location.clone());
371
372                            has_blame_been_passed = true;
373                        }
374                    }
375
376                    if has_blame_been_passed {
377                        if let Some(ref mut blame_path) = blame_path {
378                            let blame_path_entry = BlamePathEntry {
379                                source_file_path: current_file_path.clone(),
380                                previous_source_file_path: Some(source_location.clone()),
381                                commit_id: suspect,
382                                blob_id: id,
383                                previous_blob_id: source_id,
384                                parent_index: index,
385                            };
386                            blame_path.push(blame_path_entry);
387                        }
388                    }
389                }
390            }
391        }
392
393        hunks_to_blame.retain_mut(|unblamed_hunk| {
394            if unblamed_hunk.suspects.len() == 1 {
395                if let Some(entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) {
396                    // At this point, we have copied blame for every hunk to a parent. Hunks
397                    // that have only `suspect` left in `suspects` have not passed blame to any
398                    // parent, and so they can be converted to a `BlameEntry` and moved to
399                    // `out`.
400                    out.push(entry);
401                    return false;
402                }
403            }
404            unblamed_hunk.remove_blame(suspect);
405            true
406        });
407    }
408
409    debug_assert_eq!(
410        hunks_to_blame,
411        vec![],
412        "only if there is no portion of the file left we have completed the blame"
413    );
414
415    // I don’t know yet whether it would make sense to use a data structure instead that preserves
416    // order on insertion.
417    out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
418    Ok(Outcome {
419        entries: coalesce_blame_entries(out),
420        blob: blamed_file_blob,
421        statistics: stats,
422        blame_path,
423    })
424}
425
426/// Pass ownership of each unblamed hunk of `from` to `to`.
427///
428/// This happens when `from` didn't actually change anything in the blamed file.
429fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<UnblamedHunk>) {
430    for unblamed_hunk in hunks_to_blame {
431        unblamed_hunk.pass_blame(from, to);
432    }
433}
434
435/// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process.
436///
437/// Return `true` if we are done because `hunks_to_blame` is empty.
438fn unblamed_to_out_is_done(
439    hunks_to_blame: &mut Vec<UnblamedHunk>,
440    out: &mut Vec<BlameEntry>,
441    suspect: ObjectId,
442) -> bool {
443    let mut without_suspect = Vec::new();
444    out.extend(hunks_to_blame.drain(..).filter_map(|hunk| {
445        BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| {
446            without_suspect.push(hunk);
447            None
448        })
449    }));
450    *hunks_to_blame = without_suspect;
451    hunks_to_blame.is_empty()
452}
453
454/// This function merges adjacent blame entries. It merges entries that are adjacent both in the
455/// blamed file and in the source file that introduced them. This follows `git`’s
456/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the
457/// blamed file which can result in different blames in certain edge cases. See [the commit][1]
458/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test
459/// for this behaviour in `git`.
460///
461/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a
462/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131
463fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
464    let len = lines_blamed.len();
465    lines_blamed
466        .into_iter()
467        .fold(Vec::with_capacity(len), |mut acc, entry| {
468            let previous_entry = acc.last();
469
470            if let Some(previous_entry) = previous_entry {
471                let previous_blamed_range = previous_entry.range_in_blamed_file();
472                let current_blamed_range = entry.range_in_blamed_file();
473                let previous_source_range = previous_entry.range_in_source_file();
474                let current_source_range = entry.range_in_source_file();
475                if previous_entry.commit_id == entry.commit_id
476                    && previous_blamed_range.end == current_blamed_range.start
477                    // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`.
478                    && previous_source_range.end == current_source_range.start
479                {
480                    let coalesced_entry = BlameEntry {
481                        start_in_blamed_file: previous_blamed_range.start as u32,
482                        start_in_source_file: previous_source_range.start as u32,
483                        len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32)
484                            .expect("BUG: hunks are never zero-sized"),
485                        commit_id: previous_entry.commit_id,
486                        source_file_name: previous_entry.source_file_name.clone(),
487                    };
488
489                    acc.pop();
490                    acc.push(coalesced_entry);
491                } else {
492                    acc.push(entry);
493                }
494
495                acc
496            } else {
497                acc.push(entry);
498
499                acc
500            }
501        })
502}
503
504/// The union of [`gix_diff::tree::recorder::Change`] and [`gix_diff::tree_with_rewrites::Change`],
505/// keeping only the blame-relevant information.
506enum TreeDiffChange {
507    Addition {
508        id: ObjectId,
509    },
510    Deletion,
511    Modification {
512        previous_id: ObjectId,
513        id: ObjectId,
514    },
515    Rewrite {
516        source_location: BString,
517        source_id: ObjectId,
518        id: ObjectId,
519    },
520}
521
522impl From<gix_diff::tree::recorder::Change> for TreeDiffChange {
523    fn from(value: gix_diff::tree::recorder::Change) -> Self {
524        use gix_diff::tree::recorder::Change;
525
526        match value {
527            Change::Addition { oid, .. } => Self::Addition { id: oid },
528            Change::Deletion { .. } => Self::Deletion,
529            Change::Modification { previous_oid, oid, .. } => Self::Modification {
530                previous_id: previous_oid,
531                id: oid,
532            },
533        }
534    }
535}
536
537impl From<gix_diff::tree_with_rewrites::Change> for TreeDiffChange {
538    fn from(value: gix_diff::tree_with_rewrites::Change) -> Self {
539        use gix_diff::tree_with_rewrites::Change;
540
541        match value {
542            Change::Addition { id, .. } => Self::Addition { id },
543            Change::Deletion { .. } => Self::Deletion,
544            Change::Modification { previous_id, id, .. } => Self::Modification { previous_id, id },
545            Change::Rewrite {
546                source_location,
547                source_id,
548                id,
549                ..
550            } => Self::Rewrite {
551                source_location,
552                source_id,
553                id,
554            },
555        }
556    }
557}
558
559#[allow(clippy::too_many_arguments)]
560fn tree_diff_at_file_path(
561    odb: impl gix_object::Find + gix_object::FindHeader,
562    file_path: &BStr,
563    id: ObjectId,
564    parent_id: ObjectId,
565    cache: Option<&gix_commitgraph::Graph>,
566    stats: &mut Statistics,
567    state: &mut gix_diff::tree::State,
568    resource_cache: &mut gix_diff::blob::Platform,
569    commit_buf: &mut Vec<u8>,
570    lhs_tree_buf: &mut Vec<u8>,
571    rhs_tree_buf: &mut Vec<u8>,
572    rewrites: Option<gix_diff::Rewrites>,
573) -> Result<Option<TreeDiffChange>, Error> {
574    let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?;
575
576    let parent_tree_iter = odb.find_tree_iter(&parent_tree_id, lhs_tree_buf)?;
577    stats.trees_decoded += 1;
578
579    let tree_id = find_commit(cache, &odb, &id, commit_buf)?.tree_id()?;
580
581    let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?;
582    stats.trees_decoded += 1;
583
584    let result = tree_diff_without_rewrites_at_file_path(&odb, file_path, stats, state, parent_tree_iter, tree_iter)?;
585
586    // Here, we follow git’s behaviour. We return when we’ve found a `Modification`. We try a
587    // second time with rename tracking when the change is either an `Addition` or a `Deletion`
588    // because those can turn out to have been a `Rewrite`.
589    // TODO(perf): renames are usually rare enough to not care about the work duplication done here.
590    //             But in theory, a rename tracker could be used by us, on demand, and we could stuff the
591    //             changes in there and have it find renames, without repeating the diff.
592    if matches!(result, Some(TreeDiffChange::Modification { .. })) {
593        return Ok(result);
594    }
595    let Some(rewrites) = rewrites else {
596        return Ok(result);
597    };
598
599    let result = tree_diff_with_rewrites_at_file_path(
600        &odb,
601        file_path,
602        stats,
603        state,
604        resource_cache,
605        parent_tree_iter,
606        tree_iter,
607        rewrites,
608    )?;
609
610    Ok(result)
611}
612
613#[allow(clippy::too_many_arguments)]
614fn tree_diff_without_rewrites_at_file_path(
615    odb: impl gix_object::Find + gix_object::FindHeader,
616    file_path: &BStr,
617    stats: &mut Statistics,
618    state: &mut gix_diff::tree::State,
619    parent_tree_iter: gix_object::TreeRefIter<'_>,
620    tree_iter: gix_object::TreeRefIter<'_>,
621) -> Result<Option<TreeDiffChange>, Error> {
622    struct FindChangeToPath {
623        inner: gix_diff::tree::Recorder,
624        interesting_path: BString,
625        change: Option<gix_diff::tree::recorder::Change>,
626    }
627
628    impl FindChangeToPath {
629        fn new(interesting_path: BString) -> Self {
630            let inner =
631                gix_diff::tree::Recorder::default().track_location(Some(gix_diff::tree::recorder::Location::Path));
632
633            FindChangeToPath {
634                inner,
635                interesting_path,
636                change: None,
637            }
638        }
639    }
640
641    impl Visit for FindChangeToPath {
642        fn pop_front_tracked_path_and_set_current(&mut self) {
643            self.inner.pop_front_tracked_path_and_set_current();
644        }
645
646        fn push_back_tracked_path_component(&mut self, component: &BStr) {
647            self.inner.push_back_tracked_path_component(component);
648        }
649
650        fn push_path_component(&mut self, component: &BStr) {
651            self.inner.push_path_component(component);
652        }
653
654        fn pop_path_component(&mut self) {
655            self.inner.pop_path_component();
656        }
657
658        fn visit(&mut self, change: gix_diff::tree::visit::Change) -> gix_diff::tree::visit::Action {
659            use gix_diff::tree::{visit, visit::Change::*};
660
661            if self.inner.path() == self.interesting_path {
662                self.change = Some(match change {
663                    Deletion {
664                        entry_mode,
665                        oid,
666                        relation,
667                    } => gix_diff::tree::recorder::Change::Deletion {
668                        entry_mode,
669                        oid,
670                        path: self.inner.path_clone(),
671                        relation,
672                    },
673                    Addition {
674                        entry_mode,
675                        oid,
676                        relation,
677                    } => gix_diff::tree::recorder::Change::Addition {
678                        entry_mode,
679                        oid,
680                        path: self.inner.path_clone(),
681                        relation,
682                    },
683                    Modification {
684                        previous_entry_mode,
685                        previous_oid,
686                        entry_mode,
687                        oid,
688                    } => gix_diff::tree::recorder::Change::Modification {
689                        previous_entry_mode,
690                        previous_oid,
691                        entry_mode,
692                        oid,
693                        path: self.inner.path_clone(),
694                    },
695                });
696
697                visit::Action::Cancel
698            } else {
699                visit::Action::Continue
700            }
701        }
702    }
703
704    let mut recorder = FindChangeToPath::new(file_path.into());
705    let result = gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder);
706    stats.trees_diffed += 1;
707
708    match result {
709        Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change.map(Into::into)),
710        Err(error) => Err(Error::DiffTree(error)),
711    }
712}
713
714#[allow(clippy::too_many_arguments)]
715fn tree_diff_with_rewrites_at_file_path(
716    odb: impl gix_object::Find + gix_object::FindHeader,
717    file_path: &BStr,
718    stats: &mut Statistics,
719    state: &mut gix_diff::tree::State,
720    resource_cache: &mut gix_diff::blob::Platform,
721    parent_tree_iter: gix_object::TreeRefIter<'_>,
722    tree_iter: gix_object::TreeRefIter<'_>,
723    rewrites: gix_diff::Rewrites,
724) -> Result<Option<TreeDiffChange>, Error> {
725    let mut change: Option<gix_diff::tree_with_rewrites::Change> = None;
726
727    let options: gix_diff::tree_with_rewrites::Options = gix_diff::tree_with_rewrites::Options {
728        location: Some(gix_diff::tree::recorder::Location::Path),
729        rewrites: Some(rewrites),
730    };
731    let result = gix_diff::tree_with_rewrites(
732        parent_tree_iter,
733        tree_iter,
734        resource_cache,
735        state,
736        &odb,
737        |change_ref| -> Result<_, std::convert::Infallible> {
738            if change_ref.location() == file_path {
739                change = Some(change_ref.into_owned());
740                Ok(gix_diff::tree_with_rewrites::Action::Cancel)
741            } else {
742                Ok(gix_diff::tree_with_rewrites::Action::Continue)
743            }
744        },
745        options,
746    );
747    stats.trees_diffed_with_rewrites += 1;
748
749    match result {
750        Ok(_) | Err(gix_diff::tree_with_rewrites::Error::Diff(gix_diff::tree::Error::Cancelled)) => {
751            Ok(change.map(Into::into))
752        }
753        Err(error) => Err(Error::DiffTreeWithRewrites(error)),
754    }
755}
756
757#[allow(clippy::too_many_arguments)]
758fn blob_changes(
759    odb: impl gix_object::Find + gix_object::FindHeader,
760    resource_cache: &mut gix_diff::blob::Platform,
761    oid: ObjectId,
762    previous_oid: ObjectId,
763    file_path: &BStr,
764    previous_file_path: &BStr,
765    diff_algorithm: gix_diff::blob::Algorithm,
766    stats: &mut Statistics,
767) -> Result<Vec<Change>, Error> {
768    /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*.
769    struct ChangeRecorder {
770        last_seen_after_end: u32,
771        hunks: Vec<Change>,
772        total_number_of_lines: u32,
773    }
774
775    impl ChangeRecorder {
776        /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed
777        /// so that the entire file is represented by [`Change`].
778        fn new(total_number_of_lines: u32) -> Self {
779            ChangeRecorder {
780                last_seen_after_end: 0,
781                hunks: Vec::new(),
782                total_number_of_lines,
783            }
784        }
785    }
786
787    impl gix_diff::blob::Sink for ChangeRecorder {
788        type Out = Vec<Change>;
789
790        fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
791            // This checks for unchanged hunks.
792            if after.start > self.last_seen_after_end {
793                self.hunks
794                    .push(Change::Unchanged(self.last_seen_after_end..after.start));
795            }
796
797            match (!before.is_empty(), !after.is_empty()) {
798                (_, true) => {
799                    self.hunks.push(Change::AddedOrReplaced(
800                        after.start..after.end,
801                        before.end - before.start,
802                    ));
803                }
804                (true, false) => {
805                    self.hunks.push(Change::Deleted(after.start, before.end - before.start));
806                }
807                (false, false) => unreachable!("BUG: imara-diff provided a non-change"),
808            }
809            self.last_seen_after_end = after.end;
810        }
811
812        fn finish(mut self) -> Self::Out {
813            if self.total_number_of_lines > self.last_seen_after_end {
814                self.hunks
815                    .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines));
816            }
817            self.hunks
818        }
819    }
820
821    resource_cache.set_resource(
822        previous_oid,
823        gix_object::tree::EntryKind::Blob,
824        previous_file_path,
825        gix_diff::blob::ResourceKind::OldOrSource,
826        &odb,
827    )?;
828    resource_cache.set_resource(
829        oid,
830        gix_object::tree::EntryKind::Blob,
831        file_path,
832        gix_diff::blob::ResourceKind::NewOrDestination,
833        &odb,
834    )?;
835
836    let outcome = resource_cache.prepare_diff()?;
837    let input = gix_diff::blob::intern::InternedInput::new(
838        tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()),
839        tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()),
840    );
841    let number_of_lines_in_destination = input.after.len();
842    let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32);
843
844    let res = gix_diff::blob::diff(diff_algorithm, &input, change_recorder);
845    stats.blobs_diffed += 1;
846    Ok(res)
847}
848
849fn find_path_entry_in_commit(
850    odb: &impl gix_object::Find,
851    commit: &gix_hash::oid,
852    file_path: &BStr,
853    cache: Option<&gix_commitgraph::Graph>,
854    buf: &mut Vec<u8>,
855    buf2: &mut Vec<u8>,
856    stats: &mut Statistics,
857) -> Result<Option<ObjectId>, Error> {
858    let tree_id = find_commit(cache, odb, commit, buf)?.tree_id()?;
859    let tree_iter = odb.find_tree_iter(&tree_id, buf)?;
860    stats.trees_decoded += 1;
861
862    let res = tree_iter.lookup_entry(
863        odb,
864        buf2,
865        file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
866    )?;
867    stats.trees_decoded -= 1;
868    Ok(res.map(|e| e.oid))
869}
870
871type ParentIds = SmallVec<[(gix_hash::ObjectId, i64); 2]>;
872
873fn collect_parents(
874    commit: gix_traverse::commit::Either<'_, '_>,
875    odb: &impl gix_object::Find,
876    cache: Option<&gix_commitgraph::Graph>,
877    buf: &mut Vec<u8>,
878) -> Result<ParentIds, Error> {
879    let mut parent_ids: ParentIds = Default::default();
880    match commit {
881        gix_traverse::commit::Either::CachedCommit(commit) => {
882            let cache = cache
883                .as_ref()
884                .expect("find returned a cached commit, so we expect cache to be present");
885            for parent_pos in commit.iter_parents() {
886                let parent = cache.commit_at(parent_pos?);
887                parent_ids.push((parent.id().to_owned(), parent.committer_timestamp() as i64));
888            }
889        }
890        gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => {
891            for id in commit_ref_iter.parent_ids() {
892                let parent = odb.find_commit_iter(id.as_ref(), buf).ok();
893                let parent_commit_time = parent
894                    .and_then(|parent| parent.committer().ok().map(|committer| committer.seconds()))
895                    .unwrap_or_default();
896                parent_ids.push((id, parent_commit_time));
897            }
898        }
899    }
900    Ok(parent_ids)
901}
902
903/// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
904/// to unify them so the later access shows the right thing.
905pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
906    gix_diff::blob::sources::byte_lines_with_terminator(data)
907}