gix_blame/file/
function.rs

1use std::{num::NonZeroU32, ops::Range};
2
3use gix_diff::{blob::intern::TokenSource, tree::Visit};
4use gix_hash::ObjectId;
5use gix_object::{
6    bstr::{BStr, BString},
7    FindExt,
8};
9use gix_traverse::commit::find as find_commit;
10use smallvec::SmallVec;
11
12use super::{process_changes, Change, UnblamedHunk};
13use crate::{types::BlamePathEntry, BlameEntry, Error, Options, Outcome, Statistics};
14
15/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
16/// at `suspect:<file_path>` originated in.
17///
18/// ## Parameters
19///
20/// * `odb`
21///    - Access to database objects, also for used for diffing.
22///    - Should have an object cache for good diff performance.
23/// * `suspect`
24///    - The first commit to be responsible for parts of `file_path`.
25/// * `cache`
26///    - Optionally, the commitgraph cache.
27/// * `resource_cache`
28///    - Used for diffing trees.
29/// * `file_path`
30///    - A *slash-separated* worktree-relative path to the file to blame.
31/// * `options`
32///    - An instance of [`Options`].
33///
34/// ## The algorithm
35///
36/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
37/// represent the worktree state.
38///
39/// We begin with one or more *Unblamed Hunks* and a single suspect, usually the `HEAD` commit as the commit containing the
40/// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*.
41/// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`.
42/// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes.
43/// Now the *Unblamed Hunk* is split at the boundaries of each matching change, creating a new *Unblamed Hunk* on each side,
44/// along with a [`BlameEntry`] to represent the match.
45/// This is repeated until there are no non-empty *Unblamed Hunk*s left.
46///
47/// At a high level, what we want to do is the following:
48///
49/// - get the commit
50/// - walk through its parents
51///   - for each parent, do a diff and mark lines that don’t have a suspect yet (this is the term
52///     used in `libgit2`), but that have been changed in this commit
53///
54/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
55/// suspects. It can be visualized as follows:
56///
57/// <---------------------------------------->
58/// <---------------><----------------------->
59/// <---><----------><----------------------->
60/// <---><----------><-------><-----><------->
61/// <---><---><-----><-------><-----><------->
62/// <---><---><-----><-------><-----><-><-><->
63pub fn file(
64    odb: impl gix_object::Find + gix_object::FindHeader,
65    suspect: ObjectId,
66    cache: Option<gix_commitgraph::Graph>,
67    resource_cache: &mut gix_diff::blob::Platform,
68    file_path: &BStr,
69    options: Options,
70) -> Result<Outcome, Error> {
71    let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);
72
73    let mut stats = Statistics::default();
74    let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
75    let blamed_file_entry_id = find_path_entry_in_commit(
76        &odb,
77        &suspect,
78        file_path,
79        cache.as_ref(),
80        &mut buf,
81        &mut buf2,
82        &mut stats,
83    )?
84    .ok_or_else(|| Error::FileMissing {
85        file_path: file_path.to_owned(),
86        commit_id: suspect,
87    })?;
88    let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
89    let num_lines_in_blamed = tokens_for_diffing(&blamed_file_blob).tokenize().count() as u32;
90
91    // Binary or otherwise empty?
92    if num_lines_in_blamed == 0 {
93        return Ok(Outcome::default());
94    }
95
96    let ranges_to_blame = options.ranges.to_zero_based_exclusive_ranges(num_lines_in_blamed);
97    let mut hunks_to_blame = ranges_to_blame
98        .into_iter()
99        .map(|range| UnblamedHunk::new(range, suspect))
100        .collect::<Vec<_>>();
101
102    let (mut buf, mut buf2) = (Vec::new(), Vec::new());
103    let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
104    let mut queue: gix_revwalk::PriorityQueue<gix_date::SecondsSinceUnixEpoch, ObjectId> =
105        gix_revwalk::PriorityQueue::new();
106    queue.insert(commit.commit_time()?, suspect);
107
108    let mut out = Vec::new();
109    let mut diff_state = gix_diff::tree::State::default();
110    let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
111    let mut blame_path = if options.debug_track_path {
112        Some(Vec::new())
113    } else {
114        None
115    };
116
117    'outer: while let Some(suspect) = queue.pop_value() {
118        stats.commits_traversed += 1;
119        if hunks_to_blame.is_empty() {
120            break;
121        }
122
123        let first_hunk_for_suspect = hunks_to_blame.iter().find(|hunk| hunk.has_suspect(&suspect));
124        let Some(first_hunk_for_suspect) = first_hunk_for_suspect else {
125            // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with
126            // the next one.
127            continue 'outer;
128        };
129
130        let current_file_path = first_hunk_for_suspect
131            .source_file_name
132            .clone()
133            .unwrap_or_else(|| file_path.to_owned());
134
135        let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
136        let commit_time = commit.commit_time()?;
137
138        if let Some(since) = options.since {
139            if commit_time < since.seconds {
140                if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
141                    break 'outer;
142                }
143
144                continue;
145            }
146        }
147
148        let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref(), &mut buf2)?;
149
150        if parent_ids.is_empty() {
151            if queue.is_empty() {
152                // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the
153                // `id` of the last `item` that was yielded by `queue`, so it makes sense to assign
154                // the remaining lines to it, even though we don’t explicitly check whether that is
155                // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
156                // an empty tree to validate this assumption.
157                if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
158                    if let Some(ref mut blame_path) = blame_path {
159                        let entry = previous_entry
160                            .take()
161                            .filter(|(id, _)| *id == suspect)
162                            .map(|(_, entry)| entry);
163
164                        let blame_path_entry = BlamePathEntry {
165                            source_file_path: current_file_path.clone(),
166                            previous_source_file_path: None,
167                            commit_id: suspect,
168                            blob_id: entry.unwrap_or(ObjectId::null(gix_hash::Kind::Sha1)),
169                            previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
170                            parent_index: 0,
171                        };
172                        blame_path.push(blame_path_entry);
173                    }
174
175                    break 'outer;
176                }
177            }
178            // There is more, keep looking.
179            continue;
180        }
181
182        let mut entry = previous_entry
183            .take()
184            .filter(|(id, _)| *id == suspect)
185            .map(|(_, entry)| entry);
186        if entry.is_none() {
187            entry = find_path_entry_in_commit(
188                &odb,
189                &suspect,
190                current_file_path.as_ref(),
191                cache.as_ref(),
192                &mut buf,
193                &mut buf2,
194                &mut stats,
195            )?;
196        }
197
198        let Some(entry_id) = entry else {
199            continue;
200        };
201
202        // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are
203        // identical to the corresponding lines in the *Source File*.
204        #[cfg(debug_assertions)]
205        {
206            let source_blob = odb.find_blob(&entry_id, &mut buf)?.data.to_vec();
207            let mut source_interner = gix_diff::blob::intern::Interner::new(source_blob.len() / 100);
208            let source_lines_as_tokens: Vec<_> = tokens_for_diffing(&source_blob)
209                .tokenize()
210                .map(|token| source_interner.intern(token))
211                .collect();
212
213            let mut blamed_interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100);
214            let blamed_lines_as_tokens: Vec<_> = tokens_for_diffing(&blamed_file_blob)
215                .tokenize()
216                .map(|token| blamed_interner.intern(token))
217                .collect();
218
219            for hunk in hunks_to_blame.iter() {
220                if let Some(range_in_suspect) = hunk.get_range(&suspect) {
221                    let range_in_blamed_file = hunk.range_in_blamed_file.clone();
222
223                    let source_lines = range_in_suspect
224                        .clone()
225                        .map(|i| BString::new(source_interner[source_lines_as_tokens[i as usize]].into()))
226                        .collect::<Vec<_>>();
227                    let blamed_lines = range_in_blamed_file
228                        .clone()
229                        .map(|i| BString::new(blamed_interner[blamed_lines_as_tokens[i as usize]].into()))
230                        .collect::<Vec<_>>();
231
232                    assert_eq!(source_lines, blamed_lines);
233                }
234            }
235        }
236
237        for (pid, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() {
238            if let Some(parent_entry_id) = find_path_entry_in_commit(
239                &odb,
240                parent_id,
241                current_file_path.as_ref(),
242                cache.as_ref(),
243                &mut buf,
244                &mut buf2,
245                &mut stats,
246            )? {
247                let no_change_in_entry = entry_id == parent_entry_id;
248                if pid == 0 {
249                    previous_entry = Some((*parent_id, parent_entry_id));
250                }
251                if no_change_in_entry {
252                    pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
253                    queue.insert(*parent_commit_time, *parent_id);
254                    continue 'outer;
255                }
256            }
257        }
258
259        let more_than_one_parent = parent_ids.len() > 1;
260        for (index, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() {
261            queue.insert(*parent_commit_time, *parent_id);
262            let changes_for_file_path = tree_diff_at_file_path(
263                &odb,
264                current_file_path.as_ref(),
265                suspect,
266                *parent_id,
267                cache.as_ref(),
268                &mut stats,
269                &mut diff_state,
270                resource_cache,
271                &mut buf,
272                &mut buf2,
273                &mut buf3,
274                options.rewrites,
275            )?;
276            let Some(modification) = changes_for_file_path else {
277                if more_than_one_parent {
278                    // None of the changes affected the file we’re currently blaming.
279                    // Copy blame to parent.
280                    for unblamed_hunk in &mut hunks_to_blame {
281                        unblamed_hunk.clone_blame(suspect, *parent_id);
282                    }
283                } else {
284                    pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
285                }
286                continue;
287            };
288
289            match modification {
290                TreeDiffChange::Addition { id } => {
291                    if more_than_one_parent {
292                        // Do nothing under the assumption that this always (or almost always)
293                        // implies that the file comes from a different parent, compared to which
294                        // it was modified, not added.
295                    } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
296                        if let Some(ref mut blame_path) = blame_path {
297                            let blame_path_entry = BlamePathEntry {
298                                source_file_path: current_file_path.clone(),
299                                previous_source_file_path: None,
300                                commit_id: suspect,
301                                blob_id: id,
302                                previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
303                                parent_index: index,
304                            };
305                            blame_path.push(blame_path_entry);
306                        }
307
308                        break 'outer;
309                    }
310                }
311                TreeDiffChange::Deletion => {
312                    unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted")
313                }
314                TreeDiffChange::Modification { previous_id, id } => {
315                    let changes = blob_changes(
316                        &odb,
317                        resource_cache,
318                        id,
319                        previous_id,
320                        file_path,
321                        file_path,
322                        options.diff_algorithm,
323                        &mut stats,
324                    )?;
325                    hunks_to_blame = process_changes(hunks_to_blame, changes.clone(), suspect, *parent_id);
326                    if let Some(ref mut blame_path) = blame_path {
327                        let has_blame_been_passed = hunks_to_blame.iter().any(|hunk| hunk.has_suspect(parent_id));
328
329                        if has_blame_been_passed {
330                            let blame_path_entry = BlamePathEntry {
331                                source_file_path: current_file_path.clone(),
332                                previous_source_file_path: Some(current_file_path.clone()),
333                                commit_id: suspect,
334                                blob_id: id,
335                                previous_blob_id: previous_id,
336                                parent_index: index,
337                            };
338                            blame_path.push(blame_path_entry);
339                        }
340                    }
341                }
342                TreeDiffChange::Rewrite {
343                    source_location,
344                    source_id,
345                    id,
346                } => {
347                    let changes = blob_changes(
348                        &odb,
349                        resource_cache,
350                        id,
351                        source_id,
352                        file_path,
353                        source_location.as_ref(),
354                        options.diff_algorithm,
355                        &mut stats,
356                    )?;
357                    hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, *parent_id);
358
359                    let mut has_blame_been_passed = false;
360
361                    for hunk in hunks_to_blame.iter_mut() {
362                        if hunk.has_suspect(parent_id) {
363                            hunk.source_file_name = Some(source_location.clone());
364
365                            has_blame_been_passed = true;
366                        }
367                    }
368
369                    if has_blame_been_passed {
370                        if let Some(ref mut blame_path) = blame_path {
371                            let blame_path_entry = BlamePathEntry {
372                                source_file_path: current_file_path.clone(),
373                                previous_source_file_path: Some(source_location.clone()),
374                                commit_id: suspect,
375                                blob_id: id,
376                                previous_blob_id: source_id,
377                                parent_index: index,
378                            };
379                            blame_path.push(blame_path_entry);
380                        }
381                    }
382                }
383            }
384        }
385
386        hunks_to_blame.retain_mut(|unblamed_hunk| {
387            if unblamed_hunk.suspects.len() == 1 {
388                if let Some(entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) {
389                    // At this point, we have copied blame for every hunk to a parent. Hunks
390                    // that have only `suspect` left in `suspects` have not passed blame to any
391                    // parent, and so they can be converted to a `BlameEntry` and moved to
392                    // `out`.
393                    out.push(entry);
394                    return false;
395                }
396            }
397            unblamed_hunk.remove_blame(suspect);
398            true
399        });
400    }
401
402    debug_assert_eq!(
403        hunks_to_blame,
404        vec![],
405        "only if there is no portion of the file left we have completed the blame"
406    );
407
408    // I don’t know yet whether it would make sense to use a data structure instead that preserves
409    // order on insertion.
410    out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
411    Ok(Outcome {
412        entries: coalesce_blame_entries(out),
413        blob: blamed_file_blob,
414        statistics: stats,
415        blame_path,
416    })
417}
418
419/// Pass ownership of each unblamed hunk of `from` to `to`.
420///
421/// This happens when `from` didn't actually change anything in the blamed file.
422fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<UnblamedHunk>) {
423    for unblamed_hunk in hunks_to_blame {
424        unblamed_hunk.pass_blame(from, to);
425    }
426}
427
428/// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process.
429///
430/// Return `true` if we are done because `hunks_to_blame` is empty.
431fn unblamed_to_out_is_done(
432    hunks_to_blame: &mut Vec<UnblamedHunk>,
433    out: &mut Vec<BlameEntry>,
434    suspect: ObjectId,
435) -> bool {
436    let mut without_suspect = Vec::new();
437    out.extend(hunks_to_blame.drain(..).filter_map(|hunk| {
438        BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| {
439            without_suspect.push(hunk);
440            None
441        })
442    }));
443    *hunks_to_blame = without_suspect;
444    hunks_to_blame.is_empty()
445}
446
447/// This function merges adjacent blame entries. It merges entries that are adjacent both in the
448/// blamed file and in the source file that introduced them. This follows `git`’s
449/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the
450/// blamed file which can result in different blames in certain edge cases. See [the commit][1]
451/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test
452/// for this behaviour in `git`.
453///
454/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a
455/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131
456fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
457    let len = lines_blamed.len();
458    lines_blamed
459        .into_iter()
460        .fold(Vec::with_capacity(len), |mut acc, entry| {
461            let previous_entry = acc.last();
462
463            if let Some(previous_entry) = previous_entry {
464                let previous_blamed_range = previous_entry.range_in_blamed_file();
465                let current_blamed_range = entry.range_in_blamed_file();
466                let previous_source_range = previous_entry.range_in_source_file();
467                let current_source_range = entry.range_in_source_file();
468                if previous_entry.commit_id == entry.commit_id
469                    && previous_blamed_range.end == current_blamed_range.start
470                    // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`.
471                    && previous_source_range.end == current_source_range.start
472                {
473                    let coalesced_entry = BlameEntry {
474                        start_in_blamed_file: previous_blamed_range.start as u32,
475                        start_in_source_file: previous_source_range.start as u32,
476                        len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32)
477                            .expect("BUG: hunks are never zero-sized"),
478                        commit_id: previous_entry.commit_id,
479                        source_file_name: previous_entry.source_file_name.clone(),
480                    };
481
482                    acc.pop();
483                    acc.push(coalesced_entry);
484                } else {
485                    acc.push(entry);
486                }
487
488                acc
489            } else {
490                acc.push(entry);
491
492                acc
493            }
494        })
495}
496
497/// The union of [`gix_diff::tree::recorder::Change`] and [`gix_diff::tree_with_rewrites::Change`],
498/// keeping only the blame-relevant information.
499enum TreeDiffChange {
500    Addition {
501        id: ObjectId,
502    },
503    Deletion,
504    Modification {
505        previous_id: ObjectId,
506        id: ObjectId,
507    },
508    Rewrite {
509        source_location: BString,
510        source_id: ObjectId,
511        id: ObjectId,
512    },
513}
514
515impl From<gix_diff::tree::recorder::Change> for TreeDiffChange {
516    fn from(value: gix_diff::tree::recorder::Change) -> Self {
517        use gix_diff::tree::recorder::Change;
518
519        match value {
520            Change::Addition { oid, .. } => Self::Addition { id: oid },
521            Change::Deletion { .. } => Self::Deletion,
522            Change::Modification { previous_oid, oid, .. } => Self::Modification {
523                previous_id: previous_oid,
524                id: oid,
525            },
526        }
527    }
528}
529
530impl From<gix_diff::tree_with_rewrites::Change> for TreeDiffChange {
531    fn from(value: gix_diff::tree_with_rewrites::Change) -> Self {
532        use gix_diff::tree_with_rewrites::Change;
533
534        match value {
535            Change::Addition { id, .. } => Self::Addition { id },
536            Change::Deletion { .. } => Self::Deletion,
537            Change::Modification { previous_id, id, .. } => Self::Modification { previous_id, id },
538            Change::Rewrite {
539                source_location,
540                source_id,
541                id,
542                ..
543            } => Self::Rewrite {
544                source_location,
545                source_id,
546                id,
547            },
548        }
549    }
550}
551
552#[allow(clippy::too_many_arguments)]
553fn tree_diff_at_file_path(
554    odb: impl gix_object::Find + gix_object::FindHeader,
555    file_path: &BStr,
556    id: ObjectId,
557    parent_id: ObjectId,
558    cache: Option<&gix_commitgraph::Graph>,
559    stats: &mut Statistics,
560    state: &mut gix_diff::tree::State,
561    resource_cache: &mut gix_diff::blob::Platform,
562    commit_buf: &mut Vec<u8>,
563    lhs_tree_buf: &mut Vec<u8>,
564    rhs_tree_buf: &mut Vec<u8>,
565    rewrites: Option<gix_diff::Rewrites>,
566) -> Result<Option<TreeDiffChange>, Error> {
567    let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?;
568
569    let parent_tree_iter = odb.find_tree_iter(&parent_tree_id, lhs_tree_buf)?;
570    stats.trees_decoded += 1;
571
572    let tree_id = find_commit(cache, &odb, &id, commit_buf)?.tree_id()?;
573
574    let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?;
575    stats.trees_decoded += 1;
576
577    let result = tree_diff_without_rewrites_at_file_path(&odb, file_path, stats, state, parent_tree_iter, tree_iter)?;
578
579    // Here, we follow git’s behaviour. We return when we’ve found a `Modification`. We try a
580    // second time with rename tracking when the change is either an `Addition` or a `Deletion`
581    // because those can turn out to have been a `Rewrite`.
582    // TODO(perf): renames are usually rare enough to not care about the work duplication done here.
583    //             But in theory, a rename tracker could be used by us, on demand, and we could stuff the
584    //             changes in there and have it find renames, without repeating the diff.
585    if matches!(result, Some(TreeDiffChange::Modification { .. })) {
586        return Ok(result);
587    }
588    let Some(rewrites) = rewrites else {
589        return Ok(result);
590    };
591
592    let result = tree_diff_with_rewrites_at_file_path(
593        &odb,
594        file_path,
595        stats,
596        state,
597        resource_cache,
598        parent_tree_iter,
599        tree_iter,
600        rewrites,
601    )?;
602
603    Ok(result)
604}
605
606#[allow(clippy::too_many_arguments)]
607fn tree_diff_without_rewrites_at_file_path(
608    odb: impl gix_object::Find + gix_object::FindHeader,
609    file_path: &BStr,
610    stats: &mut Statistics,
611    state: &mut gix_diff::tree::State,
612    parent_tree_iter: gix_object::TreeRefIter<'_>,
613    tree_iter: gix_object::TreeRefIter<'_>,
614) -> Result<Option<TreeDiffChange>, Error> {
615    struct FindChangeToPath {
616        inner: gix_diff::tree::Recorder,
617        interesting_path: BString,
618        change: Option<gix_diff::tree::recorder::Change>,
619    }
620
621    impl FindChangeToPath {
622        fn new(interesting_path: BString) -> Self {
623            let inner =
624                gix_diff::tree::Recorder::default().track_location(Some(gix_diff::tree::recorder::Location::Path));
625
626            FindChangeToPath {
627                inner,
628                interesting_path,
629                change: None,
630            }
631        }
632    }
633
634    impl Visit for FindChangeToPath {
635        fn pop_front_tracked_path_and_set_current(&mut self) {
636            self.inner.pop_front_tracked_path_and_set_current();
637        }
638
639        fn push_back_tracked_path_component(&mut self, component: &BStr) {
640            self.inner.push_back_tracked_path_component(component);
641        }
642
643        fn push_path_component(&mut self, component: &BStr) {
644            self.inner.push_path_component(component);
645        }
646
647        fn pop_path_component(&mut self) {
648            self.inner.pop_path_component();
649        }
650
651        fn visit(&mut self, change: gix_diff::tree::visit::Change) -> gix_diff::tree::visit::Action {
652            use gix_diff::tree::{visit, visit::Change::*};
653
654            if self.inner.path() == self.interesting_path {
655                self.change = Some(match change {
656                    Deletion {
657                        entry_mode,
658                        oid,
659                        relation,
660                    } => gix_diff::tree::recorder::Change::Deletion {
661                        entry_mode,
662                        oid,
663                        path: self.inner.path_clone(),
664                        relation,
665                    },
666                    Addition {
667                        entry_mode,
668                        oid,
669                        relation,
670                    } => gix_diff::tree::recorder::Change::Addition {
671                        entry_mode,
672                        oid,
673                        path: self.inner.path_clone(),
674                        relation,
675                    },
676                    Modification {
677                        previous_entry_mode,
678                        previous_oid,
679                        entry_mode,
680                        oid,
681                    } => gix_diff::tree::recorder::Change::Modification {
682                        previous_entry_mode,
683                        previous_oid,
684                        entry_mode,
685                        oid,
686                        path: self.inner.path_clone(),
687                    },
688                });
689
690                visit::Action::Cancel
691            } else {
692                visit::Action::Continue
693            }
694        }
695    }
696
697    let mut recorder = FindChangeToPath::new(file_path.into());
698    let result = gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder);
699    stats.trees_diffed += 1;
700
701    match result {
702        Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change.map(Into::into)),
703        Err(error) => Err(Error::DiffTree(error)),
704    }
705}
706
707#[allow(clippy::too_many_arguments)]
708fn tree_diff_with_rewrites_at_file_path(
709    odb: impl gix_object::Find + gix_object::FindHeader,
710    file_path: &BStr,
711    stats: &mut Statistics,
712    state: &mut gix_diff::tree::State,
713    resource_cache: &mut gix_diff::blob::Platform,
714    parent_tree_iter: gix_object::TreeRefIter<'_>,
715    tree_iter: gix_object::TreeRefIter<'_>,
716    rewrites: gix_diff::Rewrites,
717) -> Result<Option<TreeDiffChange>, Error> {
718    let mut change: Option<gix_diff::tree_with_rewrites::Change> = None;
719
720    let options: gix_diff::tree_with_rewrites::Options = gix_diff::tree_with_rewrites::Options {
721        location: Some(gix_diff::tree::recorder::Location::Path),
722        rewrites: Some(rewrites),
723    };
724    let result = gix_diff::tree_with_rewrites(
725        parent_tree_iter,
726        tree_iter,
727        resource_cache,
728        state,
729        &odb,
730        |change_ref| -> Result<_, std::convert::Infallible> {
731            if change_ref.location() == file_path {
732                change = Some(change_ref.into_owned());
733                Ok(gix_diff::tree_with_rewrites::Action::Cancel)
734            } else {
735                Ok(gix_diff::tree_with_rewrites::Action::Continue)
736            }
737        },
738        options,
739    );
740    stats.trees_diffed_with_rewrites += 1;
741
742    match result {
743        Ok(_) | Err(gix_diff::tree_with_rewrites::Error::Diff(gix_diff::tree::Error::Cancelled)) => {
744            Ok(change.map(Into::into))
745        }
746        Err(error) => Err(Error::DiffTreeWithRewrites(error)),
747    }
748}
749
750#[allow(clippy::too_many_arguments)]
751fn blob_changes(
752    odb: impl gix_object::Find + gix_object::FindHeader,
753    resource_cache: &mut gix_diff::blob::Platform,
754    oid: ObjectId,
755    previous_oid: ObjectId,
756    file_path: &BStr,
757    previous_file_path: &BStr,
758    diff_algorithm: gix_diff::blob::Algorithm,
759    stats: &mut Statistics,
760) -> Result<Vec<Change>, Error> {
761    /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*.
762    struct ChangeRecorder {
763        last_seen_after_end: u32,
764        hunks: Vec<Change>,
765        total_number_of_lines: u32,
766    }
767
768    impl ChangeRecorder {
769        /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed
770        /// so that the entire file is represented by [`Change`].
771        fn new(total_number_of_lines: u32) -> Self {
772            ChangeRecorder {
773                last_seen_after_end: 0,
774                hunks: Vec::new(),
775                total_number_of_lines,
776            }
777        }
778    }
779
780    impl gix_diff::blob::Sink for ChangeRecorder {
781        type Out = Vec<Change>;
782
783        fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
784            // This checks for unchanged hunks.
785            if after.start > self.last_seen_after_end {
786                self.hunks
787                    .push(Change::Unchanged(self.last_seen_after_end..after.start));
788            }
789
790            match (!before.is_empty(), !after.is_empty()) {
791                (_, true) => {
792                    self.hunks.push(Change::AddedOrReplaced(
793                        after.start..after.end,
794                        before.end - before.start,
795                    ));
796                }
797                (true, false) => {
798                    self.hunks.push(Change::Deleted(after.start, before.end - before.start));
799                }
800                (false, false) => unreachable!("BUG: imara-diff provided a non-change"),
801            }
802            self.last_seen_after_end = after.end;
803        }
804
805        fn finish(mut self) -> Self::Out {
806            if self.total_number_of_lines > self.last_seen_after_end {
807                self.hunks
808                    .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines));
809            }
810            self.hunks
811        }
812    }
813
814    resource_cache.set_resource(
815        previous_oid,
816        gix_object::tree::EntryKind::Blob,
817        previous_file_path,
818        gix_diff::blob::ResourceKind::OldOrSource,
819        &odb,
820    )?;
821    resource_cache.set_resource(
822        oid,
823        gix_object::tree::EntryKind::Blob,
824        file_path,
825        gix_diff::blob::ResourceKind::NewOrDestination,
826        &odb,
827    )?;
828
829    let outcome = resource_cache.prepare_diff()?;
830    let input = gix_diff::blob::intern::InternedInput::new(
831        tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()),
832        tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()),
833    );
834    let number_of_lines_in_destination = input.after.len();
835    let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32);
836
837    let res = gix_diff::blob::diff(diff_algorithm, &input, change_recorder);
838    stats.blobs_diffed += 1;
839    Ok(res)
840}
841
842fn find_path_entry_in_commit(
843    odb: &impl gix_object::Find,
844    commit: &gix_hash::oid,
845    file_path: &BStr,
846    cache: Option<&gix_commitgraph::Graph>,
847    buf: &mut Vec<u8>,
848    buf2: &mut Vec<u8>,
849    stats: &mut Statistics,
850) -> Result<Option<ObjectId>, Error> {
851    let tree_id = find_commit(cache, odb, commit, buf)?.tree_id()?;
852    let tree_iter = odb.find_tree_iter(&tree_id, buf)?;
853    stats.trees_decoded += 1;
854
855    let res = tree_iter.lookup_entry(
856        odb,
857        buf2,
858        file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
859    )?;
860    stats.trees_decoded -= 1;
861    Ok(res.map(|e| e.oid))
862}
863
864type ParentIds = SmallVec<[(gix_hash::ObjectId, i64); 2]>;
865
866fn collect_parents(
867    commit: gix_traverse::commit::Either<'_, '_>,
868    odb: &impl gix_object::Find,
869    cache: Option<&gix_commitgraph::Graph>,
870    buf: &mut Vec<u8>,
871) -> Result<ParentIds, Error> {
872    let mut parent_ids: ParentIds = Default::default();
873    match commit {
874        gix_traverse::commit::Either::CachedCommit(commit) => {
875            let cache = cache
876                .as_ref()
877                .expect("find returned a cached commit, so we expect cache to be present");
878            for parent_pos in commit.iter_parents() {
879                let parent = cache.commit_at(parent_pos?);
880                parent_ids.push((parent.id().to_owned(), parent.committer_timestamp() as i64));
881            }
882        }
883        gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => {
884            for id in commit_ref_iter.parent_ids() {
885                let parent = odb.find_commit_iter(id.as_ref(), buf).ok();
886                let parent_commit_time = parent
887                    .and_then(|parent| parent.committer().ok().map(|committer| committer.seconds()))
888                    .unwrap_or_default();
889                parent_ids.push((id, parent_commit_time));
890            }
891        }
892    }
893    Ok(parent_ids)
894}
895
896/// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
897/// to unify them so the later access shows the right thing.
898pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
899    gix_diff::blob::sources::byte_lines_with_terminator(data)
900}