gix_blame/file/
function.rs

1use super::{process_changes, Change, UnblamedHunk};
2use crate::{BlameEntry, Error, Options, Outcome, Statistics};
3use gix_diff::blob::intern::TokenSource;
4use gix_diff::tree::Visit;
5use gix_hash::ObjectId;
6use gix_object::{
7    bstr::{BStr, BString},
8    FindExt,
9};
10use gix_traverse::commit::find as find_commit;
11use smallvec::SmallVec;
12use std::num::NonZeroU32;
13use std::ops::Range;
14
15/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
16/// at `suspect:<file_path>` originated in.
17///
18/// ## Paramters
19///
20/// * `odb`
21///    - Access to database objects, also for used for diffing.
22///    - Should have an object cache for good diff performance.
23/// * `suspect`
24///    - The first commit to be responsible for parts of `file_path`.
25/// * `cache`
26///    - Optionally, the commitgraph cache.
27/// * `file_path`
28///    - A *slash-separated* worktree-relative path to the file to blame.
29/// * `range`
30///    - A 1-based inclusive range, in order to mirror `git`’s behaviour. `Some(20..40)` represents
31///      21 lines, spanning from line 20 up to and including line 40. This will be converted to
32///      `19..40` internally as the algorithm uses 0-based ranges that are exclusive at the end.
33/// * `resource_cache`
34///    - Used for diffing trees.
35///
36/// ## The algorithm
37///
38/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
39/// represent the worktree state.
40/// We begin with a single *Unblamed Hunk* and a single suspect, usually the `HEAD` commit as the commit containing the
41/// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*.
42/// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`.
43/// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes.
44/// Now the *Unblamed Hunk* is split at the boundaries of each matching change, creating a new *Unblamed Hunk* on each side,
45/// along with a [`BlameEntry`] to represent the match.
46/// This is repeated until there are no non-empty *Unblamed Hunk*s left.
47///
48/// At a high level, what we want to do is the following:
49///
50/// - get the commit
51/// - walk through its parents
52///   - for each parent, do a diff and mark lines that don’t have a suspect yet (this is the term
53///     used in `libgit2`), but that have been changed in this commit
54///
55/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
56/// suspects. It can be visualized as follows:
57//
58// <---------------------------------------->
59// <---------------><----------------------->
60// <---><----------><----------------------->
61// <---><----------><-------><-----><------->
62// <---><---><-----><-------><-----><------->
63// <---><---><-----><-------><-----><-><-><->
64pub fn file(
65    odb: impl gix_object::Find + gix_object::FindHeader,
66    suspect: ObjectId,
67    cache: Option<gix_commitgraph::Graph>,
68    resource_cache: &mut gix_diff::blob::Platform,
69    file_path: &BStr,
70    options: Options,
71) -> Result<Outcome, Error> {
72    let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);
73
74    let mut stats = Statistics::default();
75    let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
76    let blamed_file_entry_id = find_path_entry_in_commit(
77        &odb,
78        &suspect,
79        file_path,
80        cache.as_ref(),
81        &mut buf,
82        &mut buf2,
83        &mut stats,
84    )?
85    .ok_or_else(|| Error::FileMissing {
86        file_path: file_path.to_owned(),
87        commit_id: suspect,
88    })?;
89    let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
90    let num_lines_in_blamed = tokens_for_diffing(&blamed_file_blob).tokenize().count() as u32;
91
92    // Binary or otherwise empty?
93    if num_lines_in_blamed == 0 {
94        return Ok(Outcome::default());
95    }
96
97    let range_in_blamed_file = one_based_inclusive_to_zero_based_exclusive_range(options.range, num_lines_in_blamed)?;
98    let mut hunks_to_blame = vec![UnblamedHunk {
99        range_in_blamed_file: range_in_blamed_file.clone(),
100        suspects: [(suspect, range_in_blamed_file)].into(),
101    }];
102
103    let (mut buf, mut buf2) = (Vec::new(), Vec::new());
104    let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
105    let mut queue: gix_revwalk::PriorityQueue<CommitTime, ObjectId> = gix_revwalk::PriorityQueue::new();
106    queue.insert(commit_time(commit)?, suspect);
107
108    let mut out = Vec::new();
109    let mut diff_state = gix_diff::tree::State::default();
110    let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
111    'outer: while let Some(suspect) = queue.pop_value() {
112        stats.commits_traversed += 1;
113        if hunks_to_blame.is_empty() {
114            break;
115        }
116
117        let is_still_suspect = hunks_to_blame.iter().any(|hunk| hunk.suspects.contains_key(&suspect));
118        if !is_still_suspect {
119            // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with
120            // the next one.
121            continue 'outer;
122        }
123
124        let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
125        let commit_time = commit_time(commit)?;
126
127        if let Some(since) = options.since {
128            if commit_time < since.seconds {
129                if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
130                    break 'outer;
131                }
132
133                continue;
134            }
135        }
136
137        let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref(), &mut buf2)?;
138
139        if parent_ids.is_empty() {
140            if queue.is_empty() {
141                // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the
142                // `id` of the last `item` that was yielded by `queue`, so it makes sense to assign
143                // the remaining lines to it, even though we don’t explicitly check whether that is
144                // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
145                // an empty tree to validate this assumption.
146                if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
147                    break 'outer;
148                }
149            }
150            // There is more, keep looking.
151            continue;
152        }
153
154        let mut entry = previous_entry
155            .take()
156            .filter(|(id, _)| *id == suspect)
157            .map(|(_, entry)| entry);
158        if entry.is_none() {
159            entry = find_path_entry_in_commit(
160                &odb,
161                &suspect,
162                file_path,
163                cache.as_ref(),
164                &mut buf,
165                &mut buf2,
166                &mut stats,
167            )?;
168        }
169
170        let Some(entry_id) = entry else {
171            continue;
172        };
173
174        // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are
175        // identical to the corresponding lines in the *Source File*.
176        #[cfg(debug_assertions)]
177        {
178            let source_blob = odb.find_blob(&entry_id, &mut buf)?.data.to_vec();
179            let mut source_interner = gix_diff::blob::intern::Interner::new(source_blob.len() / 100);
180            let source_lines_as_tokens: Vec<_> = tokens_for_diffing(&source_blob)
181                .tokenize()
182                .map(|token| source_interner.intern(token))
183                .collect();
184
185            let mut blamed_interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100);
186            let blamed_lines_as_tokens: Vec<_> = tokens_for_diffing(&blamed_file_blob)
187                .tokenize()
188                .map(|token| blamed_interner.intern(token))
189                .collect();
190
191            for hunk in hunks_to_blame.iter() {
192                if let Some(range_in_suspect) = hunk.suspects.get(&suspect) {
193                    let range_in_blamed_file = hunk.range_in_blamed_file.clone();
194
195                    for (blamed_line_number, source_line_number) in range_in_blamed_file.zip(range_in_suspect.clone()) {
196                        let source_token = source_lines_as_tokens[source_line_number as usize];
197                        let blame_token = blamed_lines_as_tokens[blamed_line_number as usize];
198
199                        let source_line = BString::new(source_interner[source_token].into());
200                        let blamed_line = BString::new(blamed_interner[blame_token].into());
201
202                        assert_eq!(source_line, blamed_line);
203                    }
204                }
205            }
206        }
207
208        for (pid, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() {
209            if let Some(parent_entry_id) = find_path_entry_in_commit(
210                &odb,
211                parent_id,
212                file_path,
213                cache.as_ref(),
214                &mut buf,
215                &mut buf2,
216                &mut stats,
217            )? {
218                let no_change_in_entry = entry_id == parent_entry_id;
219                if pid == 0 {
220                    previous_entry = Some((*parent_id, parent_entry_id));
221                }
222                if no_change_in_entry {
223                    pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
224                    queue.insert(*parent_commit_time, *parent_id);
225                    continue 'outer;
226                }
227            }
228        }
229
230        let more_than_one_parent = parent_ids.len() > 1;
231        for (parent_id, parent_commit_time) in parent_ids {
232            queue.insert(parent_commit_time, parent_id);
233            let changes_for_file_path = tree_diff_at_file_path(
234                &odb,
235                file_path,
236                suspect,
237                parent_id,
238                cache.as_ref(),
239                &mut stats,
240                &mut diff_state,
241                &mut buf,
242                &mut buf2,
243                &mut buf3,
244            )?;
245            let Some(modification) = changes_for_file_path else {
246                if more_than_one_parent {
247                    // None of the changes affected the file we’re currently blaming.
248                    // Copy blame to parent.
249                    for unblamed_hunk in &mut hunks_to_blame {
250                        unblamed_hunk.clone_blame(suspect, parent_id);
251                    }
252                } else {
253                    pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
254                }
255                continue;
256            };
257
258            match modification {
259                gix_diff::tree::recorder::Change::Addition { .. } => {
260                    if more_than_one_parent {
261                        // Do nothing under the assumption that this always (or almost always)
262                        // implies that the file comes from a different parent, compared to which
263                        // it was modified, not added.
264                    } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
265                        break 'outer;
266                    }
267                }
268                gix_diff::tree::recorder::Change::Deletion { .. } => {
269                    unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted")
270                }
271                gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
272                    let changes = blob_changes(
273                        &odb,
274                        resource_cache,
275                        oid,
276                        previous_oid,
277                        file_path,
278                        options.diff_algorithm,
279                        &mut stats,
280                    )?;
281                    hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
282                }
283            }
284        }
285
286        hunks_to_blame.retain_mut(|unblamed_hunk| {
287            if unblamed_hunk.suspects.len() == 1 {
288                if let Some(entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) {
289                    // At this point, we have copied blame for every hunk to a parent. Hunks
290                    // that have only `suspect` left in `suspects` have not passed blame to any
291                    // parent, and so they can be converted to a `BlameEntry` and moved to
292                    // `out`.
293                    out.push(entry);
294                    return false;
295                }
296            }
297            unblamed_hunk.remove_blame(suspect);
298            true
299        });
300
301        // This block asserts that line ranges for each suspect never overlap. If they did overlap
302        // this would mean that the same line in a *Source File* would map to more than one line in
303        // the *Blamed File* and this is not possible.
304        #[cfg(debug_assertions)]
305        {
306            let ranges = hunks_to_blame.iter().fold(
307                std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
308                |mut acc, hunk| {
309                    for (suspect, range) in hunk.suspects.clone() {
310                        acc.entry(suspect).or_default().push(range);
311                    }
312
313                    acc
314                },
315            );
316
317            for (_, mut ranges) in ranges {
318                ranges.sort_by(|a, b| a.start.cmp(&b.start));
319
320                for window in ranges.windows(2) {
321                    if let [a, b] = window {
322                        assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
323                    }
324                }
325            }
326        }
327    }
328
329    debug_assert_eq!(
330        hunks_to_blame,
331        vec![],
332        "only if there is no portion of the file left we have completed the blame"
333    );
334
335    // I don’t know yet whether it would make sense to use a data structure instead that preserves
336    // order on insertion.
337    out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
338    Ok(Outcome {
339        entries: coalesce_blame_entries(out),
340        blob: blamed_file_blob,
341        statistics: stats,
342    })
343}
344
345/// This function assumes that `range` has 1-based inclusive line numbers and converts it to the
346/// format internally used: 0-based line numbers stored in ranges that are exclusive at the
347/// end.
348fn one_based_inclusive_to_zero_based_exclusive_range(
349    range: Option<Range<u32>>,
350    max_lines: u32,
351) -> Result<Range<u32>, Error> {
352    let Some(range) = range else { return Ok(0..max_lines) };
353    if range.start == 0 {
354        return Err(Error::InvalidLineRange);
355    }
356    let start = range.start - 1;
357    let end = range.end;
358    if start >= max_lines || end > max_lines || start == end {
359        return Err(Error::InvalidLineRange);
360    }
361    Ok(start..end)
362}
363
364/// Pass ownership of each unblamed hunk of `from` to `to`.
365///
366/// This happens when `from` didn't actually change anything in the blamed file.
367fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<UnblamedHunk>) {
368    for unblamed_hunk in hunks_to_blame {
369        unblamed_hunk.pass_blame(from, to);
370    }
371}
372
373/// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process.
374///
375/// Return `true` if we are done because `hunks_to_blame` is empty.
376fn unblamed_to_out_is_done(
377    hunks_to_blame: &mut Vec<UnblamedHunk>,
378    out: &mut Vec<BlameEntry>,
379    suspect: ObjectId,
380) -> bool {
381    let mut without_suspect = Vec::new();
382    out.extend(hunks_to_blame.drain(..).filter_map(|hunk| {
383        BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| {
384            without_suspect.push(hunk);
385            None
386        })
387    }));
388    *hunks_to_blame = without_suspect;
389    hunks_to_blame.is_empty()
390}
391
392/// This function merges adjacent blame entries. It merges entries that are adjacent both in the
393/// blamed file and in the source file that introduced them. This follows `git`’s
394/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the
395/// blamed file which can result in different blames in certain edge cases. See [the commit][1]
396/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test
397/// for this behaviour in `git`.
398///
399/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a
400/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131
401fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
402    let len = lines_blamed.len();
403    lines_blamed
404        .into_iter()
405        .fold(Vec::with_capacity(len), |mut acc, entry| {
406            let previous_entry = acc.last();
407
408            if let Some(previous_entry) = previous_entry {
409                let previous_blamed_range = previous_entry.range_in_blamed_file();
410                let current_blamed_range = entry.range_in_blamed_file();
411                let previous_source_range = previous_entry.range_in_source_file();
412                let current_source_range = entry.range_in_source_file();
413                if previous_entry.commit_id == entry.commit_id
414                    && previous_blamed_range.end == current_blamed_range.start
415                    // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`.
416                    && previous_source_range.end == current_source_range.start
417                {
418                    // let combined_range =
419                    let coalesced_entry = BlameEntry {
420                        start_in_blamed_file: previous_blamed_range.start as u32,
421                        start_in_source_file: previous_source_range.start as u32,
422                        len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32)
423                            .expect("BUG: hunks are never zero-sized"),
424                        commit_id: previous_entry.commit_id,
425                    };
426
427                    acc.pop();
428                    acc.push(coalesced_entry);
429                } else {
430                    acc.push(entry);
431                }
432
433                acc
434            } else {
435                acc.push(entry);
436
437                acc
438            }
439        })
440}
441
442#[allow(clippy::too_many_arguments)]
443fn tree_diff_at_file_path(
444    odb: impl gix_object::Find + gix_object::FindHeader,
445    file_path: &BStr,
446    id: ObjectId,
447    parent_id: ObjectId,
448    cache: Option<&gix_commitgraph::Graph>,
449    stats: &mut Statistics,
450    state: &mut gix_diff::tree::State,
451    commit_buf: &mut Vec<u8>,
452    lhs_tree_buf: &mut Vec<u8>,
453    rhs_tree_buf: &mut Vec<u8>,
454) -> Result<Option<gix_diff::tree::recorder::Change>, Error> {
455    let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?;
456
457    let parent_tree_iter = odb.find_tree_iter(&parent_tree_id, lhs_tree_buf)?;
458    stats.trees_decoded += 1;
459
460    let tree_id = find_commit(cache, &odb, &id, commit_buf)?.tree_id()?;
461
462    let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?;
463    stats.trees_decoded += 1;
464
465    struct FindChangeToPath {
466        inner: gix_diff::tree::Recorder,
467        interesting_path: BString,
468        change: Option<gix_diff::tree::recorder::Change>,
469    }
470
471    impl FindChangeToPath {
472        fn new(interesting_path: BString) -> Self {
473            let inner =
474                gix_diff::tree::Recorder::default().track_location(Some(gix_diff::tree::recorder::Location::Path));
475
476            FindChangeToPath {
477                inner,
478                interesting_path,
479                change: None,
480            }
481        }
482    }
483
484    impl Visit for FindChangeToPath {
485        fn pop_front_tracked_path_and_set_current(&mut self) {
486            self.inner.pop_front_tracked_path_and_set_current();
487        }
488
489        fn push_back_tracked_path_component(&mut self, component: &BStr) {
490            self.inner.push_back_tracked_path_component(component);
491        }
492
493        fn push_path_component(&mut self, component: &BStr) {
494            self.inner.push_path_component(component);
495        }
496
497        fn pop_path_component(&mut self) {
498            self.inner.pop_path_component();
499        }
500
501        fn visit(&mut self, change: gix_diff::tree::visit::Change) -> gix_diff::tree::visit::Action {
502            use gix_diff::tree::visit;
503            use gix_diff::tree::visit::Change::*;
504
505            if self.inner.path() == self.interesting_path {
506                self.change = Some(match change {
507                    Deletion {
508                        entry_mode,
509                        oid,
510                        relation,
511                    } => gix_diff::tree::recorder::Change::Deletion {
512                        entry_mode,
513                        oid,
514                        path: self.inner.path_clone(),
515                        relation,
516                    },
517                    Addition {
518                        entry_mode,
519                        oid,
520                        relation,
521                    } => gix_diff::tree::recorder::Change::Addition {
522                        entry_mode,
523                        oid,
524                        path: self.inner.path_clone(),
525                        relation,
526                    },
527                    Modification {
528                        previous_entry_mode,
529                        previous_oid,
530                        entry_mode,
531                        oid,
532                    } => gix_diff::tree::recorder::Change::Modification {
533                        previous_entry_mode,
534                        previous_oid,
535                        entry_mode,
536                        oid,
537                        path: self.inner.path_clone(),
538                    },
539                });
540
541                visit::Action::Cancel
542            } else {
543                visit::Action::Continue
544            }
545        }
546    }
547
548    let mut recorder = FindChangeToPath::new(file_path.into());
549    let result = gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder);
550    stats.trees_diffed += 1;
551
552    match result {
553        Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change),
554        Err(error) => Err(Error::DiffTree(error)),
555    }
556}
557
558fn blob_changes(
559    odb: impl gix_object::Find + gix_object::FindHeader,
560    resource_cache: &mut gix_diff::blob::Platform,
561    oid: ObjectId,
562    previous_oid: ObjectId,
563    file_path: &BStr,
564    diff_algorithm: gix_diff::blob::Algorithm,
565    stats: &mut Statistics,
566) -> Result<Vec<Change>, Error> {
567    /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*.
568    struct ChangeRecorder {
569        last_seen_after_end: u32,
570        hunks: Vec<Change>,
571        total_number_of_lines: u32,
572    }
573
574    impl ChangeRecorder {
575        /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed
576        /// so that the entire file is represented by [`Change`].
577        fn new(total_number_of_lines: u32) -> Self {
578            ChangeRecorder {
579                last_seen_after_end: 0,
580                hunks: Vec::new(),
581                total_number_of_lines,
582            }
583        }
584    }
585
586    impl gix_diff::blob::Sink for ChangeRecorder {
587        type Out = Vec<Change>;
588
589        fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
590            // This checks for unchanged hunks.
591            if after.start > self.last_seen_after_end {
592                self.hunks
593                    .push(Change::Unchanged(self.last_seen_after_end..after.start));
594            }
595
596            match (!before.is_empty(), !after.is_empty()) {
597                (_, true) => {
598                    self.hunks.push(Change::AddedOrReplaced(
599                        after.start..after.end,
600                        before.end - before.start,
601                    ));
602                }
603                (true, false) => {
604                    self.hunks.push(Change::Deleted(after.start, before.end - before.start));
605                }
606                (false, false) => unreachable!("BUG: imara-diff provided a non-change"),
607            }
608            self.last_seen_after_end = after.end;
609        }
610
611        fn finish(mut self) -> Self::Out {
612            if self.total_number_of_lines > self.last_seen_after_end {
613                self.hunks
614                    .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines));
615            }
616            self.hunks
617        }
618    }
619
620    resource_cache.set_resource(
621        previous_oid,
622        gix_object::tree::EntryKind::Blob,
623        file_path,
624        gix_diff::blob::ResourceKind::OldOrSource,
625        &odb,
626    )?;
627    resource_cache.set_resource(
628        oid,
629        gix_object::tree::EntryKind::Blob,
630        file_path,
631        gix_diff::blob::ResourceKind::NewOrDestination,
632        &odb,
633    )?;
634
635    let outcome = resource_cache.prepare_diff()?;
636    let input = gix_diff::blob::intern::InternedInput::new(
637        tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()),
638        tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()),
639    );
640    let number_of_lines_in_destination = input.after.len();
641    let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32);
642
643    let res = gix_diff::blob::diff(diff_algorithm, &input, change_recorder);
644    stats.blobs_diffed += 1;
645    Ok(res)
646}
647
648fn find_path_entry_in_commit(
649    odb: &impl gix_object::Find,
650    commit: &gix_hash::oid,
651    file_path: &BStr,
652    cache: Option<&gix_commitgraph::Graph>,
653    buf: &mut Vec<u8>,
654    buf2: &mut Vec<u8>,
655    stats: &mut Statistics,
656) -> Result<Option<ObjectId>, Error> {
657    let tree_id = find_commit(cache, odb, commit, buf)?.tree_id()?;
658    let tree_iter = odb.find_tree_iter(&tree_id, buf)?;
659    stats.trees_decoded += 1;
660
661    let res = tree_iter.lookup_entry(
662        odb,
663        buf2,
664        file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
665    )?;
666    stats.trees_decoded -= 1;
667    Ok(res.map(|e| e.oid))
668}
669
670type CommitTime = i64;
671
672fn commit_time(commit: gix_traverse::commit::Either<'_, '_>) -> Result<CommitTime, gix_object::decode::Error> {
673    match commit {
674        gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => {
675            commit_ref_iter.committer().map(|c| c.time.seconds)
676        }
677        gix_traverse::commit::Either::CachedCommit(commit) => Ok(commit.committer_timestamp() as i64),
678    }
679}
680
681type ParentIds = SmallVec<[(gix_hash::ObjectId, i64); 2]>;
682
683fn collect_parents(
684    commit: gix_traverse::commit::Either<'_, '_>,
685    odb: &impl gix_object::Find,
686    cache: Option<&gix_commitgraph::Graph>,
687    buf: &mut Vec<u8>,
688) -> Result<ParentIds, Error> {
689    let mut parent_ids: ParentIds = Default::default();
690    match commit {
691        gix_traverse::commit::Either::CachedCommit(commit) => {
692            let cache = cache
693                .as_ref()
694                .expect("find returned a cached commit, so we expect cache to be present");
695            for parent_pos in commit.iter_parents() {
696                let parent = cache.commit_at(parent_pos?);
697                parent_ids.push((parent.id().to_owned(), parent.committer_timestamp() as i64));
698            }
699        }
700        gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => {
701            for id in commit_ref_iter.parent_ids() {
702                let parent = odb.find_commit_iter(id.as_ref(), buf).ok();
703                let parent_commit_time = parent
704                    .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
705                    .unwrap_or_default();
706                parent_ids.push((id, parent_commit_time));
707            }
708        }
709    }
710    Ok(parent_ids)
711}
712
713/// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
714/// to unify them so the later access shows the right thing.
715pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
716    gix_diff::blob::sources::byte_lines_with_terminator(data)
717}