gix_traverse/commit/
simple.rs

1use std::{cmp::Reverse, collections::VecDeque};
2
3use gix_date::SecondsSinceUnixEpoch;
4use gix_hash::ObjectId;
5use gix_hashtable::HashSet;
6use smallvec::SmallVec;
7
8#[derive(Default, Debug, Copy, Clone)]
9/// The order with which to prioritize the search.
10pub enum CommitTimeOrder {
11    #[default]
12    /// Sort commits by newest first.
13    NewestFirst,
14    /// Sort commits by oldest first.
15    #[doc(alias = "Sort::REVERSE", alias = "git2")]
16    OldestFirst,
17}
18
19/// Specify how to sort commits during a [simple](super::Simple) traversal.
20///
21/// ### Sample History
22///
23/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
24/// (*their X-alignment doesn't matter*).
25///
26/// ```text
27/// ---1----2----4----7 <- second parent of 8
28///     \              \
29///      3----5----6----8---
30/// ```
31#[derive(Default, Debug, Copy, Clone)]
32pub enum Sorting {
33    /// Commits are sorted as they are mentioned in the commit graph.
34    ///
35    /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`.
36    ///
37    /// ### Note
38    ///
39    /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
40    /// as it avoids overlapping branches.
41    #[default]
42    BreadthFirst,
43    /// Commits are sorted by their commit time in the order specified, either newest or oldest first.
44    ///
45    /// The sorting applies to all currently queued commit ids and thus is full.
46    ///
47    /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for [`NewestFirst`](CommitTimeOrder::NewestFirst),
48    /// or `1, 2, 3, 4, 5, 6, 7, 8` for [`OldestFirst`](CommitTimeOrder::OldestFirst).
49    ///
50    /// # Performance
51    ///
52    /// This mode benefits greatly from having an object_cache in `find()`
53    /// to avoid having to lookup each commit twice.
54    ByCommitTime(CommitTimeOrder),
55    /// This sorting is similar to [`ByCommitTime`](Sorting::ByCommitTime), but adds a cutoff to not return commits older than
56    /// a given time, stopping the iteration once no younger commits is queued to be traversed.
57    ///
58    /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
59    ///
60    /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`.
61    ByCommitTimeCutoff {
62        /// The order in which to prioritize lookups.
63        order: CommitTimeOrder,
64        /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
65        seconds: gix_date::SecondsSinceUnixEpoch,
66    },
67}
68
69/// The error is part of the item returned by the [Ancestors](super::Simple) iterator.
70#[derive(Debug, thiserror::Error)]
71#[allow(missing_docs)]
72pub enum Error {
73    #[error(transparent)]
74    Find(#[from] gix_object::find::existing_iter::Error),
75    #[error(transparent)]
76    ObjectDecode(#[from] gix_object::decode::Error),
77}
78
79use Result as Either;
80type QueueKey<T> = Either<T, Reverse<T>>;
81
82/// The state used and potentially shared by multiple graph traversals.
83#[derive(Clone)]
84pub(super) struct State {
85    next: VecDeque<ObjectId>,
86    queue: gix_revwalk::PriorityQueue<QueueKey<SecondsSinceUnixEpoch>, ObjectId>,
87    buf: Vec<u8>,
88    seen: HashSet<ObjectId>,
89    parents_buf: Vec<u8>,
90    parent_ids: SmallVec<[(ObjectId, SecondsSinceUnixEpoch); 2]>,
91}
92
93///
94mod init {
95    use std::cmp::Reverse;
96
97    use gix_date::SecondsSinceUnixEpoch;
98    use gix_hash::{oid, ObjectId};
99    use gix_object::{CommitRefIter, FindExt};
100    use Err as Oldest;
101    use Ok as Newest;
102
103    use super::{
104        super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple},
105        collect_parents, CommitTimeOrder, Error, State,
106    };
107
108    impl Default for State {
109        fn default() -> Self {
110            State {
111                next: Default::default(),
112                queue: gix_revwalk::PriorityQueue::new(),
113                buf: vec![],
114                seen: Default::default(),
115                parents_buf: vec![],
116                parent_ids: Default::default(),
117            }
118        }
119    }
120
121    impl State {
122        fn clear(&mut self) {
123            self.next.clear();
124            self.queue.clear();
125            self.buf.clear();
126            self.seen.clear();
127        }
128    }
129
130    fn to_queue_key(i: i64, order: CommitTimeOrder) -> super::QueueKey<i64> {
131        match order {
132            CommitTimeOrder::NewestFirst => Newest(i),
133            CommitTimeOrder::OldestFirst => Oldest(Reverse(i)),
134        }
135    }
136
137    /// Builder
138    impl<Find, Predicate> Simple<Find, Predicate>
139    where
140        Find: gix_object::Find,
141    {
142        /// Set the `sorting` method.
143        pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
144            self.sorting = sorting;
145            match self.sorting {
146                Sorting::BreadthFirst => {
147                    self.queue_to_vecdeque();
148                }
149                Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => {
150                    let cutoff_time = self.sorting.cutoff_time();
151                    let state = &mut self.state;
152                    for commit_id in state.next.drain(..) {
153                        let commit_iter = self.objects.find_commit_iter(&commit_id, &mut state.buf)?;
154                        let time = commit_iter.committer()?.seconds();
155                        let key = to_queue_key(time, order);
156                        match (cutoff_time, order) {
157                            (Some(cutoff_time), _) if time >= cutoff_time => {
158                                state.queue.insert(key, commit_id);
159                            }
160                            (Some(_), _) => {}
161                            (None, _) => {
162                                state.queue.insert(key, commit_id);
163                            }
164                        }
165                    }
166                }
167            }
168            Ok(self)
169        }
170
171        /// Change our commit parent handling mode to the given one.
172        pub fn parents(mut self, mode: Parents) -> Self {
173            self.parents = mode;
174            if matches!(self.parents, Parents::First) {
175                self.queue_to_vecdeque();
176            }
177            self
178        }
179
180        /// Set the commitgraph as `cache` to greatly accelerate any traversal.
181        ///
182        /// The cache will be used if possible, but we will fall-back without error to using the object
183        /// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well.
184        pub fn commit_graph(mut self, cache: Option<gix_commitgraph::Graph>) -> Self {
185            self.cache = cache;
186            self
187        }
188
189        fn queue_to_vecdeque(&mut self) {
190            let state = &mut self.state;
191            state.next.extend(
192                std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new())
193                    .into_iter_unordered()
194                    .map(|(_time, id)| id),
195            );
196        }
197    }
198
199    /// Lifecycle
200    impl<Find> Simple<Find, fn(&oid) -> bool>
201    where
202        Find: gix_object::Find,
203    {
204        /// Create a new instance.
205        ///
206        /// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
207        ///   an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
208        ///   as needed.
209        /// * `tips`
210        ///   * the starting points of the iteration, usually commits
211        ///   * each commit they lead to will only be returned once, including the tip that started it
212        pub fn new(tips: impl IntoIterator<Item = impl Into<ObjectId>>, find: Find) -> Self {
213            Self::filtered(tips, find, |_| true)
214        }
215    }
216
217    /// Lifecycle
218    impl<Find, Predicate> Simple<Find, Predicate>
219    where
220        Find: gix_object::Find,
221        Predicate: FnMut(&oid) -> bool,
222    {
223        /// Create a new instance with commit filtering enabled.
224        ///
225        /// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
226        ///   an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
227        ///   as needed.
228        /// * `tips`
229        ///   * the starting points of the iteration, usually commits
230        ///   * each commit they lead to will only be returned once, including the tip that started it
231        /// * `predicate` - indicate whether a given commit should be included in the result as well
232        ///   as whether its parent commits should be traversed.
233        pub fn filtered(
234            tips: impl IntoIterator<Item = impl Into<ObjectId>>,
235            find: Find,
236            mut predicate: Predicate,
237        ) -> Self {
238            let tips = tips.into_iter();
239            let mut state = State::default();
240            {
241                state.clear();
242                state.next.reserve(tips.size_hint().0);
243                for tip in tips.map(Into::into) {
244                    let was_inserted = state.seen.insert(tip);
245                    if was_inserted && predicate(&tip) {
246                        state.next.push_back(tip);
247                    }
248                }
249            }
250            Self {
251                objects: find,
252                cache: None,
253                predicate,
254                state,
255                parents: Default::default(),
256                sorting: Default::default(),
257            }
258        }
259    }
260
261    /// Access
262    impl<Find, Predicate> Simple<Find, Predicate> {
263        /// Return an iterator for accessing data of the current commit, parsed lazily.
264        pub fn commit_iter(&self) -> CommitRefIter<'_> {
265            CommitRefIter::from_bytes(&self.state.buf)
266        }
267
268        /// Return the current commits' raw data, which can be parsed using [`gix_object::CommitRef::from_bytes()`].
269        pub fn commit_data(&self) -> &[u8] {
270            &self.state.buf
271        }
272    }
273
274    impl<Find, Predicate> Iterator for Simple<Find, Predicate>
275    where
276        Find: gix_object::Find,
277        Predicate: FnMut(&oid) -> bool,
278    {
279        type Item = Result<Info, Error>;
280
281        fn next(&mut self) -> Option<Self::Item> {
282            if matches!(self.parents, Parents::First) {
283                self.next_by_topology()
284            } else {
285                match self.sorting {
286                    Sorting::BreadthFirst => self.next_by_topology(),
287                    Sorting::ByCommitTime(order) => self.next_by_commit_date(order, None),
288                    Sorting::ByCommitTimeCutoff { seconds, order } => self.next_by_commit_date(order, seconds.into()),
289                }
290            }
291        }
292    }
293
294    impl Sorting {
295        /// If not topo sort, provide the cutoff date if present.
296        fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
297            match self {
298                Sorting::ByCommitTimeCutoff { seconds, .. } => Some(*seconds),
299                _ => None,
300            }
301        }
302    }
303
304    /// Utilities
305    impl<Find, Predicate> Simple<Find, Predicate>
306    where
307        Find: gix_object::Find,
308        Predicate: FnMut(&oid) -> bool,
309    {
310        fn next_by_commit_date(
311            &mut self,
312            order: CommitTimeOrder,
313            cutoff: Option<SecondsSinceUnixEpoch>,
314        ) -> Option<Result<Info, Error>> {
315            let state = &mut self.state;
316
317            let (commit_time, oid) = match state.queue.pop()? {
318                (Newest(t) | Oldest(Reverse(t)), o) => (t, o),
319            };
320            let mut parents: ParentIds = Default::default();
321            match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
322                Ok(Either::CachedCommit(commit)) => {
323                    if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
324                        // drop corrupt caches and try again with ODB
325                        self.cache = None;
326                        return self.next_by_commit_date(order, cutoff);
327                    }
328                    for (id, parent_commit_time) in state.parent_ids.drain(..) {
329                        parents.push(id);
330                        let was_inserted = state.seen.insert(id);
331                        if !(was_inserted && (self.predicate)(&id)) {
332                            continue;
333                        }
334
335                        let key = to_queue_key(parent_commit_time, order);
336                        match cutoff {
337                            Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
338                            Some(_) | None => state.queue.insert(key, id),
339                        }
340                    }
341                }
342                Ok(Either::CommitRefIter(commit_iter)) => {
343                    for token in commit_iter {
344                        match token {
345                            Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
346                            Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
347                                parents.push(id);
348                                let was_inserted = state.seen.insert(id);
349                                if !(was_inserted && (self.predicate)(&id)) {
350                                    continue;
351                                }
352
353                                let parent = self.objects.find_commit_iter(id.as_ref(), &mut state.parents_buf).ok();
354                                let parent_commit_time = parent
355                                    .and_then(|parent| parent.committer().ok().map(|committer| committer.seconds()))
356                                    .unwrap_or_default();
357
358                                let time = to_queue_key(parent_commit_time, order);
359                                match cutoff {
360                                    Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
361                                    Some(_) | None => state.queue.insert(time, id),
362                                }
363                            }
364                            Ok(_unused_token) => break,
365                            Err(err) => return Some(Err(err.into())),
366                        }
367                    }
368                }
369                Err(err) => return Some(Err(err.into())),
370            }
371            Some(Ok(Info {
372                id: oid,
373                parent_ids: parents,
374                commit_time: Some(commit_time),
375            }))
376        }
377    }
378
379    /// Utilities
380    impl<Find, Predicate> Simple<Find, Predicate>
381    where
382        Find: gix_object::Find,
383        Predicate: FnMut(&oid) -> bool,
384    {
385        fn next_by_topology(&mut self) -> Option<Result<Info, Error>> {
386            let state = &mut self.state;
387            let oid = state.next.pop_front()?;
388            let mut parents: ParentIds = Default::default();
389            match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
390                Ok(Either::CachedCommit(commit)) => {
391                    if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
392                        // drop corrupt caches and try again with ODB
393                        self.cache = None;
394                        return self.next_by_topology();
395                    }
396
397                    for (id, _commit_time) in state.parent_ids.drain(..) {
398                        parents.push(id);
399                        let was_inserted = state.seen.insert(id);
400                        if was_inserted && (self.predicate)(&id) {
401                            state.next.push_back(id);
402                        }
403                        if matches!(self.parents, Parents::First) {
404                            break;
405                        }
406                    }
407                }
408                Ok(Either::CommitRefIter(commit_iter)) => {
409                    for token in commit_iter {
410                        match token {
411                            Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
412                            Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
413                                parents.push(id);
414                                let was_inserted = state.seen.insert(id);
415                                if was_inserted && (self.predicate)(&id) {
416                                    state.next.push_back(id);
417                                }
418                                if matches!(self.parents, Parents::First) {
419                                    break;
420                                }
421                            }
422                            Ok(_a_token_past_the_parents) => break,
423                            Err(err) => return Some(Err(err.into())),
424                        }
425                    }
426                }
427                Err(err) => return Some(Err(err.into())),
428            }
429            Some(Ok(Info {
430                id: oid,
431                parent_ids: parents,
432                commit_time: None,
433            }))
434        }
435    }
436}
437
438fn collect_parents(
439    dest: &mut SmallVec<[(gix_hash::ObjectId, gix_date::SecondsSinceUnixEpoch); 2]>,
440    cache: Option<&gix_commitgraph::Graph>,
441    parents: gix_commitgraph::file::commit::Parents<'_>,
442) -> bool {
443    dest.clear();
444    let cache = cache.as_ref().expect("parents iter is available, backed by `cache`");
445    for parent_id in parents {
446        match parent_id {
447            Ok(pos) => dest.push({
448                let parent = cache.commit_at(pos);
449                (
450                    parent.id().to_owned(),
451                    parent.committer_timestamp() as gix_date::SecondsSinceUnixEpoch, // we can't handle errors here and trying seems overkill
452                )
453            }),
454            Err(_err) => return false,
455        }
456    }
457    true
458}