git_traverse/
commit.rs

1/// An iterator over the ancestors one or more starting commits
2pub struct Ancestors<Find, Predicate, StateMut> {
3    find: Find,
4    predicate: Predicate,
5    state: StateMut,
6    parents: Parents,
7    sorting: Sorting,
8}
9
10/// Specify how to handle commit parents during traversal.
11#[derive(Copy, Clone)]
12pub enum Parents {
13    /// Traverse all parents, useful for traversing the entire ancestry.
14    All,
15    /// Only traverse along the first parent, which commonly ignores all branches.
16    First,
17}
18
19impl Default for Parents {
20    fn default() -> Self {
21        Parents::All
22    }
23}
24
25/// Specify how to sort commits during traversal.
26#[derive(Copy, Clone)]
27pub enum Sorting {
28    /// Commits are sorted as they are mentioned in the commit graph.
29    Topological,
30    /// Commits are sorted by their commit time in descending order, that is newest first.
31    ///
32    /// The sorting applies to all currently queued commit ids and thus is full.
33    ///
34    /// # Performance
35    ///
36    /// This mode benefits greatly from having an object_cache in `find()`
37    /// to avoid having to lookup each commit twice.
38    ByCommitTimeNewestFirst,
39    /// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
40    /// a given time, stopping the iteration once no younger commits is queued to be traversed.
41    ///
42    /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
43    ByCommitTimeNewestFirstCutoffOlderThan {
44        /// The amount of seconds since unix epoch, the same value obtained by any `git_date::Time` structure and the way git counts time.
45        time_in_seconds_since_epoch: u32,
46    },
47}
48
49impl Default for Sorting {
50    fn default() -> Self {
51        Sorting::Topological
52    }
53}
54
55///
56pub mod ancestors {
57    use std::{
58        borrow::{Borrow, BorrowMut},
59        collections::VecDeque,
60        iter::FromIterator,
61    };
62
63    use git_hash::{oid, ObjectId};
64    use git_hashtable::HashSet;
65    use git_object::CommitRefIter;
66
67    use crate::commit::{Ancestors, Parents, Sorting};
68
69    /// The error is part of the item returned by the [Ancestors] iterator.
70    #[derive(Debug, thiserror::Error)]
71    #[allow(missing_docs)]
72    pub enum Error {
73        #[error("The commit {oid} could not be found")]
74        FindExisting {
75            oid: ObjectId,
76            source: Box<dyn std::error::Error + Send + Sync + 'static>,
77        },
78        #[error(transparent)]
79        ObjectDecode(#[from] git_object::decode::Error),
80    }
81
82    type TimeInSeconds = u32;
83
84    /// The state used and potentially shared by multiple graph traversals.
85    #[derive(Default, Clone)]
86    pub struct State {
87        next: VecDeque<(ObjectId, TimeInSeconds)>,
88        buf: Vec<u8>,
89        seen: HashSet<ObjectId>,
90        parents_buf: Vec<u8>,
91    }
92
93    impl State {
94        fn clear(&mut self) {
95            self.next.clear();
96            self.buf.clear();
97            self.seen.clear();
98        }
99    }
100
101    /// Builder
102    impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut> {
103        /// Change our commit parent handling mode to the given one.
104        pub fn parents(mut self, mode: Parents) -> Self {
105            self.parents = mode;
106            self
107        }
108    }
109
110    /// Builder
111    impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
112    where
113        Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
114        StateMut: BorrowMut<State>,
115        E: std::error::Error + Send + Sync + 'static,
116    {
117        /// Set the sorting method, either topological or by author date
118        pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
119            self.sorting = sorting;
120            if !matches!(self.sorting, Sorting::Topological) {
121                let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new()));
122                let state = self.state.borrow_mut();
123                for (commit_id, commit_time) in state.next.iter_mut() {
124                    let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting {
125                        oid: *commit_id,
126                        source: err.into(),
127                    })?;
128                    let time = commit_iter.committer()?.time.seconds_since_unix_epoch;
129                    match &mut cutoff_time_storage {
130                        Some((cutoff_time, storage)) if time >= *cutoff_time => {
131                            storage.push((*commit_id, time));
132                        }
133                        Some(_) => {}
134                        None => *commit_time = time,
135                    }
136                }
137                let mut v = match cutoff_time_storage {
138                    Some((_, storage)) => storage,
139                    None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()),
140                };
141                v.sort_by(|a, b| a.1.cmp(&b.1).reverse());
142                state.next = v.into();
143            }
144            Ok(self)
145        }
146    }
147
148    /// Initialization
149    impl<Find, StateMut, E> Ancestors<Find, fn(&oid) -> bool, StateMut>
150    where
151        Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
152        StateMut: BorrowMut<State>,
153        E: std::error::Error + Send + Sync + 'static,
154    {
155        /// Create a new instance.
156        ///
157        /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning
158        ///    an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
159        ///    as needed.
160        /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing
161        ///   this state.
162        /// * `tips`
163        ///   * the starting points of the iteration, usually commits
164        ///   * each commit they lead to will only be returned once, including the tip that started it
165        pub fn new(tips: impl IntoIterator<Item = impl Into<ObjectId>>, state: StateMut, find: Find) -> Self {
166            Self::filtered(tips, state, find, |_| true)
167        }
168    }
169
170    /// Initialization
171    impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
172    where
173        Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
174        Predicate: FnMut(&oid) -> bool,
175        StateMut: BorrowMut<State>,
176        E: std::error::Error + Send + Sync + 'static,
177    {
178        /// Create a new instance with commit filtering enabled.
179        ///
180        /// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning
181        ///    an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
182        ///    as needed.
183        /// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing
184        ///   this state.
185        /// * `tips`
186        ///   * the starting points of the iteration, usually commits
187        ///   * each commit they lead to will only be returned once, including the tip that started it
188        /// * `predicate` - indicate whether a given commit should be included in the result as well
189        ///   as whether its parent commits should be traversed.
190        pub fn filtered(
191            tips: impl IntoIterator<Item = impl Into<ObjectId>>,
192            mut state: StateMut,
193            find: Find,
194            mut predicate: Predicate,
195        ) -> Self {
196            let tips = tips.into_iter();
197            {
198                let state = state.borrow_mut();
199                state.clear();
200                state.next.reserve(tips.size_hint().0);
201                for tip in tips.map(Into::into) {
202                    let was_inserted = state.seen.insert(tip);
203                    if was_inserted && predicate(&tip) {
204                        state.next.push_back((tip, 0));
205                    }
206                }
207            }
208            Self {
209                find,
210                predicate,
211                state,
212                parents: Default::default(),
213                sorting: Default::default(),
214            }
215        }
216    }
217    /// Access
218    impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut>
219    where
220        StateMut: Borrow<State>,
221    {
222        /// Return an iterator for accessing more of the current commits data.
223        pub fn commit_iter(&self) -> CommitRefIter<'_> {
224            CommitRefIter::from_bytes(&self.state.borrow().buf)
225        }
226    }
227
228    impl<Find, Predicate, StateMut, E> Iterator for Ancestors<Find, Predicate, StateMut>
229    where
230        Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
231        Predicate: FnMut(&oid) -> bool,
232        StateMut: BorrowMut<State>,
233        E: std::error::Error + Send + Sync + 'static,
234    {
235        type Item = Result<ObjectId, Error>;
236
237        fn next(&mut self) -> Option<Self::Item> {
238            if matches!(self.parents, Parents::First) {
239                self.next_by_topology()
240            } else {
241                match self.sorting {
242                    Sorting::Topological => self.next_by_topology(),
243                    Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
244                    Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
245                        time_in_seconds_since_epoch,
246                    } => self.next_by_commit_date(time_in_seconds_since_epoch.into()),
247                }
248            }
249        }
250    }
251
252    impl Sorting {
253        /// If not topo sort, provide the cutoff date if present.
254        fn cutoff_time(&self) -> Option<u32> {
255            match self {
256                Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
257                    time_in_seconds_since_epoch,
258                } => Some(*time_in_seconds_since_epoch),
259                _ => None,
260            }
261        }
262    }
263
264    /// Utilities
265    impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
266    where
267        Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
268        Predicate: FnMut(&oid) -> bool,
269        StateMut: BorrowMut<State>,
270        E: std::error::Error + Send + Sync + 'static,
271    {
272        fn next_by_commit_date(&mut self, cutoff_older_than: Option<TimeInSeconds>) -> Option<Result<ObjectId, Error>> {
273            let state = self.state.borrow_mut();
274
275            let (oid, _commit_time) = state.next.pop_front()?;
276            match (self.find)(&oid, &mut state.buf) {
277                Ok(commit_iter) => {
278                    let mut count = 0;
279                    for token in commit_iter {
280                        count += 1;
281                        let is_first = count == 1;
282                        match token {
283                            Ok(git_object::commit::ref_iter::Token::Tree { .. }) => continue,
284                            Ok(git_object::commit::ref_iter::Token::Parent { id }) => {
285                                let was_inserted = state.seen.insert(id);
286                                if !(was_inserted && (self.predicate)(&id)) {
287                                    if is_first && matches!(self.parents, Parents::First) {
288                                        break;
289                                    } else {
290                                        continue;
291                                    }
292                                }
293
294                                let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok();
295                                let parent_commit_time = parent
296                                    .and_then(|parent| {
297                                        parent
298                                            .committer()
299                                            .ok()
300                                            .map(|committer| committer.time.seconds_since_unix_epoch)
301                                    })
302                                    .unwrap_or_default();
303
304                                let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse())
305                                {
306                                    Ok(_) => None,
307                                    Err(pos) => Some(pos),
308                                };
309                                match cutoff_older_than {
310                                    Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
311                                    Some(_) | None => match pos {
312                                        Some(pos) => state.next.insert(pos, (id, parent_commit_time)),
313                                        None => state.next.push_back((id, parent_commit_time)),
314                                    },
315                                }
316
317                                if is_first && matches!(self.parents, Parents::First) {
318                                    break;
319                                }
320                            }
321                            Ok(_unused_token) => break,
322                            Err(err) => return Some(Err(err.into())),
323                        }
324                    }
325                }
326                Err(err) => {
327                    return Some(Err(Error::FindExisting {
328                        oid,
329                        source: err.into(),
330                    }))
331                }
332            }
333            Some(Ok(oid))
334        }
335    }
336
337    /// Utilities
338    impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
339    where
340        Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
341        Predicate: FnMut(&oid) -> bool,
342        StateMut: BorrowMut<State>,
343        E: std::error::Error + Send + Sync + 'static,
344    {
345        fn next_by_topology(&mut self) -> Option<Result<ObjectId, Error>> {
346            let state = self.state.borrow_mut();
347            let (oid, _commit_time) = state.next.pop_front()?;
348            match (self.find)(&oid, &mut state.buf) {
349                Ok(commit_iter) => {
350                    for token in commit_iter {
351                        match token {
352                            Ok(git_object::commit::ref_iter::Token::Tree { .. }) => continue,
353                            Ok(git_object::commit::ref_iter::Token::Parent { id }) => {
354                                let was_inserted = state.seen.insert(id);
355                                if was_inserted && (self.predicate)(&id) {
356                                    state.next.push_back((id, 0));
357                                }
358                                if matches!(self.parents, Parents::First) {
359                                    break;
360                                }
361                            }
362                            Ok(_a_token_past_the_parents) => break,
363                            Err(err) => return Some(Err(err.into())),
364                        }
365                    }
366                }
367                Err(err) => {
368                    return Some(Err(Error::FindExisting {
369                        oid,
370                        source: err.into(),
371                    }))
372                }
373            }
374            Some(Ok(oid))
375        }
376    }
377}