gix_traverse/commit/topo/
iter.rs

1use gix_hash::{oid, ObjectId};
2use gix_revwalk::PriorityQueue;
3use smallvec::SmallVec;
4
5use crate::commit::{
6    find,
7    topo::{Error, Sorting, WalkFlags},
8    Either, Info, Parents, Topo,
9};
10
11pub(in crate::commit) type GenAndCommitTime = (u32, i64);
12
13// Git's priority queue works as a LIFO stack if no compare function is set,
14// which is the case for `--topo-order.` However, even in that case the initial
15// items of the queue are sorted according to the commit time before beginning
16// the walk.
17#[derive(Debug)]
18pub(in crate::commit) enum Queue {
19    Date(PriorityQueue<i64, Info>),
20    Topo(Vec<(i64, Info)>),
21}
22
23impl Queue {
24    pub(super) fn new(s: Sorting) -> Self {
25        match s {
26            Sorting::DateOrder => Self::Date(PriorityQueue::new()),
27            Sorting::TopoOrder => Self::Topo(vec![]),
28        }
29    }
30
31    pub(super) fn push(&mut self, commit_time: i64, info: Info) {
32        match self {
33            Self::Date(q) => q.insert(commit_time, info),
34            Self::Topo(q) => q.push((commit_time, info)),
35        }
36    }
37
38    fn pop(&mut self) -> Option<Info> {
39        match self {
40            Self::Date(q) => q.pop().map(|(_, info)| info),
41            Self::Topo(q) => q.pop().map(|(_, info)| info),
42        }
43    }
44
45    pub(super) fn initial_sort(&mut self) {
46        if let Self::Topo(ref mut inner_vec) = self {
47            inner_vec.sort_by(|a, b| a.0.cmp(&b.0));
48        }
49    }
50}
51
52impl<Find, Predicate> Topo<Find, Predicate>
53where
54    Find: gix_object::Find,
55{
56    pub(super) fn compute_indegrees_to_depth(&mut self, gen_cutoff: u32) -> Result<(), Error> {
57        while let Some(((gen, _), _)) = self.indegree_queue.peek() {
58            if *gen >= gen_cutoff {
59                self.indegree_walk_step()?;
60            } else {
61                break;
62            }
63        }
64
65        Ok(())
66    }
67
68    fn indegree_walk_step(&mut self) -> Result<(), Error> {
69        if let Some(((gen, _), id)) = self.indegree_queue.pop() {
70            self.explore_to_depth(gen)?;
71
72            let parents = self.collect_parents(&id)?;
73            for (id, gen_time) in parents {
74                self.indegrees.entry(id).and_modify(|e| *e += 1).or_insert(2);
75
76                let state = self.states.get_mut(&id).ok_or(Error::MissingStateUnexpected)?;
77                if !state.contains(WalkFlags::InDegree) {
78                    *state |= WalkFlags::InDegree;
79                    self.indegree_queue.insert(gen_time, id);
80                }
81            }
82        }
83        Ok(())
84    }
85
86    fn explore_to_depth(&mut self, gen_cutoff: u32) -> Result<(), Error> {
87        while let Some(((gen, _), _)) = self.explore_queue.peek() {
88            if *gen >= gen_cutoff {
89                self.explore_walk_step()?;
90            } else {
91                break;
92            }
93        }
94        Ok(())
95    }
96
97    fn explore_walk_step(&mut self) -> Result<(), Error> {
98        if let Some((_, id)) = self.explore_queue.pop() {
99            let parents = self.collect_parents(&id)?;
100            self.process_parents(&id, &parents)?;
101
102            for (id, gen_time) in parents {
103                let state = self.states.get_mut(&id).ok_or(Error::MissingStateUnexpected)?;
104
105                if !state.contains(WalkFlags::Explored) {
106                    *state |= WalkFlags::Explored;
107                    self.explore_queue.insert(gen_time, id);
108                }
109            }
110        }
111        Ok(())
112    }
113
114    fn expand_topo_walk(&mut self, id: &oid) -> Result<(), Error> {
115        let parents = self.collect_parents(id)?;
116        self.process_parents(id, &parents)?;
117
118        for (pid, (parent_gen, parent_commit_time)) in parents {
119            let parent_state = self.states.get(&pid).ok_or(Error::MissingStateUnexpected)?;
120            if parent_state.contains(WalkFlags::Uninteresting) {
121                continue;
122            }
123
124            if parent_gen < self.min_gen {
125                self.min_gen = parent_gen;
126                self.compute_indegrees_to_depth(self.min_gen)?;
127            }
128
129            let i = self.indegrees.get_mut(&pid).ok_or(Error::MissingIndegreeUnexpected)?;
130            *i -= 1;
131            if *i != 1 {
132                continue;
133            }
134
135            let parent_ids = self.collect_all_parents(&pid)?.into_iter().map(|e| e.0).collect();
136            self.topo_queue.push(
137                parent_commit_time,
138                Info {
139                    id: pid,
140                    parent_ids,
141                    commit_time: Some(parent_commit_time),
142                },
143            );
144        }
145
146        Ok(())
147    }
148
149    fn process_parents(&mut self, id: &oid, parents: &[(ObjectId, GenAndCommitTime)]) -> Result<(), Error> {
150        let state = self.states.get_mut(id).ok_or(Error::MissingStateUnexpected)?;
151        if state.contains(WalkFlags::Added) {
152            return Ok(());
153        }
154
155        *state |= WalkFlags::Added;
156
157        // If the current commit is uninteresting we pass that on to ALL
158        // parents, otherwise we set the Seen flag.
159        let (pass, insert) = if state.contains(WalkFlags::Uninteresting) {
160            let flags = WalkFlags::Uninteresting;
161            for (id, _) in parents {
162                let grand_parents = self.collect_all_parents(id)?;
163
164                for (id, _) in &grand_parents {
165                    self.states
166                        .entry(*id)
167                        .and_modify(|s| *s |= WalkFlags::Uninteresting)
168                        .or_insert(WalkFlags::Uninteresting | WalkFlags::Seen);
169                }
170            }
171            (flags, flags)
172        } else {
173            // NOTE: git sets SEEN like we do but keeps the SYMMETRIC_LEFT and
174            // ANCENSTRY_PATH if they are set, but they have no purpose here.
175            let flags = WalkFlags::empty();
176            (flags, WalkFlags::Seen)
177        };
178
179        for (id, _) in parents {
180            self.states.entry(*id).and_modify(|s| *s |= pass).or_insert(insert);
181        }
182        Ok(())
183    }
184
185    fn collect_parents(&mut self, id: &oid) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
186        collect_parents(
187            &mut self.commit_graph,
188            &self.find,
189            id,
190            matches!(self.parents, Parents::First),
191            &mut self.buf,
192        )
193    }
194
195    // Same as collect_parents but disregards the first_parent flag
196    pub(super) fn collect_all_parents(
197        &mut self,
198        id: &oid,
199    ) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
200        collect_parents(&mut self.commit_graph, &self.find, id, false, &mut self.buf)
201    }
202
203    fn pop_commit(&mut self) -> Option<Result<Info, Error>> {
204        let commit = self.topo_queue.pop()?;
205        let i = match self.indegrees.get_mut(&commit.id) {
206            Some(i) => i,
207            None => {
208                return Some(Err(Error::MissingIndegreeUnexpected));
209            }
210        };
211
212        *i = 0;
213        if let Err(e) = self.expand_topo_walk(&commit.id) {
214            return Some(Err(e));
215        }
216
217        Some(Ok(commit))
218    }
219}
220
221impl<Find, Predicate> Iterator for Topo<Find, Predicate>
222where
223    Find: gix_object::Find,
224    Predicate: FnMut(&oid) -> bool,
225{
226    type Item = Result<Info, Error>;
227
228    fn next(&mut self) -> Option<Self::Item> {
229        loop {
230            match self.pop_commit()? {
231                Ok(id) => {
232                    if (self.predicate)(&id.id) {
233                        return Some(Ok(id));
234                    }
235                }
236                Err(e) => return Some(Err(e)),
237            }
238        }
239    }
240}
241
242fn collect_parents<Find>(
243    cache: &mut Option<gix_commitgraph::Graph>,
244    f: Find,
245    id: &oid,
246    first_only: bool,
247    buf: &mut Vec<u8>,
248) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error>
249where
250    Find: gix_object::Find,
251{
252    let mut parents = SmallVec::<[(ObjectId, GenAndCommitTime); 1]>::new();
253    match find(cache.as_ref(), &f, id, buf)? {
254        Either::CommitRefIter(c) => {
255            for token in c {
256                use gix_object::commit::ref_iter::Token as T;
257                match token {
258                    Ok(T::Tree { .. }) => continue,
259                    Ok(T::Parent { id }) => {
260                        parents.push((id, (0, 0))); // Dummy numbers to be filled in
261                        if first_only {
262                            break;
263                        }
264                    }
265                    Ok(_past_parents) => break,
266                    Err(err) => return Err(err.into()),
267                }
268            }
269            // Need to check the cache again. That a commit is not in the cache
270            // doesn't mean a parent is not.
271            for (id, gen_time) in parents.iter_mut() {
272                let commit = find(cache.as_ref(), &f, id, buf)?;
273                *gen_time = gen_and_commit_time(commit)?;
274            }
275        }
276        Either::CachedCommit(c) => {
277            for pos in c.iter_parents() {
278                let Ok(pos) = pos else {
279                    // drop corrupt cache and use ODB from now on.
280                    *cache = None;
281                    return collect_parents(cache, f, id, first_only, buf);
282                };
283                let parent_commit = cache
284                    .as_ref()
285                    .expect("cache exists if CachedCommit was returned")
286                    .commit_at(pos);
287                parents.push((
288                    parent_commit.id().into(),
289                    (parent_commit.generation(), parent_commit.committer_timestamp() as i64),
290                ));
291                if first_only {
292                    break;
293                }
294            }
295        }
296    }
297    Ok(parents)
298}
299
300pub(super) fn gen_and_commit_time(c: Either<'_, '_>) -> Result<GenAndCommitTime, Error> {
301    match c {
302        Either::CommitRefIter(c) => {
303            let mut commit_time = 0;
304            for token in c {
305                use gix_object::commit::ref_iter::Token as T;
306                match token {
307                    Ok(T::Tree { .. }) => continue,
308                    Ok(T::Parent { .. }) => continue,
309                    Ok(T::Author { .. }) => continue,
310                    Ok(T::Committer { signature }) => {
311                        commit_time = signature.seconds();
312                        break;
313                    }
314                    Ok(_unused_token) => break,
315                    Err(err) => return Err(err.into()),
316                }
317            }
318            Ok((gix_commitgraph::GENERATION_NUMBER_INFINITY, commit_time))
319        }
320        Either::CachedCommit(c) => Ok((c.generation(), c.committer_timestamp() as i64)),
321    }
322}