jujutsu_lib/
revset_graph_iterator.rs

1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::cmp::min;
16use std::collections::{BTreeMap, HashMap, HashSet};
17
18use crate::index::{IndexEntry, IndexPosition};
19use crate::nightly_shims::BTreeMapExt;
20use crate::revset::RevsetIterator;
21
22#[derive(Debug, PartialEq, Eq, Clone, Hash)]
23pub struct RevsetGraphEdge {
24    pub target: IndexPosition,
25    pub edge_type: RevsetGraphEdgeType,
26}
27
28impl RevsetGraphEdge {
29    pub fn missing(target: IndexPosition) -> Self {
30        Self {
31            target,
32            edge_type: RevsetGraphEdgeType::Missing,
33        }
34    }
35    pub fn direct(target: IndexPosition) -> Self {
36        Self {
37            target,
38            edge_type: RevsetGraphEdgeType::Direct,
39        }
40    }
41    pub fn indirect(target: IndexPosition) -> Self {
42        Self {
43            target,
44            edge_type: RevsetGraphEdgeType::Indirect,
45        }
46    }
47}
48
49#[derive(Debug, PartialEq, Eq, Clone, Hash)]
50pub enum RevsetGraphEdgeType {
51    Missing,
52    Direct,
53    Indirect,
54}
55
56// Given an iterator over some set of revisions, yields the same revisions with
57// associated edge types.
58//
59// If a revision's parent is in the input set, then the edge will be "direct".
60// Otherwise, there will be one "indirect" edge for each closest ancestor in the
61// set, and one "missing" edge for each edge leading outside the set.
62//
63// Example (uppercase characters are in the input set):
64//
65// A          A
66// |\         |\
67// B c        B :
68// |\|     => |\:
69// d E        ~ E
70// |/          ~
71// root
72//
73// The implementation works by walking the input iterator in one commit at a
74// time. It then considers all parents of the commit. It looks ahead in the
75// input iterator far enough that all the parents will have been consumed if
76// they are in the input (and puts them away so we can emit them later). If a
77// parent of the current commit is not in the input set (i.e. it was not
78// in the look-ahead), we walk these external commits until we end up back back
79// in the input set. That walk may result in consuming more elements from the
80// input iterator. In the example above, when we consider "A", we will initially
81// look ahead to "B" and "c". When we consider edges from the external commit
82// "c", we will further consume the input iterator to "E".
83//
84// Missing edges are those that don't lead back into the input set. If all edges
85// from an external commit are missing, we consider the edge to that edge to
86// also be missing. In the example above, that means that "B" will have a
87// missing edge to "d" rather than to the root.
88//
89// The iterator can be configured to skip transitive edges that it would
90// otherwise return. In this mode (which is the default), the edge from "A" to
91// "E" in the example above would be excluded because there's also a transitive
92// path from "A" to "E" via "B". The implementation of that mode
93// adds a filtering step just before yielding the edges for a commit. The
94// filtering works doing a DFS in the simplified graph. That may require even
95// more look-ahead. Consider this example (uppercase characters are in the input
96// set):
97//
98//   J
99//  /|
100// | i
101// | |\
102// | | H
103// G | |
104// | e f
105// |  \|\
106// |   D |
107//  \ /  c
108//   b  /
109//   |/
110//   A
111//   |
112//  root
113//
114// When walking from "J", we'll find indirect edges to "H", "G", and "D". This
115// is our unfiltered set of edges, before removing transitive edges. In order to
116// know that "D" is an ancestor of "H", we need to also walk from "H". We use
117// the same search for finding edges from "H" as we used from "J". That results
118// in looking ahead all the way to "A". We could reduce the amount of look-ahead
119// by stopping at "c" since we're only interested in edges that could lead to
120// "D", but that would require extra book-keeping to remember for later that the
121// edges from "f" and "H" are only partially computed.
122pub struct RevsetGraphIterator<'revset, 'repo> {
123    input_set_iter: RevsetIterator<'revset, 'repo>,
124    // Commits in the input set we had to take out of the iterator while walking external
125    // edges. Does not necessarily include the commit we're currently about to emit.
126    look_ahead: BTreeMap<IndexPosition, IndexEntry<'repo>>,
127    // The last consumed position. This is always the smallest key in the look_ahead map, but it's
128    // faster to keep a separate field for it.
129    min_position: IndexPosition,
130    // Edges for commits not in the input set.
131    // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)?
132    edges: BTreeMap<IndexPosition, HashSet<RevsetGraphEdge>>,
133    skip_transitive_edges: bool,
134}
135
136impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
137    pub fn new(iter: RevsetIterator<'revset, 'repo>) -> RevsetGraphIterator<'revset, 'repo> {
138        RevsetGraphIterator {
139            input_set_iter: iter,
140            look_ahead: Default::default(),
141            min_position: IndexPosition::MAX,
142            edges: Default::default(),
143            skip_transitive_edges: true,
144        }
145    }
146
147    pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self {
148        self.skip_transitive_edges = skip_transitive_edges;
149        self
150    }
151
152    pub fn reversed(self) -> ReverseRevsetGraphIterator<'repo> {
153        ReverseRevsetGraphIterator::new(self)
154    }
155
156    fn next_index_entry(&mut self) -> Option<IndexEntry<'repo>> {
157        if let Some(index_entry) = self.look_ahead.pop_last_value() {
158            return Some(index_entry);
159        }
160        self.input_set_iter.next()
161    }
162
163    fn edges_from_internal_commit(
164        &mut self,
165        index_entry: &IndexEntry<'repo>,
166    ) -> HashSet<RevsetGraphEdge> {
167        if let Some(edges) = self.edges.get(&index_entry.position()) {
168            return edges.clone();
169        }
170        let mut edges = HashSet::new();
171        for parent in index_entry.parents() {
172            let parent_position = parent.position();
173            self.consume_to(parent_position);
174            if self.look_ahead.contains_key(&parent_position) {
175                edges.insert(RevsetGraphEdge::direct(parent_position));
176            } else {
177                let parent_edges = self.edges_from_external_commit(parent);
178                if parent_edges
179                    .iter()
180                    .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
181                {
182                    edges.insert(RevsetGraphEdge::missing(parent_position));
183                } else {
184                    edges.extend(parent_edges);
185                }
186            }
187        }
188        self.edges.insert(index_entry.position(), edges.clone());
189        edges
190    }
191
192    fn edges_from_external_commit(
193        &mut self,
194        index_entry: IndexEntry<'repo>,
195    ) -> HashSet<RevsetGraphEdge> {
196        let position = index_entry.position();
197        let mut stack = vec![index_entry];
198        while let Some(entry) = stack.last() {
199            let position = entry.position();
200            let mut edges = HashSet::new();
201            let mut parents_complete = true;
202            for parent in entry.parents() {
203                let parent_position = parent.position();
204                self.consume_to(parent_position);
205                if self.look_ahead.contains_key(&parent_position) {
206                    // We have found a path back into the input set
207                    edges.insert(RevsetGraphEdge::indirect(parent_position));
208                } else if let Some(parent_edges) = self.edges.get(&parent_position) {
209                    if parent_edges
210                        .iter()
211                        .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
212                    {
213                        edges.insert(RevsetGraphEdge::missing(parent_position));
214                    } else {
215                        edges.extend(parent_edges.iter().cloned());
216                    }
217                } else if parent_position < self.min_position {
218                    // The parent is not in the input set
219                    edges.insert(RevsetGraphEdge::missing(parent_position));
220                } else {
221                    // The parent is not in the input set but it's somewhere in the range
222                    // where we have commits in the input set, so continue searching.
223                    stack.push(parent);
224                    parents_complete = false;
225                }
226            }
227            if parents_complete {
228                stack.pop().unwrap();
229                self.edges.insert(position, edges);
230            }
231        }
232        self.edges.get(&position).unwrap().clone()
233    }
234
235    fn remove_transitive_edges(
236        &mut self,
237        edges: HashSet<RevsetGraphEdge>,
238    ) -> HashSet<RevsetGraphEdge> {
239        if !edges
240            .iter()
241            .any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect)
242        {
243            return edges;
244        }
245        let mut min_generation = u32::MAX;
246        let mut initial_targets = HashSet::new();
247        let mut work = vec![];
248        // To start with, add the edges one step after the input edges.
249        for edge in &edges {
250            initial_targets.insert(edge.target);
251            if edge.edge_type != RevsetGraphEdgeType::Missing {
252                let entry = self.look_ahead.get(&edge.target).unwrap().clone();
253                min_generation = min(min_generation, entry.generation_number());
254                work.extend(self.edges_from_internal_commit(&entry));
255            }
256        }
257        // Find commits reachable transitively and add them to the `unwanted` set.
258        let mut unwanted = HashSet::new();
259        while let Some(edge) = work.pop() {
260            if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position {
261                continue;
262            }
263            if !unwanted.insert(edge.target) {
264                // Already visited
265                continue;
266            }
267            if initial_targets.contains(&edge.target) {
268                // Already visited
269                continue;
270            }
271            let entry = self.look_ahead.get(&edge.target).unwrap().clone();
272            if entry.generation_number() < min_generation {
273                continue;
274            }
275            work.extend(self.edges_from_internal_commit(&entry));
276        }
277
278        edges
279            .into_iter()
280            .filter(|edge| !unwanted.contains(&edge.target))
281            .collect()
282    }
283
284    fn consume_to(&mut self, pos: IndexPosition) {
285        while pos < self.min_position {
286            if let Some(next_entry) = self.input_set_iter.next() {
287                let next_position = next_entry.position();
288                self.look_ahead.insert(next_position, next_entry);
289                self.min_position = next_position;
290            } else {
291                break;
292            }
293        }
294    }
295}
296
297impl<'revset, 'repo> Iterator for RevsetGraphIterator<'revset, 'repo> {
298    type Item = (IndexEntry<'repo>, Vec<RevsetGraphEdge>);
299
300    fn next(&mut self) -> Option<Self::Item> {
301        let index_entry = self.next_index_entry()?;
302        let mut edges = self.edges_from_internal_commit(&index_entry);
303        if self.skip_transitive_edges {
304            edges = self.remove_transitive_edges(edges);
305        }
306        let mut edges: Vec<_> = edges.into_iter().collect();
307        edges.sort_by(|edge1, edge2| edge2.target.cmp(&edge1.target));
308        Some((index_entry, edges))
309    }
310}
311
312pub struct ReverseRevsetGraphIterator<'repo> {
313    items: Vec<(IndexEntry<'repo>, Vec<RevsetGraphEdge>)>,
314}
315
316impl<'repo> ReverseRevsetGraphIterator<'repo> {
317    fn new<'revset>(input: RevsetGraphIterator<'revset, 'repo>) -> Self {
318        let mut entries = vec![];
319        let mut reverse_edges: HashMap<IndexPosition, Vec<RevsetGraphEdge>> = HashMap::new();
320        for (entry, edges) in input {
321            for RevsetGraphEdge { target, edge_type } in edges {
322                reverse_edges
323                    .entry(target)
324                    .or_default()
325                    .push(RevsetGraphEdge {
326                        target: entry.position(),
327                        edge_type,
328                    })
329            }
330            entries.push(entry);
331        }
332
333        let mut items = vec![];
334        for entry in entries.into_iter() {
335            let edges = reverse_edges
336                .get(&entry.position())
337                .cloned()
338                .unwrap_or_default();
339            items.push((entry, edges));
340        }
341        Self { items }
342    }
343}
344
345impl<'repo> Iterator for ReverseRevsetGraphIterator<'repo> {
346    type Item = (IndexEntry<'repo>, Vec<RevsetGraphEdge>);
347
348    fn next(&mut self) -> Option<Self::Item> {
349        self.items.pop()
350    }
351}