jj_lib/evolution.rs

// Copyright 2025 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Utility for commit evolution history.

use std::collections::BTreeMap;
use std::collections::HashMap;
use std::collections::VecDeque;
use std::collections::hash_map::Entry;
use std::slice;

use itertools::Itertools as _;
use thiserror::Error;

use crate::backend::BackendError;
use crate::backend::BackendResult;
use crate::backend::CommitId;
use crate::commit::Commit;
use crate::dag_walk;
use crate::index::IndexError;
use crate::op_store::OpStoreError;
use crate::op_store::OpStoreResult;
use crate::op_walk;
use crate::operation::Operation;
use crate::repo::ReadonlyRepo;
use crate::repo::Repo as _;

/// Commit with predecessor information.
#[derive(Clone, Debug, serde::Serialize)]
pub struct CommitEvolutionEntry {
    /// Commit id and metadata.
    pub commit: Commit,
    /// Operation where the commit was created or rewritten.
    pub operation: Option<Operation>,
    /// Reachable predecessor ids reconstructed from the commit metadata. This
    /// should be set if the associated `operation` is unknown.
    // TODO: remove with legacy commit.predecessors support
    #[serde(skip)]
    reachable_predecessors: Option<Vec<CommitId>>,
}

impl CommitEvolutionEntry {
    /// Predecessor ids of this commit.
    pub fn predecessor_ids(&self) -> &[CommitId] {
        match &self.operation {
            Some(op) => op.predecessors_for_commit(self.commit.id()).unwrap(),
            None => self.reachable_predecessors.as_ref().unwrap(),
        }
    }

    /// Predecessor commit objects of this commit.
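    ///
    /// A minimal usage sketch (the `entry` binding is illustrative and assumed
    /// to come from `walk_predecessors()`; it is not part of this API):
    ///
    /// ```ignore
    /// for predecessor in entry.predecessors() {
    ///     let predecessor = predecessor?; // each item is a BackendResult<Commit>
    ///     println!("{} was rewritten from {}", entry.commit.id(), predecessor.id());
    /// }
    /// ```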
    pub fn predecessors(&self) -> impl ExactSizeIterator<Item = BackendResult<Commit>> {
        let store = self.commit.store();
        self.predecessor_ids().iter().map(|id| store.get_commit(id))
    }
}

#[expect(missing_docs)]
#[derive(Debug, Error)]
pub enum WalkPredecessorsError {
    #[error(transparent)]
    Backend(#[from] BackendError),
    #[error(transparent)]
    Index(#[from] IndexError),
    #[error(transparent)]
    OpStore(#[from] OpStoreError),
    #[error("Predecessors cycle detected around commit {0}")]
    CycleDetected(CommitId),
}

/// Walks operations to emit commit predecessors in reverse topological order.
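///
/// A minimal usage sketch (assumes the caller already has a `ReadonlyRepo`
/// named `repo` and a starting `CommitId` named `commit_id`; illustrative
/// only, not a complete program):
///
/// ```ignore
/// for entry in walk_predecessors(&repo, std::slice::from_ref(&commit_id)) {
///     let entry = entry?;
///     println!("{} <- {:?}", entry.commit.id(), entry.predecessor_ids());
/// }
/// ```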
pub fn walk_predecessors<'repo>(
    repo: &'repo ReadonlyRepo,
    start_commits: &[CommitId],
) -> impl Iterator<Item = Result<CommitEvolutionEntry, WalkPredecessorsError>> + use<'repo> {
    WalkPredecessors {
        repo,
        op_ancestors: op_walk::walk_ancestors(slice::from_ref(repo.operation())),
        to_visit: start_commits.to_vec(),
        queued: VecDeque::new(),
    }
}

struct WalkPredecessors<'repo, I> {
    repo: &'repo ReadonlyRepo,
    op_ancestors: I,
    to_visit: Vec<CommitId>,
    queued: VecDeque<CommitEvolutionEntry>,
}

impl<I> WalkPredecessors<'_, I>
where
    I: Iterator<Item = OpStoreResult<Operation>>,
{
    fn try_next(&mut self) -> Result<Option<CommitEvolutionEntry>, WalkPredecessorsError> {
        while !self.to_visit.is_empty() && self.queued.is_empty() {
            let Some(op) = self.op_ancestors.next().transpose()? else {
                // Scanned all operations, no fallback needed.
                self.flush_commits()?;
                break;
            };
            if !op.stores_commit_predecessors() {
                // There may be concurrent ops, but let's simply switch to the
                // legacy commit traversal. Operation history should be mostly
                // linear.
                self.scan_commits()?;
                break;
            }
            self.visit_op(&op)?;
        }
        Ok(self.queued.pop_front())
    }

    /// Looks for predecessors within the given operation.
    fn visit_op(&mut self, op: &Operation) -> Result<(), WalkPredecessorsError> {
        let mut to_emit = Vec::new(); // transitive edges should be short
        let mut has_dup = false;
        let mut i = 0;
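        // Expand `to_visit` in place: when the operation records predecessors
        // for the current id, the id is moved to `to_emit` and replaced (via
        // `splice`) by its predecessor ids, which are then revisited at the
        // same index. Ids already emitted are dropped and flagged as
        // duplicates so the emit order is re-sorted below.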
        while let Some(cur_id) = self.to_visit.get(i) {
            if let Some(next_ids) = op.predecessors_for_commit(cur_id) {
                if to_emit.contains(cur_id) {
                    self.to_visit.remove(i);
                    has_dup = true;
                    continue;
                }
                to_emit.extend(self.to_visit.splice(i..=i, next_ids.iter().cloned()));
            } else {
                i += 1;
            }
        }

        let store = self.repo.store();
        let mut emit = |id: &CommitId| -> BackendResult<()> {
            let commit = store.get_commit(id)?;
            self.queued.push_back(CommitEvolutionEntry {
                commit,
                operation: Some(op.clone()),
                reachable_predecessors: None,
            });
            Ok(())
        };
        match &*to_emit {
            [] => {}
            [id] if !has_dup => emit(id)?,
            _ => {
                let sorted_ids = dag_walk::topo_order_reverse_ok(
                    to_emit.iter().map(Ok),
                    |&id| id,
                    |&id| op.predecessors_for_commit(id).into_iter().flatten().map(Ok),
                    |id| id, // Err(&CommitId) if graph has cycle
                )
                .map_err(|id| WalkPredecessorsError::CycleDetected(id.clone()))?;
                for &id in &sorted_ids {
                    if op.predecessors_for_commit(id).is_some() {
                        emit(id)?;
                    }
                }
            }
        }
        Ok(())
    }

    /// Traverses predecessors from remainder commits.
    fn scan_commits(&mut self) -> Result<(), WalkPredecessorsError> {
        let store = self.repo.store();
        let index = self.repo.index();
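        // Cache the index-reachable predecessor ids per commit so that the
        // neighbor lookup below and the entry construction afterwards use the
        // same filtered lists.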
        let mut commit_predecessors: HashMap<CommitId, Vec<CommitId>> = HashMap::new();
        let commits = dag_walk::topo_order_reverse_ok(
            self.to_visit.drain(..).map(|id| {
                store
                    .get_commit(&id)
                    .map_err(WalkPredecessorsError::Backend)
            }),
            |commit: &Commit| commit.id().clone(),
            |commit: &Commit| {
                let ids = match commit_predecessors.entry(commit.id().clone()) {
                    Entry::Occupied(entry) => entry.into_mut(),
                    Entry::Vacant(entry) => {
                        let mut filtered = vec![];
                        for id in &commit.store_commit().predecessors {
                            match index.has_id(id) {
                                Ok(true) => {
                                    filtered.push(id.clone());
                                }
                                Ok(false) => {
                                    // Ignore unreachable predecessors
                                }
                                Err(err) => {
                                    return vec![Err(WalkPredecessorsError::Index(err))];
                                }
                            }
                        }
                        entry.insert(filtered)
                    }
                };

                ids.iter()
                    .map(|id| store.get_commit(id).map_err(WalkPredecessorsError::Backend))
                    .collect_vec()
            },
            |_| panic!("graph has cycle"),
        )?;
        self.queued.extend(commits.into_iter().map(|commit| {
            let predecessors = commit_predecessors
                .remove(commit.id())
                .expect("commit must be visited once");
            CommitEvolutionEntry {
                commit,
                operation: None,
                reachable_predecessors: Some(predecessors),
            }
        }));
        Ok(())
    }

    /// Moves remainder commits to output queue.
    fn flush_commits(&mut self) -> BackendResult<()> {
        self.queued.reserve(self.to_visit.len());
        for id in self.to_visit.drain(..) {
            let commit = self.repo.store().get_commit(&id)?;
            self.queued.push_back(CommitEvolutionEntry {
                commit,
                operation: None,
                // There were no legacy operations, so the commit should have no
                // predecessors.
                reachable_predecessors: Some(vec![]),
            });
        }
        Ok(())
    }
}

impl<I> Iterator for WalkPredecessors<'_, I>
where
    I: Iterator<Item = OpStoreResult<Operation>>,
{
    type Item = Result<CommitEvolutionEntry, WalkPredecessorsError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.try_next().transpose()
    }
}

/// Collects predecessor records from `new_ops` to `old_ops`, and resolves
/// transitive entries.
///
/// This function assumes that there exists a single greatest common ancestor
/// between `old_ops` and `new_ops`. If `old_ops` and `new_ops` are ancestors
/// and descendants of each other, or if criss-crossed merges exist between
/// these operations, the returned mapping may be lossy.
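///
/// A minimal usage sketch (assumes `new_op` and `old_op` are `Operation`s
/// already resolved by the caller; illustrative only):
///
/// ```ignore
/// let predecessors = accumulate_predecessors(
///     std::slice::from_ref(&new_op),
///     std::slice::from_ref(&old_op),
/// )?;
/// for (commit_id, predecessor_ids) in &predecessors {
///     println!("{commit_id} <- {predecessor_ids:?}");
/// }
/// ```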
pub fn accumulate_predecessors(
    new_ops: &[Operation],
    old_ops: &[Operation],
) -> Result<BTreeMap<CommitId, Vec<CommitId>>, WalkPredecessorsError> {
    if new_ops.is_empty() || old_ops.is_empty() {
        return Ok(BTreeMap::new()); // No common ancestor exists
    }

    // Fast path for the single forward operation case.
    if let [op] = new_ops
        && op.parent_ids().iter().eq(old_ops.iter().map(|op| op.id()))
    {
        let Some(map) = &op.store_operation().commit_predecessors else {
            return Ok(BTreeMap::new());
        };
        return resolve_transitive_edges(map, map.keys())
            .map_err(|id| WalkPredecessorsError::CycleDetected(id.clone()));
    }

    // Follow reverse edges from the common ancestor to old_ops. Here we use a
    // BTreeMap to stabilize the order of the reversed edges.
    let mut accumulated = BTreeMap::new();
    let reverse_ops = op_walk::walk_ancestors_range(old_ops, new_ops);
    if !try_collect_predecessors_into(&mut accumulated, reverse_ops)? {
        return Ok(BTreeMap::new());
    }
    let mut accumulated = reverse_edges(accumulated);
    // Follow forward edges from new_ops to the common ancestor.
    let forward_ops = op_walk::walk_ancestors_range(new_ops, old_ops);
    if !try_collect_predecessors_into(&mut accumulated, forward_ops)? {
        return Ok(BTreeMap::new());
    }
    let new_commit_ids = new_ops
        .iter()
        .filter_map(|op| op.store_operation().commit_predecessors.as_ref())
        .flat_map(|map| map.keys());
    resolve_transitive_edges(&accumulated, new_commit_ids)
        .map_err(|id| WalkPredecessorsError::CycleDetected(id.clone()))
}

fn try_collect_predecessors_into(
    collected: &mut BTreeMap<CommitId, Vec<CommitId>>,
    ops: impl IntoIterator<Item = OpStoreResult<Operation>>,
) -> OpStoreResult<bool> {
    for op in ops {
        let op = op?;
        let Some(map) = &op.store_operation().commit_predecessors else {
            return Ok(false);
        };
        // Just insert. There should be no duplicate entries.
        collected.extend(map.iter().map(|(k, v)| (k.clone(), v.clone())));
    }
    Ok(true)
}

/// Resolves transitive edges in `graph` starting from the `start` nodes, and
/// returns a new DAG. The returned DAG only includes edges reachable from the
/// `start` nodes.
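///
/// A worked sketch (hypothetical ids; `id(..)` stands in for building a
/// `CommitId` and is not a real helper):
///
/// ```ignore
/// let (a, b, c) = (id("a"), id("b"), id("c"));
/// // Commit `a` was rewritten from `b`, which was itself rewritten from `c`.
/// let graph = BTreeMap::from([(a.clone(), vec![b.clone()]), (b.clone(), vec![c.clone()])]);
/// let resolved = resolve_transitive_edges(&graph, [&a]).unwrap();
/// // Edges through the intermediate commit `b` are collapsed onto `c`.
/// assert_eq!(resolved[&a], vec![c.clone()]);
/// assert_eq!(resolved[&b], vec![c]);
/// ```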
fn resolve_transitive_edges<'a: 'b, 'b>(
    graph: &'a BTreeMap<CommitId, Vec<CommitId>>,
    start: impl IntoIterator<Item = &'b CommitId>,
) -> Result<BTreeMap<CommitId, Vec<CommitId>>, &'b CommitId> {
    let mut new_graph: BTreeMap<CommitId, Vec<CommitId>> = BTreeMap::new();
    let sorted_ids = dag_walk::topo_order_forward_ok(
        start.into_iter().map(Ok),
        |&id| id,
        |&id| graph.get(id).into_iter().flatten().map(Ok),
        |id| id, // Err(&CommitId) if graph has cycle
    )?;
    for cur_id in sorted_ids {
        let Some(neighbors) = graph.get(cur_id) else {
            continue;
        };
        let lookup = |id| new_graph.get(id).map_or(slice::from_ref(id), Vec::as_slice);
        let new_neighbors = match &neighbors[..] {
            [id] => lookup(id).to_vec(), // unique() not needed
            ids => ids.iter().flat_map(lookup).unique().cloned().collect(),
        };
        new_graph.insert(cur_id.clone(), new_neighbors);
    }
    Ok(new_graph)
}

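/// Reverses the direction of every edge in `graph`: an entry `b -> [a]`
/// becomes `a -> [b]`. This lets `accumulate_predecessors()` compose the
/// records collected while walking from the common ancestor towards `old_ops`
/// with the forward records from `new_ops`.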
fn reverse_edges(graph: BTreeMap<CommitId, Vec<CommitId>>) -> BTreeMap<CommitId, Vec<CommitId>> {
    let mut new_graph: BTreeMap<CommitId, Vec<CommitId>> = BTreeMap::new();
    for (node1, neighbors) in graph {
        for node2 in neighbors {
            new_graph.entry(node2).or_default().push(node1.clone());
        }
    }
    new_graph
}