// Copyright 2025 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Utility for commit evolution history.

use std::collections::BTreeMap;
use std::collections::HashMap;
use std::collections::VecDeque;
use std::slice;

use itertools::Itertools as _;
use thiserror::Error;

use crate::backend::BackendError;
use crate::backend::BackendResult;
use crate::backend::CommitId;
use crate::commit::Commit;
use crate::dag_walk;
use crate::op_store::OpStoreError;
use crate::op_store::OpStoreResult;
use crate::op_walk;
use crate::operation::Operation;
use crate::repo::ReadonlyRepo;
use crate::repo::Repo as _;

/// Commit with predecessor information.
#[derive(Clone, Debug, serde::Serialize)]
pub struct CommitEvolutionEntry {
    /// Commit id and metadata.
    pub commit: Commit,
    /// Operation where the commit was created or rewritten.
    pub operation: Option<Operation>,
    /// Reachable predecessor ids reconstructed from the commit metadata. This
    /// should be set if the associated `operation` is unknown.
    // TODO: remove with legacy commit.predecessors support
    #[serde(skip)]
    reachable_predecessors: Option<Vec<CommitId>>,
}

impl CommitEvolutionEntry {
    /// Predecessor ids of this commit.
    pub fn predecessor_ids(&self) -> &[CommitId] {
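        // Entries built by this module carry either the operation that
        // recorded the predecessors or a precomputed `reachable_predecessors`
        // list, so the unwraps below are expected to hold.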
        match &self.operation {
            Some(op) => op.predecessors_for_commit(self.commit.id()).unwrap(),
            None => self.reachable_predecessors.as_ref().unwrap(),
        }
    }

    /// Predecessor commit objects of this commit.
    pub fn predecessors(&self) -> impl ExactSizeIterator<Item = BackendResult<Commit>> + use<'_> {
        let store = self.commit.store();
        self.predecessor_ids().iter().map(|id| store.get_commit(id))
    }
}

#[expect(missing_docs)]
#[derive(Debug, Error)]
pub enum WalkPredecessorsError {
    #[error(transparent)]
    Backend(#[from] BackendError),
    #[error(transparent)]
    OpStore(#[from] OpStoreError),
    #[error("Predecessors cycle detected around commit {0}")]
    CycleDetected(CommitId),
}

/// Walks operations to emit commit predecessors in reverse topological order.
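///
/// A rough usage sketch (not compiled as a doctest; `repo` and `head_id` are
/// placeholders for a loaded [`ReadonlyRepo`] and a starting commit id):
///
/// ```ignore
/// for entry in walk_predecessors(&repo, std::slice::from_ref(&head_id)) {
///     let entry = entry?;
///     println!("{:?} <- {:?}", entry.commit.id(), entry.predecessor_ids());
/// }
/// ```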
pub fn walk_predecessors<'repo>(
    repo: &'repo ReadonlyRepo,
    start_commits: &[CommitId],
) -> impl Iterator<Item = Result<CommitEvolutionEntry, WalkPredecessorsError>> + use<'repo> {
    WalkPredecessors {
        repo,
        op_ancestors: op_walk::walk_ancestors(slice::from_ref(repo.operation())),
        to_visit: start_commits.to_vec(),
        queued: VecDeque::new(),
    }
}

struct WalkPredecessors<'repo, I> {
    repo: &'repo ReadonlyRepo,
    op_ancestors: I,
    to_visit: Vec<CommitId>,
    queued: VecDeque<CommitEvolutionEntry>,
}

impl<I> WalkPredecessors<'_, I>
where
    I: Iterator<Item = OpStoreResult<Operation>>,
{
    fn try_next(&mut self) -> Result<Option<CommitEvolutionEntry>, WalkPredecessorsError> {
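        // Consume operations from newest to oldest until some entries have
        // been queued for emission or `to_visit` has been drained. Operations
        // without predecessor records trigger the legacy commit-based
        // fallback below.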
        while !self.to_visit.is_empty() && self.queued.is_empty() {
            let Some(op) = self.op_ancestors.next().transpose()? else {
                // Scanned all operations, no fallback needed.
                self.flush_commits()?;
                break;
            };
            if !op.stores_commit_predecessors() {
                // There may be concurrent ops, but let's simply switch to the
                // legacy commit traversal. Operation history should be mostly
                // linear.
                self.scan_commits()?;
                break;
            }
            self.visit_op(&op)?;
        }
        Ok(self.queued.pop_front())
    }

    /// Looks for predecessors within the given operation.
    fn visit_op(&mut self, op: &Operation) -> Result<(), WalkPredecessorsError> {
        let mut to_emit = Vec::new(); // transitive edges should be short
        let mut has_dup = false;
        let mut i = 0;
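        // In-place substitution: each pending commit that was rewritten by
        // this operation is replaced in `to_visit` with its predecessors, and
        // the rewritten commit itself is collected into `to_emit`. Commits
        // this operation doesn't know about stay in `to_visit` for older ops.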
        while let Some(cur_id) = self.to_visit.get(i) {
            if let Some(next_ids) = op.predecessors_for_commit(cur_id) {
                if to_emit.contains(cur_id) {
                    self.to_visit.remove(i);
                    has_dup = true;
                    continue;
                }
                to_emit.extend(self.to_visit.splice(i..=i, next_ids.iter().cloned()));
            } else {
                i += 1;
            }
        }

        let store = self.repo.store();
        let mut emit = |id: &CommitId| -> BackendResult<()> {
            let commit = store.get_commit(id)?;
            self.queued.push_back(CommitEvolutionEntry {
                commit,
                operation: Some(op.clone()),
                reachable_predecessors: None,
            });
            Ok(())
        };
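        // A single, non-duplicated id can be emitted as is. Otherwise, sort
        // the collected ids topologically along this operation's predecessor
        // edges so successors are emitted before their predecessors, and skip
        // ids that this operation didn't actually rewrite.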
        match &*to_emit {
            [] => {}
            [id] if !has_dup => emit(id)?,
            _ => {
                let sorted_ids = dag_walk::topo_order_reverse_ok(
                    to_emit.iter().map(Ok),
                    |&id| id,
                    |&id| op.predecessors_for_commit(id).into_iter().flatten().map(Ok),
                    |id| id, // Err(&CommitId) if graph has cycle
                )
                .map_err(|id| WalkPredecessorsError::CycleDetected(id.clone()))?;
                for &id in &sorted_ids {
                    if op.predecessors_for_commit(id).is_some() {
                        emit(id)?;
                    }
                }
            }
        }
        Ok(())
    }

    /// Traverses predecessors from remainder commits.
    fn scan_commits(&mut self) -> BackendResult<()> {
        let store = self.repo.store();
        let index = self.repo.index();
        let mut commit_predecessors: HashMap<CommitId, Vec<CommitId>> = HashMap::new();
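        // Legacy fallback: walk the `predecessors` field stored on the commits
        // themselves, caching each commit's reachable predecessors so they can
        // be attached to the emitted entries after the traversal.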
        let commits = dag_walk::topo_order_reverse_ok(
            self.to_visit.drain(..).map(|id| store.get_commit(&id)),
            |commit: &Commit| commit.id().clone(),
            |commit: &Commit| {
                let ids = commit_predecessors
                    .entry(commit.id().clone())
                    .or_insert_with(|| {
                        commit
                            .store_commit()
                            .predecessors
                            .iter()
                            // exclude unreachable predecessors
                            .filter(|id| index.has_id(id))
                            .cloned()
                            .collect()
                    });
                ids.iter().map(|id| store.get_commit(id)).collect_vec()
            },
            |_| panic!("graph has cycle"),
        )?;
        self.queued.extend(commits.into_iter().map(|commit| {
            let predecessors = commit_predecessors
                .remove(commit.id())
                .expect("commit must be visited once");
            CommitEvolutionEntry {
                commit,
                operation: None,
                reachable_predecessors: Some(predecessors),
            }
        }));
        Ok(())
    }

    /// Moves remainder commits to output queue.
    fn flush_commits(&mut self) -> BackendResult<()> {
        self.queued.reserve(self.to_visit.len());
        for id in self.to_visit.drain(..) {
            let commit = self.repo.store().get_commit(&id)?;
            self.queued.push_back(CommitEvolutionEntry {
                commit,
                operation: None,
                // There were no legacy operations, so the commit should have no
                // predecessors.
                reachable_predecessors: Some(vec![]),
            });
        }
        Ok(())
    }
}

impl<I> Iterator for WalkPredecessors<'_, I>
where
    I: Iterator<Item = OpStoreResult<Operation>>,
{
    type Item = Result<CommitEvolutionEntry, WalkPredecessorsError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.try_next().transpose()
    }
}

/// Collects predecessor records from `new_ops` to `old_ops`, and resolves
/// transitive entries.
///
/// This function assumes that there is a single greatest common ancestor
/// between `old_ops` and `new_ops`. If `old_ops` and `new_ops` are ancestors
/// or descendants of each other, or if criss-crossed merges exist between
/// these operations, the returned mapping may be lossy.
pub fn accumulate_predecessors(
    new_ops: &[Operation],
    old_ops: &[Operation],
) -> Result<BTreeMap<CommitId, Vec<CommitId>>, WalkPredecessorsError> {
    if new_ops.is_empty() || old_ops.is_empty() {
        return Ok(BTreeMap::new()); // No common ancestor exists
    }

    // Fast path for the single forward operation case.
    if let [op] = new_ops
        && op.parent_ids().iter().eq(old_ops.iter().map(|op| op.id()))
    {
        let Some(map) = &op.store_operation().commit_predecessors else {
            return Ok(BTreeMap::new());
        };
        return resolve_transitive_edges(map, map.keys())
            .map_err(|id| WalkPredecessorsError::CycleDetected(id.clone()));
    }

    // Follow reverse edges from the common ancestor to old_ops. Here we use
    // BTreeMap to stabilize the order of the reversed edges.
    let mut accumulated = BTreeMap::new();
    let reverse_ops = op_walk::walk_ancestors_range(old_ops, new_ops);
    if !try_collect_predecessors_into(&mut accumulated, reverse_ops)? {
        return Ok(BTreeMap::new());
    }
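    // The edges collected above point from commits on the old side back to
    // commits at the common ancestor. Since the final mapping goes from
    // new_ops toward old_ops, flip them so ancestor commits map forward to
    // their rewrites on the old side.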
    let mut accumulated = reverse_edges(accumulated);
    // Follow forward edges from new_ops to the common ancestor.
    let forward_ops = op_walk::walk_ancestors_range(new_ops, old_ops);
    if !try_collect_predecessors_into(&mut accumulated, forward_ops)? {
        return Ok(BTreeMap::new());
    }
    let new_commit_ids = new_ops
        .iter()
        .filter_map(|op| op.store_operation().commit_predecessors.as_ref())
        .flat_map(|map| map.keys());
    resolve_transitive_edges(&accumulated, new_commit_ids)
        .map_err(|id| WalkPredecessorsError::CycleDetected(id.clone()))
}

fn try_collect_predecessors_into(
    collected: &mut BTreeMap<CommitId, Vec<CommitId>>,
    ops: impl IntoIterator<Item = OpStoreResult<Operation>>,
) -> OpStoreResult<bool> {
    for op in ops {
        let op = op?;
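        // Operations created before predecessor records existed don't carry
        // the map; report that to the caller so it can bail out with an empty
        // result.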
        let Some(map) = &op.store_operation().commit_predecessors else {
            return Ok(false);
        };
        // Just insert. There should be no duplicate entries.
        collected.extend(map.iter().map(|(k, v)| (k.clone(), v.clone())));
    }
    Ok(true)
}

/// Resolves transitive edges in `graph` starting from the `start` nodes, and
/// returns the new DAG. The returned DAG only includes edges reachable from
/// the `start` nodes.
fn resolve_transitive_edges<'a: 'b, 'b>(
    graph: &'a BTreeMap<CommitId, Vec<CommitId>>,
    start: impl IntoIterator<Item = &'b CommitId>,
) -> Result<BTreeMap<CommitId, Vec<CommitId>>, &'b CommitId> {
    let mut new_graph: BTreeMap<CommitId, Vec<CommitId>> = BTreeMap::new();
    let sorted_ids = dag_walk::topo_order_forward_ok(
        start.into_iter().map(Ok),
        |&id| id,
        |&id| graph.get(id).into_iter().flatten().map(Ok),
        |id| id, // Err(&CommitId) if graph has cycle
    )?;
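    // Nodes are processed so that a node's neighbors come before the node
    // itself: any neighbor that already has an entry in `new_graph` is
    // replaced by its fully resolved predecessors, so each node ends up mapped
    // directly to predecessors that have no further entries in `graph`.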
    for cur_id in sorted_ids {
        let Some(neighbors) = graph.get(cur_id) else {
            continue;
        };
        let lookup = |id| new_graph.get(id).map_or(slice::from_ref(id), Vec::as_slice);
        let new_neighbors = match &neighbors[..] {
            [id] => lookup(id).to_vec(), // unique() not needed
            ids => ids.iter().flat_map(lookup).unique().cloned().collect(),
        };
        new_graph.insert(cur_id.clone(), new_neighbors);
    }
    Ok(new_graph)
}

fn reverse_edges(graph: BTreeMap<CommitId, Vec<CommitId>>) -> BTreeMap<CommitId, Vec<CommitId>> {
    let mut new_graph: BTreeMap<CommitId, Vec<CommitId>> = BTreeMap::new();
    for (node1, neighbors) in graph {
        for node2 in neighbors {
            new_graph.entry(node2).or_default().push(node1.clone());
        }
    }
    new_graph
}