Skip to main content

jj_lib/
bisect.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Bisect a range of commits.
16
17use std::collections::HashSet;
18use std::pin::pin;
19use std::sync::Arc;
20
21use futures::TryStreamExt as _;
22use thiserror::Error;
23
24use crate::backend::BackendError;
25use crate::backend::CommitId;
26use crate::commit::Commit;
27use crate::repo::Repo;
28use crate::revset::ResolvedRevsetExpression;
29use crate::revset::Revset;
30use crate::revset::RevsetEvaluationError;
31use crate::revset::RevsetExpression;
32use crate::revset::RevsetStreamExt as _;
33
34/// An error that occurred while bisecting
35#[derive(Error, Debug)]
36pub enum BisectionError {
37    /// Failed to read data from the backend
38    #[error("Failed to read data from the backend involved in bisection")]
39    BackendError(#[from] BackendError),
40    /// Failed to evaluate a revset
41    #[error("Failed to evaluate a revset involved in bisection")]
42    RevsetEvaluationError(#[from] RevsetEvaluationError),
43}
44
45/// Indicates whether a given commit was good, bad, or if it could not be
46/// determined.
47#[derive(Debug)]
48pub enum Evaluation {
49    /// The commit was good
50    Good,
51    /// The commit was bad
52    Bad,
53    /// It could not be determined whether the commit was good or bad
54    Skip,
55    /// The commit caused an abort
56    Abort,
57}
58
59impl Evaluation {
60    /// Maps the current evaluation to its inverse.
61    ///
62    /// Maps `Good`->`Bad`, `Bad`->`Good`, and keeps `Skip` as is.
63    pub fn invert(self) -> Self {
64        use Evaluation::*;
65        match self {
66            Good => Bad,
67            Bad => Good,
68            Skip => Skip,
69            Abort => Abort,
70        }
71    }
72}
73
74/// Performs bisection to find the first bad commit in a range.
75pub struct Bisector<'repo> {
76    repo: &'repo dyn Repo,
77    input_range: Arc<ResolvedRevsetExpression>,
78    good_commits: HashSet<CommitId>,
79    bad_commits: HashSet<CommitId>,
80    skipped_commits: HashSet<CommitId>,
81    aborted: bool,
82}
83
84/// The result of bisection.
85#[derive(Debug, PartialEq, Eq, Clone)]
86pub enum BisectionResult {
87    /// Found the first bad commit(s). It should be exactly one unless the input
88    /// range had multiple disjoint heads.
89    Found(Vec<Commit>),
90    /// Could not determine the first bad commit because it was in a
91    /// skipped range.
92    Indeterminate,
93    /// Bisection was aborted.
94    Abort,
95}
96
97/// The next bisection step.
98#[derive(Debug, PartialEq, Eq, Clone)]
99pub enum NextStep {
100    /// The commit must be evaluated.
101    Evaluate(Commit),
102    /// Bisection is complete.
103    Done(BisectionResult),
104}
105
106impl<'repo> Bisector<'repo> {
107    /// Create a new bisector. The range's heads are assumed to be bad.
108    /// Parents of the range's roots are assumed to be good.
109    pub async fn new(
110        repo: &'repo dyn Repo,
111        input_range: Arc<ResolvedRevsetExpression>,
112    ) -> Result<Self, BisectionError> {
113        let bad_commits = input_range
114            .heads()
115            .evaluate(repo)?
116            .stream()
117            .try_collect()
118            .await?;
119        Ok(Self {
120            repo,
121            input_range,
122            bad_commits,
123            good_commits: HashSet::new(),
124            skipped_commits: HashSet::new(),
125            aborted: false,
126        })
127    }
128
129    /// Mark a commit good.
130    pub fn mark_good(&mut self, id: CommitId) {
131        assert!(!self.bad_commits.contains(&id));
132        assert!(!self.skipped_commits.contains(&id));
133        assert!(!self.aborted);
134        self.good_commits.insert(id);
135    }
136
137    /// Mark a commit bad.
138    pub fn mark_bad(&mut self, id: CommitId) {
139        assert!(!self.good_commits.contains(&id));
140        assert!(!self.skipped_commits.contains(&id));
141        assert!(!self.aborted);
142        self.bad_commits.insert(id);
143    }
144
145    /// Mark a commit as skipped (cannot be determined if it's good or bad).
146    pub fn mark_skipped(&mut self, id: CommitId) {
147        assert!(!self.good_commits.contains(&id));
148        assert!(!self.bad_commits.contains(&id));
149        assert!(!self.aborted);
150        self.skipped_commits.insert(id);
151    }
152
153    /// Mark a commit as causing an abort
154    pub fn mark_abort(&mut self, id: CommitId) {
155        // TODO: Right now, we only use this state for triggering an abort.
156        // A potential improvement would be to make the CLI print out the revset with
157        // the current status of each change, making it possible for a user
158        // to restart an aborted bisect in progress.
159        assert!(!self.good_commits.contains(&id));
160        assert!(!self.bad_commits.contains(&id));
161        assert!(!self.skipped_commits.contains(&id));
162        self.aborted = true;
163    }
164
165    /// Mark a commit as good, bad, or skipped, according to the outcome in
166    /// `evaluation`.
167    pub fn mark(&mut self, id: CommitId, evaluation: Evaluation) {
168        match evaluation {
169            Evaluation::Good => self.mark_good(id),
170            Evaluation::Bad => self.mark_bad(id),
171            Evaluation::Skip => self.mark_skipped(id),
172            Evaluation::Abort => self.mark_abort(id),
173        }
174    }
175
176    /// The commits that were marked good.
177    pub fn good_commits(&self) -> &HashSet<CommitId> {
178        &self.good_commits
179    }
180
181    /// The commits that were marked bad.
182    pub fn bad_commits(&self) -> &HashSet<CommitId> {
183        &self.bad_commits
184    }
185
186    /// The commits that were skipped.
187    pub fn skipped_commits(&self) -> &HashSet<CommitId> {
188        &self.skipped_commits
189    }
190
191    fn candidates(&self) -> Arc<ResolvedRevsetExpression> {
192        let good_expr = RevsetExpression::commits(self.good_commits.iter().cloned().collect());
193        let bad_expr = RevsetExpression::commits(self.bad_commits.iter().cloned().collect());
194        let skipped_expr =
195            RevsetExpression::commits(self.skipped_commits.iter().cloned().collect());
196
197        self.input_range
198            .intersection(&good_expr.heads().range(&bad_expr.roots()))
199            .minus(&bad_expr)
200            .minus(&skipped_expr)
201    }
202
203    /// Returns the evaluated revset representing the remaining candidate
204    /// commits. Can be used for getting an estimate of how many commits are
205    /// left to evaluate.
206    pub async fn remaining_revset(&self) -> Result<Box<dyn Revset + 'repo>, BisectionError> {
207        Ok(self.candidates().evaluate(self.repo)?)
208    }
209
210    /// Find the next commit to evaluate, or determine that there are no more
211    /// steps.
212    pub async fn next_step(&mut self) -> Result<NextStep, BisectionError> {
213        if self.aborted {
214            return Ok(NextStep::Done(BisectionResult::Abort));
215        }
216        // Intersect the input range with the current bad range and then bisect it to
217        // find the next commit to evaluate.
218        // Skipped revisions are simply subtracted from the set.
219        // TODO: Handle long ranges of skipped revisions better
220        let to_evaluate_expr = self.candidates().bisect().latest(1);
221        let to_evaluate_set = to_evaluate_expr.evaluate(self.repo)?;
222        if let Some(commit_id) = pin!(to_evaluate_set.stream()).try_next().await? {
223            let commit = self.repo.store().get_commit_async(&commit_id).await?;
224            Ok(NextStep::Evaluate(commit))
225        } else {
226            let bad_expr = RevsetExpression::commits(self.bad_commits.iter().cloned().collect());
227            let bad_roots = bad_expr.roots().evaluate(self.repo)?;
228            let bad_commits: Vec<_> = bad_roots
229                .stream()
230                .commits(self.repo.store())
231                .try_collect()
232                .await?;
233            if bad_commits.is_empty() {
234                Ok(NextStep::Done(BisectionResult::Indeterminate))
235            } else {
236                Ok(NextStep::Done(BisectionResult::Found(bad_commits)))
237            }
238        }
239    }
240}