// Copyright 2025 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Bisect a range of commits.
use std::collections::HashSet;
use std::pin::pin;
use std::sync::Arc;
use futures::TryStreamExt as _;
use thiserror::Error;
use crate::backend::BackendError;
use crate::backend::CommitId;
use crate::commit::Commit;
use crate::repo::Repo;
use crate::revset::ResolvedRevsetExpression;
use crate::revset::Revset;
use crate::revset::RevsetEvaluationError;
use crate::revset::RevsetExpression;
use crate::revset::RevsetStreamExt as _;
/// An error that occurred while bisecting
#[derive(Error, Debug)]
pub enum BisectionError {
/// Failed to read data from the backend
#[error("Failed to read data from the backend involved in bisection")]
BackendError(#[from] BackendError),
/// Failed to evaluate a revset
#[error("Failed to evaluate a revset involved in bisection")]
RevsetEvaluationError(#[from] RevsetEvaluationError),
}
/// The outcome of testing a single commit during bisection.
///
/// A commit is either good, bad, impossible to classify, or the trigger for
/// aborting the whole bisection.
#[derive(Debug)]
pub enum Evaluation {
    /// The commit was good.
    Good,

    /// The commit was bad.
    Bad,

    /// It could not be determined whether the commit was good or bad.
    Skip,

    /// The commit caused the bisection to be aborted.
    Abort,
}
impl Evaluation {
/// Maps the current evaluation to its inverse.
///
/// Maps `Good`->`Bad`, `Bad`->`Good`, and keeps `Skip` as is.
pub fn invert(self) -> Self {
use Evaluation::*;
match self {
Good => Bad,
Bad => Good,
Skip => Skip,
Abort => Abort,
}
}
}
/// Drives a bisection that looks for the first bad commit in a range.
///
/// Callers alternate between asking for the next commit to test
/// (`next_step`) and recording the outcome (`mark` and friends).
pub struct Bisector<'repo> {
    /// Repository used to evaluate revsets and to load commits.
    repo: &'repo dyn Repo,

    /// The range of commits being bisected.
    input_range: Arc<ResolvedRevsetExpression>,

    /// Commits recorded as good.
    good_commits: HashSet<CommitId>,

    /// Commits recorded as bad; seeded with the heads of `input_range`.
    bad_commits: HashSet<CommitId>,

    /// Commits that could not be classified.
    skipped_commits: HashSet<CommitId>,

    /// Set once an evaluation requested an abort.
    aborted: bool,
}
/// The final outcome of a bisection.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum BisectionResult {
    /// The first bad commit(s) were found. There should be exactly one unless
    /// the input range had multiple disjoint heads.
    Found(Vec<Commit>),

    /// The first bad commit could not be determined because it lies in a
    /// skipped range.
    Indeterminate,

    /// The bisection was aborted.
    Abort,
}
/// What the caller should do next in a bisection.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum NextStep {
    /// This commit must be evaluated and the outcome reported back.
    Evaluate(Commit),

    /// Bisection has finished with the given result.
    Done(BisectionResult),
}
impl<'repo> Bisector<'repo> {
/// Create a new bisector. The range's heads are assumed to be bad.
/// Parents of the range's roots are assumed to be good.
pub async fn new(
repo: &'repo dyn Repo,
input_range: Arc<ResolvedRevsetExpression>,
) -> Result<Self, BisectionError> {
let bad_commits = input_range
.heads()
.evaluate(repo)?
.stream()
.try_collect()
.await?;
Ok(Self {
repo,
input_range,
bad_commits,
good_commits: HashSet::new(),
skipped_commits: HashSet::new(),
aborted: false,
})
}
/// Mark a commit good.
pub fn mark_good(&mut self, id: CommitId) {
assert!(!self.bad_commits.contains(&id));
assert!(!self.skipped_commits.contains(&id));
assert!(!self.aborted);
self.good_commits.insert(id);
}
/// Mark a commit bad.
pub fn mark_bad(&mut self, id: CommitId) {
assert!(!self.good_commits.contains(&id));
assert!(!self.skipped_commits.contains(&id));
assert!(!self.aborted);
self.bad_commits.insert(id);
}
/// Mark a commit as skipped (cannot be determined if it's good or bad).
pub fn mark_skipped(&mut self, id: CommitId) {
assert!(!self.good_commits.contains(&id));
assert!(!self.bad_commits.contains(&id));
assert!(!self.aborted);
self.skipped_commits.insert(id);
}
/// Mark a commit as causing an abort
pub fn mark_abort(&mut self, id: CommitId) {
// TODO: Right now, we only use this state for triggering an abort.
// A potential improvement would be to make the CLI print out the revset with
// the current status of each change, making it possible for a user
// to restart an aborted bisect in progress.
assert!(!self.good_commits.contains(&id));
assert!(!self.bad_commits.contains(&id));
assert!(!self.skipped_commits.contains(&id));
self.aborted = true;
}
/// Mark a commit as good, bad, or skipped, according to the outcome in
/// `evaluation`.
pub fn mark(&mut self, id: CommitId, evaluation: Evaluation) {
match evaluation {
Evaluation::Good => self.mark_good(id),
Evaluation::Bad => self.mark_bad(id),
Evaluation::Skip => self.mark_skipped(id),
Evaluation::Abort => self.mark_abort(id),
}
}
/// The commits that were marked good.
pub fn good_commits(&self) -> &HashSet<CommitId> {
&self.good_commits
}
/// The commits that were marked bad.
pub fn bad_commits(&self) -> &HashSet<CommitId> {
&self.bad_commits
}
/// The commits that were skipped.
pub fn skipped_commits(&self) -> &HashSet<CommitId> {
&self.skipped_commits
}
fn candidates(&self) -> Arc<ResolvedRevsetExpression> {
let good_expr = RevsetExpression::commits(self.good_commits.iter().cloned().collect());
let bad_expr = RevsetExpression::commits(self.bad_commits.iter().cloned().collect());
let skipped_expr =
RevsetExpression::commits(self.skipped_commits.iter().cloned().collect());
self.input_range
.intersection(&good_expr.heads().range(&bad_expr.roots()))
.minus(&bad_expr)
.minus(&skipped_expr)
}
/// Returns the evaluated revset representing the remaining candidate
/// commits. Can be used for getting an estimate of how many commits are
/// left to evaluate.
pub async fn remaining_revset(&self) -> Result<Box<dyn Revset + 'repo>, BisectionError> {
Ok(self.candidates().evaluate(self.repo)?)
}
/// Find the next commit to evaluate, or determine that there are no more
/// steps.
pub async fn next_step(&mut self) -> Result<NextStep, BisectionError> {
if self.aborted {
return Ok(NextStep::Done(BisectionResult::Abort));
}
// Intersect the input range with the current bad range and then bisect it to
// find the next commit to evaluate.
// Skipped revisions are simply subtracted from the set.
// TODO: Handle long ranges of skipped revisions better
let to_evaluate_expr = self.candidates().bisect().latest(1);
let to_evaluate_set = to_evaluate_expr.evaluate(self.repo)?;
if let Some(commit_id) = pin!(to_evaluate_set.stream()).try_next().await? {
let commit = self.repo.store().get_commit_async(&commit_id).await?;
Ok(NextStep::Evaluate(commit))
} else {
let bad_expr = RevsetExpression::commits(self.bad_commits.iter().cloned().collect());
let bad_roots = bad_expr.roots().evaluate(self.repo)?;
let bad_commits: Vec<_> = bad_roots
.stream()
.commits(self.repo.store())
.try_collect()
.await?;
if bad_commits.is_empty() {
Ok(NextStep::Done(BisectionResult::Indeterminate))
} else {
Ok(NextStep::Done(BisectionResult::Found(bad_commits)))
}
}
}
}