1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
// State types and definitions for the rebase state machine.
//
// This file contains the core `RebaseStateMachine` struct and the `RecoveryAction` enum,
// together with their `impl` blocks.
/// Default maximum number of recovery attempts.
///
/// Every constructor path in this file (`new`, `load_or_create` and the backup
/// loaders) initializes `RebaseStateMachine::max_recovery_attempts` with this
/// value; `with_max_recovery_attempts` can override it afterwards.
const DEFAULT_MAX_RECOVERY_ATTEMPTS: u32 = 3;
/// State machine for fault-tolerant rebase operations.
///
/// This state machine manages rebase operations with:
/// - Checkpoint-based persistence
/// - Automatic recovery from interruptions
/// - Maximum recovery attempt limits
/// - Conflict tracking
///
/// The mutating methods consume `self` and return the updated state machine,
/// so callers must rebind the returned value.
pub struct RebaseStateMachine {
/// Current checkpoint state
///
/// Holds the phase, the upstream branch, and the conflict and error
/// bookkeeping that the accessor and `record_*` methods operate on.
checkpoint: RebaseCheckpoint,
/// Maximum number of recovery attempts
///
/// Initialized from `DEFAULT_MAX_RECOVERY_ATTEMPTS` and overridable through
/// `with_max_recovery_attempts`.
// NOTE(review): `can_recover` / `should_abort` in this file consult
// `checkpoint.phase.max_recovery_attempts()` rather than this field — confirm
// where this field is actually read.
max_recovery_attempts: u32,
}
impl RebaseStateMachine {
/// Build a state machine for a rebase onto `upstream_branch`.
///
/// The checkpoint comes from `RebaseCheckpoint::new` and the recovery limit
/// starts at `DEFAULT_MAX_RECOVERY_ATTEMPTS`. This constructor does not call
/// the checkpoint save routine itself.
///
/// # Arguments
///
/// * `upstream_branch` - The branch to rebase onto
#[must_use]
pub fn new(upstream_branch: String) -> Self {
    let checkpoint = RebaseCheckpoint::new(upstream_branch);
    Self {
        checkpoint,
        max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
    }
}
/// Resume from a persisted checkpoint, or start fresh when none exists.
///
/// Resolution order:
/// 1. `rebase_checkpoint_exists()` is false: a new state machine is returned.
/// 2. The primary checkpoint loads: it is used as-is.
/// 3. The loader reports no checkpoint (`Ok(None)`): fall back to the backup
///    checkpoint, or to a fresh state.
/// 4. The loader fails: a warning is printed to stderr and the same fallback
///    is attempted; when it succeeds the primary checkpoint is cleared on a
///    best-effort basis (a failure to clear is ignored).
///
/// In every case the recovery limit is `DEFAULT_MAX_RECOVERY_ATTEMPTS`.
///
/// # Arguments
///
/// * `upstream_branch` - The branch to rebase onto (used if no checkpoint can be loaded)
///
/// # Errors
///
/// * Case 3 propagates the fallback's error unchanged.
/// * Case 4 returns an `io::ErrorKind::InvalidData` error that carries both
///   the original load error and the fallback error.
#[expect(clippy::print_stderr, reason = "recovery warning messages")]
pub fn load_or_create(upstream_branch: String) -> io::Result<Self> {
    // Nothing persisted: start from scratch.
    if !rebase_checkpoint_exists() {
        return Ok(Self::new(upstream_branch));
    }

    match load_rebase_checkpoint() {
        Ok(Some(checkpoint)) => Ok(Self {
            checkpoint,
            max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
        }),
        // The file is reported as present but yielded no checkpoint.
        Ok(None) => Self::try_load_backup_or_create(upstream_branch),
        Err(e) => {
            eprintln!("Warning: Failed to load checkpoint: {e}. Attempting recovery...");
            let recovered =
                Self::try_load_backup_or_create(upstream_branch).map_err(|backup_err| {
                    // Keep the original failure visible next to the fallback failure.
                    io::Error::new(
                        io::ErrorKind::InvalidData,
                        format!(
                            "Failed to load checkpoint ({e}) and backup ({backup_err}). \
                             Manual intervention may be required."
                        ),
                    )
                })?;
            // Best effort: remove the checkpoint that could not be loaded.
            let _ = clear_rebase_checkpoint();
            Ok(recovered)
        }
    }
}
/// Fall back to the backup checkpoint, or to a fresh state machine.
///
/// Called when the primary checkpoint cannot be used. Builds a `WorkspaceFs`
/// from the process's current directory and delegates to
/// `try_load_backup_or_create_with_workspace`.
///
/// # Arguments
///
/// * `upstream_branch` - The branch to rebase onto
///
/// # Errors
///
/// Fails only when `std::env::current_dir` fails; the delegated loader itself
/// always yields a state machine.
fn try_load_backup_or_create(upstream_branch: String) -> io::Result<Self> {
    let cwd = std::env::current_dir()?;
    let workspace = WorkspaceFs::new(cwd);
    let machine = Self::try_load_backup_or_create_with_workspace(&workspace, upstream_branch);
    Ok(machine)
}
/// Load the backup checkpoint through `workspace`, or create a fresh state.
///
/// This is the workspace-aware variant. It never fails: when the backup is
/// missing, unreadable, or not valid JSON for a `RebaseCheckpoint`, the
/// problem is reported on stderr and a new state machine for
/// `upstream_branch` is returned instead.
fn try_load_backup_or_create_with_workspace(
    workspace: &dyn Workspace,
    upstream_branch: String,
) -> Self {
    use super::rebase_checkpoint::rebase_checkpoint_backup_path;

    // Shared fallback for every path on which the backup cannot be used.
    let start_fresh = |branch: String| -> Self {
        eprintln!("Creating fresh state machine (checkpoint data lost)");
        Self::new(branch)
    };

    let backup_location = rebase_checkpoint_backup_path();
    let backup_file = Path::new(&backup_location);

    if !workspace.exists(backup_file) {
        return start_fresh(upstream_branch);
    }

    let raw = match workspace.read(backup_file) {
        Ok(raw) => raw,
        Err(e) => {
            eprintln!("Failed to read backup checkpoint file: {e}");
            return start_fresh(upstream_branch);
        }
    };

    match serde_json::from_str::<RebaseCheckpoint>(&raw) {
        Ok(checkpoint) => {
            eprintln!("Successfully recovered from backup checkpoint");
            Self {
                checkpoint,
                max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
            }
        }
        Err(e) => {
            eprintln!("Backup checkpoint is also corrupted: {e}");
            start_fresh(upstream_branch)
        }
    }
}
/// Set the maximum number of recovery attempts.
///
/// Builder-style: consumes the state machine and returns it with
/// `max_recovery_attempts` replaced by `max`. The checkpoint is left
/// untouched and nothing is persisted by this call.
///
/// # Arguments
///
/// * `max` - The new recovery attempt limit
#[must_use]
pub const fn with_max_recovery_attempts(mut self, max: u32) -> Self {
self.max_recovery_attempts = max;
self
}
/// Transition to a new phase and save the checkpoint.
///
/// The state machine is consumed and always handed back with the new phase
/// applied, even when persisting the checkpoint fails. The caller therefore
/// keeps the in-memory state and decides how to react to the save error.
///
/// # Arguments
///
/// * `phase` - The new phase to transition to
///
/// # Returns
///
/// A tuple `(state_machine, save_result)`:
/// * `state_machine` - the state machine with `phase` applied to its checkpoint
/// * `save_result` - `Ok(())` if `save_rebase_checkpoint` succeeded, otherwise
///   the I/O error it reported
pub fn transition_to(self, phase: RebasePhase) -> (Self, io::Result<()>) {
    // `self` is owned, so the checkpoint can be moved out instead of cloned.
    let Self {
        checkpoint,
        max_recovery_attempts,
    } = self;
    let checkpoint = checkpoint.with_phase(phase);
    let result = save_rebase_checkpoint(&checkpoint);
    (
        Self {
            checkpoint,
            max_recovery_attempts,
        },
        result,
    )
}
/// Record a conflict in a file.
///
/// Consumes the state machine and returns it with `file` added to the
/// checkpoint via `RebaseCheckpoint::with_conflicted_file`. Only the in-memory
/// checkpoint changes; this call does not save it.
///
/// # Arguments
///
/// * `file` - The file path that has conflicts
#[must_use = "the updated state machine is returned; dropping it loses the recorded conflict"]
pub fn record_conflict(mut self, file: String) -> Self {
    // `self` is owned: move the checkpoint through the builder rather than cloning it.
    self.checkpoint = self.checkpoint.with_conflicted_file(file);
    self
}
/// Record that a conflict has been resolved.
///
/// Consumes the state machine and returns it with `file` marked through
/// `RebaseCheckpoint::with_resolved_file`. Only the in-memory checkpoint
/// changes; this call does not save it.
///
/// # Arguments
///
/// * `file` - The file path that was resolved
#[must_use = "the updated state machine is returned; dropping it loses the recorded resolution"]
pub fn record_resolution(mut self, file: String) -> Self {
    // `self` is owned: move the checkpoint through the builder rather than cloning it.
    self.checkpoint = self.checkpoint.with_resolved_file(file);
    self
}
/// Record an error that occurred.
///
/// Consumes the state machine and returns it with `error` applied through
/// `RebaseCheckpoint::with_error`. Only the in-memory checkpoint changes; this
/// call does not save it.
///
/// # Arguments
///
/// * `error` - The error message to record
#[must_use = "the updated state machine is returned; dropping it loses the recorded error"]
pub fn record_error(mut self, error: String) -> Self {
    // `self` is owned: move the checkpoint through the builder rather than cloning it.
    self.checkpoint = self.checkpoint.with_error(error);
    self
}
/// Report whether another recovery attempt is allowed.
///
/// Returns `true` while the checkpoint's `phase_error_count` is strictly
/// below the limit that the current phase reports through
/// `max_recovery_attempts()`.
#[cfg(any(test, feature = "test-utils"))]
#[must_use]
pub const fn can_recover(&self) -> bool {
    let phase_limit = self.checkpoint.phase.max_recovery_attempts();
    phase_limit > self.checkpoint.phase_error_count
}
/// Report whether the rebase should be aborted.
///
/// Returns `true` once the checkpoint's `phase_error_count` has reached or
/// exceeded the limit that the current phase reports through
/// `max_recovery_attempts()`.
#[cfg(any(test, feature = "test-utils"))]
#[must_use]
pub const fn should_abort(&self) -> bool {
    let phase_limit = self.checkpoint.phase.max_recovery_attempts();
    phase_limit <= self.checkpoint.phase_error_count
}
/// Report whether every recorded conflict has been resolved.
///
/// Delegates to `RebaseCheckpoint::all_conflicts_resolved` on the current
/// checkpoint.
#[must_use]
pub fn all_conflicts_resolved(&self) -> bool {
    self.checkpoint().all_conflicts_resolved()
}
/// Get the current checkpoint.
#[must_use]
pub const fn checkpoint(&self) -> &RebaseCheckpoint {
&self.checkpoint
}
/// Borrow the phase stored in the current checkpoint.
#[must_use]
pub const fn phase(&self) -> &RebasePhase {
    &self.checkpoint().phase
}
/// Borrow the upstream branch name stored in the current checkpoint.
#[must_use]
pub fn upstream_branch(&self) -> &str {
    &self.checkpoint().upstream_branch
}
/// Count the conflicts that are still unresolved.
///
/// Delegates to `RebaseCheckpoint::unresolved_conflict_count` on the current
/// checkpoint.
#[must_use]
pub fn unresolved_conflict_count(&self) -> usize {
    self.checkpoint().unresolved_conflict_count()
}
/// Clear the checkpoint (typically on successful completion).
///
/// Consumes the state machine and delegates to `clear_rebase_checkpoint`.
/// The in-memory checkpoint held by `self` is not consulted.
///
/// # Errors
///
/// Returns whatever error `clear_rebase_checkpoint` reports.
pub fn clear_checkpoint(self) -> io::Result<()> {
clear_rebase_checkpoint()
}
/// Force abort and save the aborted state.
///
/// Consumes the state machine, switches its checkpoint to
/// `RebasePhase::RebaseAborted` and persists that checkpoint with
/// `save_rebase_checkpoint`. It is only compiled for tests or when the
/// `test-utils` feature is enabled.
///
/// # Errors
///
/// Returns an error if saving the checkpoint fails.
#[cfg(any(test, feature = "test-utils"))]
pub fn abort(self) -> io::Result<()> {
    // `self` is owned, so the checkpoint can be moved out instead of cloned.
    let checkpoint = self.checkpoint.with_phase(RebasePhase::RebaseAborted);
    save_rebase_checkpoint(&checkpoint)
}
}
/// Actions that can be taken during recovery.
///
/// Produced by `RecoveryAction::decide`, which maps an error kind and the
/// current error count onto one of these variants. Only compiled for tests or
/// when the `test-utils` feature is enabled.
#[cfg(any(test, feature = "test-utils"))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RecoveryAction {
/// Continue with the rebase operation.
///
/// Used when the operation can proceed without changes,
/// such as after resolving conflicts or recovering from a checkpoint.
Continue,
/// Retry the current operation.
///
/// Used when transient failures can be overcome by retrying,
/// such as concurrent operations or stale locks.
Retry,
/// Abort the rebase.
///
/// Used when the error cannot be recovered automatically
/// and requires manual intervention or a full restart.
/// `decide` also returns this once the error count reaches the attempt limit.
Abort,
/// Skip the current step and proceed.
///
/// Used when the current step can be safely bypassed,
/// such as for empty commits or `NoOp` scenarios.
Skip,
}
#[cfg(any(test, feature = "test-utils"))]
impl RecoveryAction {
    /// Pick the recovery action for an error, given how many errors occurred so far.
    ///
    /// The attempt limit is checked first: once `error_count` has reached
    /// `max_attempts` the answer is always `Abort`, regardless of the error kind.
    /// Otherwise the action depends only on the variant of `error_kind`.
    ///
    /// # Arguments
    ///
    /// * `error_kind` - The error that occurred
    /// * `error_count` - The number of errors that have occurred so far
    /// * `max_attempts` - The maximum number of recovery attempts allowed
    ///
    /// # Returns
    ///
    /// Returns the appropriate `RecoveryAction` for the given error and state.
    #[must_use]
    pub const fn decide(
        error_kind: &crate::git_helpers::rebase::RebaseErrorKind,
        error_count: u32,
        max_attempts: u32,
    ) -> Self {
        use crate::git_helpers::rebase::RebaseErrorKind as Kind;

        // Out of attempts: give up no matter what went wrong.
        if error_count >= max_attempts {
            return Self::Abort;
        }

        // Arms are grouped by outcome. The match stays exhaustive without a
        // wildcard so that a new error kind forces a decision here. Per-arm
        // `cfg` gates are unnecessary because this whole impl is already
        // compiled only under `any(test, feature = "test-utils")`.
        match error_kind {
            // Interrupted rebase / terminated process: carry on from where it stopped.
            Kind::ContentConflict { .. } | Kind::ProcessTerminated { .. } => Self::Continue,

            // Nothing to apply for this step.
            Kind::EmptyCommit => Self::Skip,

            // Worth another attempt.
            Kind::ConcurrentOperation { .. }
            | Kind::PatchApplicationFailed { .. }
            | Kind::AutostashFailed { .. }
            | Kind::CommitCreationFailed { .. }
            | Kind::ReferenceUpdateFailed { .. }
            | Kind::InconsistentState { .. } => Self::Retry,

            // Cannot start, interactive stop, failed validation, or unknown: abort.
            Kind::InvalidRevision { .. }
            | Kind::DirtyWorkingTree
            | Kind::RepositoryCorrupt { .. }
            | Kind::EnvironmentFailure { .. }
            | Kind::HookRejection { .. }
            | Kind::InteractiveStop { .. }
            | Kind::ValidationFailed { .. }
            | Kind::Unknown { .. } => Self::Abort,
        }
    }
}