ralph_workflow/git_helpers/rebase_state_machine.rs

//! Rebase state machine for fault-tolerant rebase operations.
//!
//! This module provides a state machine that manages rebase operations
//! with checkpoint-based recovery. It tracks the current phase of a rebase
//! operation and can resume from interruptions.
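//!
//! # Example
//!
//! A minimal, illustrative sketch of the intended flow (marked `ignore`:
//! imports are omitted and the calls below touch `.agent/` in the current
//! working directory):
//!
//! ```ignore
//! // Hold the rebase lock for the duration of the operation; it is
//! // released automatically when the guard is dropped.
//! let _lock = RebaseLock::new()?;
//!
//! // Resume from a persisted checkpoint if one exists, otherwise start fresh.
//! let mut machine = RebaseStateMachine::load_or_create("main".to_string())?;
//! machine.transition_to(RebasePhase::RebaseInProgress)?;
//!
//! // ... drive the rebase, recording conflicts and resolutions as they occur ...
//!
//! // On successful completion, drop the persisted checkpoint.
//! machine.clear_checkpoint()?;
//! ```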

#![deny(unsafe_code)]

use std::fs;
use std::io;
use std::io::Write;
use std::path::Path;

use super::rebase_checkpoint::{
    clear_rebase_checkpoint, load_rebase_checkpoint, rebase_checkpoint_exists,
    save_rebase_checkpoint, RebaseCheckpoint, RebasePhase,
};

/// Default maximum number of recovery attempts.
const DEFAULT_MAX_RECOVERY_ATTEMPTS: u32 = 3;

/// Rebase lock file name.
const REBASE_LOCK_FILE: &str = "rebase.lock";

/// Default lock timeout in seconds (30 minutes).
const DEFAULT_LOCK_TIMEOUT_SECONDS: u64 = 1800;

/// Get the rebase lock file path.
///
/// The lock is stored in `.agent/rebase.lock`
/// relative to the current working directory.
fn rebase_lock_path() -> String {
    format!(".agent/{REBASE_LOCK_FILE}")
}

/// State machine for fault-tolerant rebase operations.
///
/// This state machine manages rebase operations with:
/// - Checkpoint-based persistence
/// - Automatic recovery from interruptions
/// - Maximum recovery attempt limits
/// - Conflict tracking
pub struct RebaseStateMachine {
    /// Current checkpoint state
    checkpoint: RebaseCheckpoint,
    /// Maximum number of recovery attempts
    max_recovery_attempts: u32,
}

impl RebaseStateMachine {
    /// Create a new state machine for a rebase operation.
    ///
    /// # Arguments
    ///
    /// * `upstream_branch` - The branch to rebase onto
    pub fn new(upstream_branch: String) -> Self {
        Self {
            checkpoint: RebaseCheckpoint::new(upstream_branch),
            max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
        }
    }

    /// Load an existing state machine from checkpoint or create a new one.
    ///
    /// If a checkpoint exists, this will resume from that state.
    /// Otherwise, creates a new state machine.
    ///
    /// This method handles corrupted checkpoints by:
    /// - Attempting to load the backup checkpoint
    /// - Creating a fresh state if the checkpoint is completely corrupted
    ///
    /// # Arguments
    ///
    /// * `upstream_branch` - The branch to rebase onto (used if no checkpoint exists)
    ///
    /// # Returns
    ///
    /// Returns `Ok(state_machine)` if successful, or an error if loading fails.
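    ///
    /// # Example
    ///
    /// Illustrative only (marked `ignore`; imports are omitted and checkpoint
    /// state is read relative to the current working directory):
    ///
    /// ```ignore
    /// // Resumes from the saved checkpoint if present; otherwise starts a
    /// // fresh rebase onto `main`.
    /// let machine = RebaseStateMachine::load_or_create("main".to_string())?;
    /// println!("current phase: {:?}", machine.phase());
    /// ```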
    pub fn load_or_create(upstream_branch: String) -> io::Result<Self> {
        if rebase_checkpoint_exists() {
            // Try to load the primary checkpoint
            match load_rebase_checkpoint() {
                Ok(Some(checkpoint)) => {
                    // Successfully loaded checkpoint
                    Ok(Self {
                        checkpoint,
                        max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
                    })
                }
                Ok(None) => {
                    // Checkpoint file exists but is empty - try backup or create fresh
                    Self::try_load_backup_or_create(upstream_branch)
                }
                Err(e) => {
                    // Checkpoint is corrupted - try backup or create fresh
                    // Log the error but attempt recovery
                    eprintln!("Warning: Failed to load checkpoint: {e}. Attempting recovery...");

                    match Self::try_load_backup_or_create(upstream_branch.clone()) {
                        Ok(sm) => {
                            // Backup loaded or fresh state created - clear corrupted checkpoint
                            let _ = clear_rebase_checkpoint();
                            Ok(sm)
                        }
                        Err(backup_err) => {
                            // Even backup failed - return original error with context
                            Err(io::Error::new(
                                io::ErrorKind::InvalidData,
                                format!(
                                    "Failed to load checkpoint ({e}) and backup ({backup_err}). \
                                     Manual intervention may be required."
                                ),
                            ))
                        }
                    }
                }
            }
        } else {
            Ok(Self::new(upstream_branch))
        }
    }

    /// Try to load a backup checkpoint or create a fresh state machine.
    ///
    /// This is called when the primary checkpoint cannot be loaded.
    ///
    /// # Arguments
    ///
    /// * `upstream_branch` - The branch to rebase onto
    ///
    /// # Returns
    ///
    /// Returns `Ok(state_machine)` with either backup loaded or fresh state.
    fn try_load_backup_or_create(upstream_branch: String) -> io::Result<Self> {
        use super::rebase_checkpoint::rebase_checkpoint_backup_path;

        let backup_path = rebase_checkpoint_backup_path();

        // Check if backup exists
        if Path::new(&backup_path).exists() {
            // Try to load the backup checkpoint directly
            match fs::read_to_string(&backup_path) {
                Ok(content) => match serde_json::from_str::<RebaseCheckpoint>(&content) {
                    Ok(checkpoint) => {
                        eprintln!("Successfully recovered from backup checkpoint");
                        return Ok(Self {
                            checkpoint,
                            max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
                        });
                    }
                    Err(e) => {
                        eprintln!("Backup checkpoint is also corrupted: {e}");
                    }
                },
                Err(e) => {
                    eprintln!("Failed to read backup checkpoint file: {e}");
                }
            }
        }

        // No backup available or backup is corrupted - create fresh state
        eprintln!("Creating fresh state machine (checkpoint data lost)");
        Ok(Self::new(upstream_branch))
    }

    /// Set the maximum number of recovery attempts.
    pub fn with_max_recovery_attempts(mut self, max: u32) -> Self {
        self.max_recovery_attempts = max;
        self
    }

    /// Transition to a new phase and save checkpoint.
    ///
    /// # Arguments
    ///
    /// * `phase` - The new phase to transition to
    ///
    /// # Returns
    ///
    /// Returns `Ok(())` if the transition succeeded, or an error if saving failed.
    pub fn transition_to(&mut self, phase: RebasePhase) -> io::Result<()> {
        self.checkpoint = self.checkpoint.clone().with_phase(phase);
        save_rebase_checkpoint(&self.checkpoint)
    }

    /// Record a conflict in a file.
    ///
    /// # Arguments
    ///
    /// * `file` - The file path that has conflicts
    pub fn record_conflict(&mut self, file: String) {
        self.checkpoint = self.checkpoint.clone().with_conflicted_file(file);
    }

    /// Record that a conflict has been resolved.
    ///
    /// # Arguments
    ///
    /// * `file` - The file path that was resolved
    pub fn record_resolution(&mut self, file: String) {
        self.checkpoint = self.checkpoint.clone().with_resolved_file(file);
    }

    /// Record an error that occurred.
    ///
    /// # Arguments
    ///
    /// * `error` - The error message to record
    pub fn record_error(&mut self, error: String) {
        self.checkpoint = self.checkpoint.clone().with_error(error);
    }

    /// Check if recovery is possible.
    ///
    /// Returns `true` if the phase-specific error count is below the maximum
    /// recovery attempts for the current phase.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn can_recover(&self) -> bool {
        let max_for_phase = self.checkpoint.phase.max_recovery_attempts();
        self.checkpoint.phase_error_count < max_for_phase
    }

    /// Check if the rebase should be aborted.
    ///
    /// Returns `true` if the phase-specific error count has reached or exceeded
    /// the maximum recovery attempts for the current phase.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn should_abort(&self) -> bool {
        let max_for_phase = self.checkpoint.phase.max_recovery_attempts();
        self.checkpoint.phase_error_count >= max_for_phase
    }

    /// Check if all conflicts have been resolved.
    ///
    /// Returns `true` if all conflicted files have been marked as resolved.
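    ///
    /// # Example
    ///
    /// A sketch of the conflict bookkeeping (these calls only update the
    /// in-memory checkpoint; nothing is persisted here):
    ///
    /// ```ignore
    /// let mut machine = RebaseStateMachine::new("main".to_string());
    /// machine.record_conflict("src/lib.rs".to_string());
    /// assert!(!machine.all_conflicts_resolved());
    ///
    /// machine.record_resolution("src/lib.rs".to_string());
    /// assert!(machine.all_conflicts_resolved());
    /// ```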
    pub fn all_conflicts_resolved(&self) -> bool {
        self.checkpoint.all_conflicts_resolved()
    }

    /// Get the current checkpoint.
    pub fn checkpoint(&self) -> &RebaseCheckpoint {
        &self.checkpoint
    }

    /// Get the current phase.
    pub fn phase(&self) -> &RebasePhase {
        &self.checkpoint.phase
    }

    /// Get the upstream branch.
    pub fn upstream_branch(&self) -> &str {
        &self.checkpoint.upstream_branch
    }

    /// Get the number of unresolved conflicts.
    pub fn unresolved_conflict_count(&self) -> usize {
        self.checkpoint.unresolved_conflict_count()
    }

    /// Clear the checkpoint (typically on successful completion).
    pub fn clear_checkpoint(self) -> io::Result<()> {
        clear_rebase_checkpoint()
    }

    /// Force abort and save the aborted state.
    ///
    /// This method consumes the state machine and saves the aborted state.
    /// It's primarily used in tests or for explicit abort scenarios where
    /// you own the state machine.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn abort(mut self) -> io::Result<()> {
        self.checkpoint = self
            .checkpoint
            .clone()
            .with_phase(RebasePhase::RebaseAborted);
        save_rebase_checkpoint(&self.checkpoint)
    }
}

/// Actions that can be taken during recovery.
#[cfg(any(test, feature = "test-utils"))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RecoveryAction {
    /// Continue with the rebase operation.
    ///
    /// Used when the operation can proceed without changes,
    /// such as after resolving conflicts or recovering from a checkpoint.
    Continue,
    /// Retry the current operation.
    ///
    /// Used when transient failures can be overcome by retrying,
    /// such as concurrent operations or stale locks.
    Retry,
    /// Abort the rebase.
    ///
    /// Used when the error cannot be recovered automatically
    /// and requires manual intervention or a full restart.
    Abort,
    /// Skip the current step and proceed.
    ///
    /// Used when the current step can be safely bypassed,
    /// such as for empty commits or NoOp scenarios.
    Skip,
}

#[cfg(any(test, feature = "test-utils"))]
impl RecoveryAction {
    /// Decide the appropriate recovery action based on the error and current state.
    ///
    /// # Arguments
    ///
    /// * `error_kind` - The error that occurred
    /// * `error_count` - The number of errors that have occurred so far
    /// * `max_attempts` - The maximum number of recovery attempts allowed
    ///
    /// # Returns
    ///
    /// Returns the appropriate `RecoveryAction` for the given error and state.
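    ///
    /// # Example
    ///
    /// Illustrative sketch (marked `ignore`; imports omitted, and it mirrors
    /// the cases exercised by the tests below):
    ///
    /// ```ignore
    /// // `RebaseErrorKind` comes from `crate::git_helpers::rebase`.
    /// let conflict = RebaseErrorKind::ContentConflict {
    ///     files: vec!["file1.rs".to_string()],
    /// };
    /// // Content conflicts continue into conflict resolution while attempts remain...
    /// assert_eq!(RecoveryAction::decide(&conflict, 0, 3), RecoveryAction::Continue);
    /// // ...and abort once the attempt budget is exhausted.
    /// assert_eq!(RecoveryAction::decide(&conflict, 3, 3), RecoveryAction::Abort);
    /// ```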
    pub fn decide(
        error_kind: &crate::git_helpers::rebase::RebaseErrorKind,
        error_count: u32,
        max_attempts: u32,
    ) -> Self {
        // Check if we've exceeded maximum attempts
        if error_count >= max_attempts {
            return RecoveryAction::Abort;
        }

        match error_kind {
            // Category 1: Rebase Cannot Start - Generally not recoverable
            crate::git_helpers::rebase::RebaseErrorKind::InvalidRevision { .. } => {
                RecoveryAction::Abort
            }
            crate::git_helpers::rebase::RebaseErrorKind::DirtyWorkingTree => RecoveryAction::Abort,
            crate::git_helpers::rebase::RebaseErrorKind::ConcurrentOperation { .. } => {
                RecoveryAction::Retry
            }
            crate::git_helpers::rebase::RebaseErrorKind::RepositoryCorrupt { .. } => {
                RecoveryAction::Abort
            }
            crate::git_helpers::rebase::RebaseErrorKind::EnvironmentFailure { .. } => {
                RecoveryAction::Abort
            }
            crate::git_helpers::rebase::RebaseErrorKind::HookRejection { .. } => {
                RecoveryAction::Abort
            }

            // Category 2: Rebase Stops (Interrupted)
            crate::git_helpers::rebase::RebaseErrorKind::ContentConflict { .. } => {
                RecoveryAction::Continue
            }
            crate::git_helpers::rebase::RebaseErrorKind::PatchApplicationFailed { .. } => {
                RecoveryAction::Retry
            }
            crate::git_helpers::rebase::RebaseErrorKind::InteractiveStop { .. } => {
                RecoveryAction::Abort
            }
            crate::git_helpers::rebase::RebaseErrorKind::EmptyCommit => RecoveryAction::Skip,
            crate::git_helpers::rebase::RebaseErrorKind::AutostashFailed { .. } => {
                RecoveryAction::Retry
            }
            crate::git_helpers::rebase::RebaseErrorKind::CommitCreationFailed { .. } => {
                RecoveryAction::Retry
            }
            crate::git_helpers::rebase::RebaseErrorKind::ReferenceUpdateFailed { .. } => {
                RecoveryAction::Retry
            }

            // Category 3: Post-Rebase Failures
            #[cfg(any(test, feature = "test-utils"))]
            crate::git_helpers::rebase::RebaseErrorKind::ValidationFailed { .. } => {
                RecoveryAction::Abort
            }

            // Category 4: Interrupted/Corrupted State
            #[cfg(any(test, feature = "test-utils"))]
            crate::git_helpers::rebase::RebaseErrorKind::ProcessTerminated { .. } => {
                RecoveryAction::Continue
            }
            #[cfg(any(test, feature = "test-utils"))]
            crate::git_helpers::rebase::RebaseErrorKind::InconsistentState { .. } => {
                RecoveryAction::Retry
            }

            // Category 5: Unknown
            crate::git_helpers::rebase::RebaseErrorKind::Unknown { .. } => RecoveryAction::Abort,
        }
    }
}

/// RAII-style guard for the rebase lock.
///
/// Automatically releases the lock when dropped.
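///
/// # Example
///
/// Illustrative only (marked `ignore`; creates `.agent/rebase.lock` relative
/// to the current working directory):
///
/// ```ignore
/// {
///     let _guard = RebaseLock::new()?;
///     // ... perform the rebase while the lock is held ...
/// } // the lock file is removed here when `_guard` is dropped
/// ```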
pub struct RebaseLock {
    /// Whether we own the lock
    owns_lock: bool,
}

impl Drop for RebaseLock {
    fn drop(&mut self) {
        if self.owns_lock {
            let _ = release_rebase_lock();
        }
    }
}

impl RebaseLock {
    /// Create a new lock guard that owns the lock.
    pub fn new() -> io::Result<Self> {
        acquire_rebase_lock()?;
        Ok(Self { owns_lock: true })
    }

    /// Relinquish ownership of the lock without releasing it.
    ///
    /// This is useful when transferring ownership.
    #[must_use]
    #[cfg(any(test, feature = "test-utils"))]
    pub fn leak(mut self) -> bool {
        let owned = self.owns_lock;
        self.owns_lock = false;
        owned
    }
}

/// Acquire the rebase lock.
///
/// Creates a lock file with the current process ID and timestamp.
/// Returns an error if the lock is held by another process.
///
/// # Errors
///
/// Returns an error if:
/// - The lock file exists and is not stale
/// - The lock file cannot be created
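///
/// # Example
///
/// Manual acquire/release without the `RebaseLock` guard (illustrative,
/// marked `ignore` because it creates `.agent/rebase.lock` in the working
/// directory):
///
/// ```ignore
/// acquire_rebase_lock()?;
/// // ... exclusive section ...
/// release_rebase_lock()?;
/// ```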
pub fn acquire_rebase_lock() -> io::Result<()> {
    let lock_path = rebase_lock_path();
    let path = Path::new(&lock_path);

    // Ensure .agent directory exists
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }

    // Check if lock already exists
    if path.exists() {
        if !is_lock_stale()? {
            return Err(io::Error::new(
                io::ErrorKind::PermissionDenied,
                "Rebase is already in progress. If you believe this is incorrect, \
                 wait 30 minutes for the lock to expire or manually remove `.agent/rebase.lock`.",
            ));
        }
        // Lock is stale, remove it
        fs::remove_file(path)?;
    }

    // Create lock file with PID and timestamp
    let pid = std::process::id();
    let timestamp = chrono::Utc::now().to_rfc3339();
    let lock_content = format!("pid={pid}\ntimestamp={timestamp}\n");

    let mut file = fs::File::create(path)?;
    file.write_all(lock_content.as_bytes())?;
    file.sync_all()?;

    Ok(())
}

/// Release the rebase lock.
///
/// Removes the lock file. Does nothing if no lock exists.
///
/// # Errors
///
/// Returns an error if the lock file exists but cannot be removed.
pub fn release_rebase_lock() -> io::Result<()> {
    let lock_path = rebase_lock_path();
    let path = Path::new(&lock_path);

    if path.exists() {
        fs::remove_file(path)?;
    }

    Ok(())
}

/// Check if the lock file is stale.
///
/// A lock is considered stale if it's older than the timeout period.
///
/// # Returns
///
/// Returns `true` if the lock is stale, `false` otherwise.
fn is_lock_stale() -> io::Result<bool> {
    let lock_path = rebase_lock_path();
    let path = Path::new(&lock_path);

    if !path.exists() {
        return Ok(false);
    }

    // Read lock file to get timestamp
    let content = fs::read_to_string(path)?;

    // Parse timestamp from lock file
    let timestamp_line = content
        .lines()
        .find(|line| line.starts_with("timestamp="))
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Lock file missing timestamp"))?;

    let timestamp_str = timestamp_line.strip_prefix("timestamp=").ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            "Invalid timestamp format in lock file",
        )
    })?;

    let lock_time = chrono::DateTime::parse_from_rfc3339(timestamp_str).map_err(|_| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            "Invalid timestamp format in lock file",
        )
    })?;

    let now = chrono::Utc::now();
    let elapsed = now.signed_duration_since(lock_time);

    Ok(elapsed.num_seconds() > DEFAULT_LOCK_TIMEOUT_SECONDS as i64)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_state_machine_new() {
        let machine = RebaseStateMachine::new("main".to_string());
        assert_eq!(machine.phase(), &RebasePhase::NotStarted);
        assert_eq!(machine.upstream_branch(), "main");
        assert!(machine.can_recover());
        assert!(!machine.should_abort());
    }

    #[test]
    fn test_state_machine_transition() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            let mut machine = RebaseStateMachine::new("main".to_string());
            machine
                .transition_to(RebasePhase::RebaseInProgress)
                .unwrap();
            assert_eq!(machine.phase(), &RebasePhase::RebaseInProgress);
        });
    }

    #[test]
    fn test_state_machine_record_conflict() {
        let mut machine = RebaseStateMachine::new("main".to_string());
        machine.record_conflict("file1.rs".to_string());
        machine.record_conflict("file2.rs".to_string());
        assert_eq!(machine.unresolved_conflict_count(), 2);
    }

    #[test]
    fn test_state_machine_record_resolution() {
        let mut machine = RebaseStateMachine::new("main".to_string());
        machine.record_conflict("file1.rs".to_string());
        machine.record_conflict("file2.rs".to_string());
        assert_eq!(machine.unresolved_conflict_count(), 2);

        machine.record_resolution("file1.rs".to_string());
        assert_eq!(machine.unresolved_conflict_count(), 1);
        assert!(!machine.all_conflicts_resolved());

        machine.record_resolution("file2.rs".to_string());
        assert_eq!(machine.unresolved_conflict_count(), 0);
        assert!(machine.all_conflicts_resolved());
    }

    #[test]
    fn test_state_machine_record_error() {
        let mut machine = RebaseStateMachine::new("main".to_string());
        assert!(machine.can_recover());
        assert!(!machine.should_abort());

        machine.record_error("First error".to_string());
        assert!(machine.can_recover());

        machine.record_error("Second error".to_string());
        assert!(machine.can_recover());

        machine.record_error("Third error".to_string());
        assert!(!machine.can_recover());
        assert!(machine.should_abort());
    }

    #[test]
    fn test_state_machine_custom_max_attempts() {
        let machine = RebaseStateMachine::new("main".to_string()).with_max_recovery_attempts(1);

        assert!(machine.can_recover());
    }

    #[test]
    fn test_state_machine_save_load() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            let mut machine1 = RebaseStateMachine::new("feature-branch".to_string());
            machine1
                .transition_to(RebasePhase::ConflictDetected)
                .unwrap();

            // Note: record_conflict only updates in-memory state and does not persist it,
            // so build a checkpoint with conflicts directly and save it for this test.
            use super::super::rebase_checkpoint::{
                save_rebase_checkpoint, RebaseCheckpoint, RebasePhase,
            };
            let checkpoint = RebaseCheckpoint::new("feature-branch".to_string())
                .with_phase(RebasePhase::ConflictDetected)
                .with_conflicted_file("test.rs".to_string());
            save_rebase_checkpoint(&checkpoint).unwrap();

            // Load a new machine from the checkpoint
            let machine2 = RebaseStateMachine::load_or_create("main".to_string()).unwrap();
            assert_eq!(machine2.phase(), &RebasePhase::ConflictDetected);
            assert_eq!(machine2.upstream_branch(), "feature-branch");
            assert_eq!(machine2.unresolved_conflict_count(), 1);
        });
    }

    #[test]
    fn test_state_machine_clear_checkpoint() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            let mut machine = RebaseStateMachine::new("main".to_string());
            machine
                .transition_to(RebasePhase::RebaseInProgress)
                .unwrap();
            assert!(rebase_checkpoint_exists());

            machine.clear_checkpoint().unwrap();
            assert!(!rebase_checkpoint_exists());
        });
    }

    #[test]
    fn test_state_machine_abort() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            let mut machine = RebaseStateMachine::new("main".to_string());
            machine
                .transition_to(RebasePhase::ConflictDetected)
                .unwrap();
            machine.abort().unwrap();

            let loaded = RebaseStateMachine::load_or_create("main".to_string()).unwrap();
            assert_eq!(loaded.phase(), &RebasePhase::RebaseAborted);
        });
    }

    #[test]
    fn test_recovery_action_variants_exist() {
        let _ = RecoveryAction::Continue;
        let _ = RecoveryAction::Retry;
        let _ = RecoveryAction::Abort;
        let _ = RecoveryAction::Skip;
    }

    #[test]
    fn test_acquire_and_release_rebase_lock() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            // Acquire lock
            acquire_rebase_lock().unwrap();

            // Verify lock file exists
            let lock_path = rebase_lock_path();
            assert!(Path::new(&lock_path).exists());

            // Release lock
            release_rebase_lock().unwrap();

            // Verify lock file is gone
            assert!(!Path::new(&lock_path).exists());
        });
    }

    #[test]
    fn test_rebase_lock_prevents_duplicate() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            // Acquire first lock
            acquire_rebase_lock().unwrap();

            // Trying to acquire again should fail
            let result = acquire_rebase_lock();
            assert!(result.is_err());
            assert!(result
                .unwrap_err()
                .to_string()
                .contains("already in progress"));
        });
    }

    #[test]
    fn test_rebase_lock_guard_auto_releases() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            {
                // Create lock guard
                let _lock = RebaseLock::new().unwrap();
                let lock_path = rebase_lock_path();
                assert!(Path::new(&lock_path).exists());
            }
            // Lock should be released when guard goes out of scope

            let lock_path = rebase_lock_path();
            assert!(!Path::new(&lock_path).exists());
        });
    }

    #[test]
    fn test_rebase_lock_guard_leak() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            {
                let lock = RebaseLock::new().unwrap();
                let lock_path = rebase_lock_path();
                assert!(Path::new(&lock_path).exists());

                // Leak the lock - it won't be released
                let _ = lock.leak();
            }

            // Lock should still exist after guard is dropped
            let lock_path = rebase_lock_path();
            assert!(Path::new(&lock_path).exists());

            // Clean up
            let _ = release_rebase_lock();
        });
    }

    #[test]
    fn test_stale_lock_is_replaced() {
        use test_helpers::with_temp_cwd;

        with_temp_cwd(|_dir| {
            // Create a lock file with an old timestamp
            let lock_path = rebase_lock_path();
            let old_timestamp = chrono::Utc::now()
                - chrono::Duration::seconds(DEFAULT_LOCK_TIMEOUT_SECONDS as i64 + 60);
            let lock_content = format!("pid=12345\ntimestamp={}\n", old_timestamp.to_rfc3339());

            fs::create_dir_all(".agent").unwrap();
            fs::write(&lock_path, lock_content).unwrap();

            // Acquiring the lock should succeed since the old lock is stale
            acquire_rebase_lock().unwrap();

            // Verify new lock file exists
            assert!(Path::new(&lock_path).exists());

            // Clean up
            release_rebase_lock().unwrap();
        });
    }

    #[test]
    fn test_recovery_action_decide_content_conflict() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::ContentConflict {
            files: vec!["file1.rs".to_string()],
        };

        // Content conflict should always return Continue (to AI resolution)
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Continue);
        // Just below the attempt limit, ContentConflict should still Continue
        let action = RecoveryAction::decide(&error, 2, 3);
        assert_eq!(action, RecoveryAction::Continue);

        // Once the attempt limit is reached, it should Abort
        let action = RecoveryAction::decide(&error, 3, 3);
        assert_eq!(action, RecoveryAction::Abort);
    }

    #[test]
    fn test_recovery_action_decide_concurrent_operation() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::ConcurrentOperation {
            operation: "rebase".to_string(),
        };

        // Concurrent operation should be retried
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Retry);

        // Should keep retrying until max attempts
        let action = RecoveryAction::decide(&error, 2, 3);
        assert_eq!(action, RecoveryAction::Retry);

        // At max attempts, should abort
        let action = RecoveryAction::decide(&error, 3, 3);
        assert_eq!(action, RecoveryAction::Abort);
    }

    #[test]
    fn test_recovery_action_decide_invalid_revision() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::InvalidRevision {
            revision: "nonexistent".to_string(),
        };

        // Invalid revision should always abort (not recoverable)
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Abort);
    }

    #[test]
    fn test_recovery_action_decide_dirty_working_tree() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::DirtyWorkingTree;

        // Dirty working tree should always abort (user needs to commit/stash)
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Abort);
    }

    #[test]
    fn test_recovery_action_decide_empty_commit() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::EmptyCommit;

        // Empty commit should be skipped
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Skip);

        // Even at high error counts, should still skip
        let action = RecoveryAction::decide(&error, 5, 10);
        assert_eq!(action, RecoveryAction::Skip);
    }

    #[test]
    fn test_recovery_action_decide_process_terminated() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::ProcessTerminated {
            reason: "agent crashed".to_string(),
        };

        // Process termination should continue (recover from checkpoint)
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Continue);
    }

    #[test]
    fn test_recovery_action_decide_inconsistent_state() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::InconsistentState {
            details: "HEAD detached unexpectedly".to_string(),
        };

        // Inconsistent state should retry (after cleanup)
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Retry);

        // Should keep retrying until max attempts
        let action = RecoveryAction::decide(&error, 2, 3);
        assert_eq!(action, RecoveryAction::Retry);

        // At max attempts, should abort
        let action = RecoveryAction::decide(&error, 3, 3);
        assert_eq!(action, RecoveryAction::Abort);
    }

    #[test]
    fn test_recovery_action_decide_patch_application_failed() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::PatchApplicationFailed {
            reason: "context mismatch".to_string(),
        };

        // Patch application failure should retry
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Retry);
    }

    #[test]
    fn test_recovery_action_decide_validation_failed() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::ValidationFailed {
            reason: "tests failed".to_string(),
        };

        // Validation failure should abort (needs manual fix)
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Abort);
    }

    #[test]
    fn test_recovery_action_decide_unknown() {
        use super::super::rebase::RebaseErrorKind;

        let error = RebaseErrorKind::Unknown {
            details: "something went wrong".to_string(),
        };

        // Unknown errors should abort (safe default)
        let action = RecoveryAction::decide(&error, 0, 3);
        assert_eq!(action, RecoveryAction::Abort);
    }

    #[test]
    fn test_recovery_action_decide_max_attempts_exceeded() {
        use super::super::rebase::RebaseErrorKind;

        let retryable_errors = [
            RebaseErrorKind::ConcurrentOperation {
                operation: "merge".to_string(),
            },
            RebaseErrorKind::PatchApplicationFailed {
                reason: "fuzz failure".to_string(),
            },
            RebaseErrorKind::AutostashFailed {
                reason: "stash pop failed".to_string(),
            },
        ];

        // All retryable errors should abort when max attempts exceeded
        for error in retryable_errors {
            let action = RecoveryAction::decide(&error, 5, 3);
            assert_eq!(
                action,
                RecoveryAction::Abort,
                "Expected Abort for error: {error:?}"
            );
        }
    }

    #[test]
    fn test_recovery_action_decide_category_1_non_recoverable() {
        use super::super::rebase::RebaseErrorKind;

        let non_recoverable_errors = [
            RebaseErrorKind::InvalidRevision {
                revision: "bad-ref".to_string(),
            },
            RebaseErrorKind::RepositoryCorrupt {
                details: "missing objects".to_string(),
            },
            RebaseErrorKind::EnvironmentFailure {
                reason: "no editor configured".to_string(),
            },
            RebaseErrorKind::HookRejection {
                hook_name: "pre-rebase".to_string(),
            },
        ];

        // All these should abort regardless of error count
        for error in non_recoverable_errors {
            let action = RecoveryAction::decide(&error, 0, 3);
            assert_eq!(
                action,
                RecoveryAction::Abort,
                "Expected Abort for error: {error:?}"
            );
        }
    }

    #[test]
    fn test_recovery_action_decide_category_2_mixed() {
        use super::super::rebase::RebaseErrorKind;

        // Interactive stop should abort (manual intervention needed)
        let interactive = RebaseErrorKind::InteractiveStop {
            command: "edit".to_string(),
        };
        assert_eq!(
            RecoveryAction::decide(&interactive, 0, 3),
            RecoveryAction::Abort
        );

        // Reference update failure should retry (transient)
        let ref_fail = RebaseErrorKind::ReferenceUpdateFailed {
            reason: "concurrent update".to_string(),
        };
        assert_eq!(
            RecoveryAction::decide(&ref_fail, 0, 3),
            RecoveryAction::Retry
        );

        // Commit creation failure should retry (transient)
        let commit_fail = RebaseErrorKind::CommitCreationFailed {
            reason: "hook failed".to_string(),
        };
        assert_eq!(
            RecoveryAction::decide(&commit_fail, 0, 3),
            RecoveryAction::Retry
        );
    }
}