ralph-workflow 0.7.18

PROMPT-driven multi-agent orchestrator for git repos
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
//! Phase-specific effect orchestration.
//!
//! This module contains pure orchestration logic for determining the next effect
//! based on the current pipeline state. All functions are deterministic and perform
//! no I/O operations.
//!
//! # Architecture
//!
//! Each phase module implements a `determine_*_effect()` function that:
//! - Takes `&PipelineState` as input
//! - Returns an `Effect` to execute next
//! - Performs NO I/O or side effects
//! - Is purely deterministic
//!
//! # Priority Order
//!
//! The main `determine_next_effect_for_phase()` function is called by the
//! higher-level orchestration layer in `xsd_retry.rs`, which handles:
//!
//! 1. **Continuation cleanup** - Write pending continuation context
//! 2. **Retry logic** - Same-agent retry after timeout/failure
//! 3. **XSD retry** - Re-invoke agent after XSD validation failure
//! 4. **Continuation** - Re-invoke agent with continuation prompt
//! 5. **Normal progression** - Call phase-specific orchestration (this module)
//!
//! # Phase Modules
//!
//! - `planning` - Planning phase orchestration
//! - `development` - Development phase orchestration (including Analysis agent)
//! - `review` - Review phase orchestration (including Fix agent)
//! - `commit` - Commit phase orchestration
//!
//! # Special Cases
//!
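//! - `FinalValidation` phase → `CheckUncommittedChangesBeforeTermination` (safety check), then `ValidateFinalState`
//! - `Finalizing` phase → `RestorePromptPermissions` effect
//! - `AwaitingDevFix` phase → `TriggerDevFixFlow` until dev-fix has been triggered and an escalation
//!   level is set; after that `AttemptRecovery` (level 1) or `EmitRecoveryReset` (level 2+). A pending
//!   completion marker preempts both: `CheckUncommittedChangesBeforeTermination` (safety check, skipped
//!   when `interrupted_by_user=true`), then `EmitCompletionMarkerAndTerminate`
//! - `Complete`/`Interrupted` phase → `CheckUncommittedChangesBeforeTermination` (safety check), then
//!   `RestorePromptPermissions` (`Interrupted` only, while not yet restored), then `SaveCheckpoint`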
//!
//! ## Pre-Termination Safety Check
//!
//! Before any pipeline termination (Complete, Interrupted, or after `FinalValidation`),
//! the orchestration derives a `CheckUncommittedChangesBeforeTermination` effect to
//! ensure no work is lost:
//!
//! - If uncommitted changes exist → route to `CommitMessage` phase
//! - If working directory is clean → emit `PreTerminationSafetyCheckPassed` and proceed
//! - If git snapshot fails → route to `AwaitingDevFix` for recovery
//!
//! **THE ONLY EXCEPTION:** User-initiated Ctrl+C (`interrupted_by_user=true`) skips
//! this check because the user explicitly chose to interrupt. All other termination
//! paths (`AwaitingDevFix` exhaustion, completion marker emission, etc.) MUST commit
//! uncommitted work before terminating.

mod commit;
mod development;
mod planning;
mod review;

use crate::reducer::effect::Effect;
use crate::reducer::event::PipelinePhase;
use crate::reducer::state::PipelineState;

pub fn determine_next_effect_for_phase(state: &PipelineState) -> Effect {
    match state.phase {
        PipelinePhase::Planning => planning::determine_planning_effect(state),
        PipelinePhase::Development => development::determine_development_effect(state),
        PipelinePhase::Review => review::determine_review_effect(state),
        PipelinePhase::CommitMessage => commit::determine_commit_effect(state),
        PipelinePhase::FinalValidation => {
            // SAFETY CHECK: Ensure no uncommitted work before finalization
            // This check happens before FinalizingStarted, ensuring all work is committed
            // before the pipeline enters its terminal sequence (Finalizing -> Complete)
            if !state.pre_termination_commit_checked {
                return Effect::CheckUncommittedChangesBeforeTermination;
            }

            Effect::ValidateFinalState
        }
        PipelinePhase::Finalizing => Effect::RestorePromptPermissions,
        PipelinePhase::AwaitingDevFix => {
            // Completion marker emission must preempt recovery-loop effects.
            // If the marker write failed, we must keep retrying deterministically until
            // the marker is successfully written (CompletionMarkerEmitted).
            if state.completion_marker_pending {
                // Completion marker emission is NOT an exception to the pre-termination
                // commit safety check. The ONLY exception is user-initiated Ctrl+C.
                if !state.interrupted_by_user && !state.pre_termination_commit_checked {
                    return Effect::CheckUncommittedChangesBeforeTermination;
                }

                return Effect::EmitCompletionMarkerAndTerminate {
                    is_failure: state.completion_marker_is_failure,
                    reason: state.completion_marker_reason.clone(),
                };
            }

            // If dev-fix already triggered and recovery state is set, attempt recovery
            if state.dev_fix_triggered && state.recovery_escalation_level > 0 {
                // Derive the appropriate recovery effect based on escalation level
                if state.recovery_escalation_level == 1 {
                    // Level 1: Simple retry - emit RecoveryAttempted to transition back
                    Effect::AttemptRecovery {
                        level: state.recovery_escalation_level,
                        attempt_count: state.dev_fix_attempt_count,
                    }
                } else {
                    // Level 2+: Requires state reset - use EmitRecoveryReset
                    use crate::reducer::effect::RecoveryResetType;
                    let (reset_type, target_phase) = match state.recovery_escalation_level {
                        2 => (
                            RecoveryResetType::PhaseStart,
                            state
                                .failed_phase_for_recovery
                                .unwrap_or(PipelinePhase::Development),
                        ),
                        3 => (RecoveryResetType::IterationReset, PipelinePhase::Planning),
                        _ => (RecoveryResetType::CompleteReset, PipelinePhase::Planning),
                    };
                    Effect::EmitRecoveryReset {
                        reset_type,
                        target_phase,
                    }
                }
            } else {
                // First time in AwaitingDevFix or dev-fix not yet triggered
                let failed_phase = state
                    .failed_phase_for_recovery
                    .or(state.previous_phase)
                    .unwrap_or(PipelinePhase::Development);
                let failed_phase = if failed_phase == PipelinePhase::AwaitingDevFix {
                    PipelinePhase::Development
                } else {
                    failed_phase
                };
                Effect::TriggerDevFixFlow {
                    failed_phase,
                    failed_role: state.agent_chain.current_drain.role(),
                    retry_cycle: state.agent_chain.retry_cycle,
                }
            }
        }
        PipelinePhase::Complete | PipelinePhase::Interrupted => {
            use crate::reducer::event::CheckpointTrigger;

            // EXCEPTION: User-initiated Ctrl+C (interrupted_by_user=true) skips safety check.
            //
            // IMPORTANT: This exception applies ONLY to `phase == Interrupted`.
            // If a checkpoint is resumed with `phase == Complete` but `interrupted_by_user == true`
            // (e.g., a mis-set checkpoint), we must still run the pre-termination safety check.
            if state.phase == PipelinePhase::Interrupted && state.interrupted_by_user {
                // On Interrupted, ALWAYS attempt PROMPT.md restoration before checkpoint.
                // We do NOT gate on restore_needed because:
                // 1. A prior crashed run (SIGKILL) may have left PROMPT.md read-only
                // 2. This run was interrupted early before LockPromptPermissions executed
                // 3. restore_needed is false, but PROMPT.md may still need restoration
                // The restoration handler is idempotent - calling it on already-writable
                // PROMPT.md is a no-op.
                if !state.prompt_permissions.restored {
                    return Effect::RestorePromptPermissions;
                }

                return Effect::SaveCheckpoint {
                    trigger: CheckpointTrigger::Interrupt,
                };
            }

            // SAFETY CHECK: Ensure no uncommitted work before termination
            // This applies to ALL other termination paths:
            // - AwaitingDevFix exhaustion → Interrupted
            // - Completion marker emission → Interrupted
            // - Normal completion → Complete
            if !state.pre_termination_commit_checked {
                return Effect::CheckUncommittedChangesBeforeTermination;
            }

            // Safety check passed - proceed with normal termination
            // On Interrupted phase, always attempt PROMPT.md restoration (same reasoning
            // as user-initiated path: prior crashed runs may have left it read-only).
            if state.phase == PipelinePhase::Interrupted && !state.prompt_permissions.restored {
                return Effect::RestorePromptPermissions;
            }

            // Programmatic termination (Complete or non-user Interrupted) should NOT be
            // classified as an interrupt. Reserve CheckpointTrigger::Interrupt for true
            // Ctrl+C interruptions (interrupted_by_user=true path above).
            Effect::SaveCheckpoint {
                trigger: CheckpointTrigger::PhaseTransition,
            }
        }
    }
}

// Unit tests for `determine_next_effect_for_phase`.
//
// Each test builds a `PipelineState` from `PipelineState::initial(1, 0)`, overrides only the
// fields relevant to the branch under test, and asserts on the derived effect.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agents::AgentRole;
    use crate::reducer::state::{AgentChainState, PromptPermissionsState};

    // `failed_phase_for_recovery` takes precedence over `previous_phase` when both are set.
    #[test]
    fn trigger_dev_fix_flow_prefers_failed_phase_for_recovery_over_previous_phase() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::AwaitingDevFix,
            previous_phase: Some(PipelinePhase::AwaitingDevFix),
            failed_phase_for_recovery: Some(PipelinePhase::CommitMessage),
            dev_fix_triggered: false,
            recovery_escalation_level: 0,
            agent_chain: AgentChainState {
                current_role: AgentRole::Developer,
                retry_cycle: 7,
                ..AgentChainState::default()
            },
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        match effect {
            Effect::TriggerDevFixFlow {
                failed_phase,
                failed_role,
                retry_cycle,
            } => {
                assert_eq!(failed_phase, PipelinePhase::CommitMessage);
                assert_eq!(failed_role, AgentRole::Developer);
                assert_eq!(retry_cycle, 7);
            }
            other => panic!("expected TriggerDevFixFlow, got: {other:?}"),
        }
    }

    // With no recorded failed phase and `previous_phase == AwaitingDevFix`, the reported
    // failed phase must be replaced by a real phase.
    #[test]
    fn trigger_dev_fix_flow_never_reports_awaiting_dev_fix_as_failed_phase() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::AwaitingDevFix,
            previous_phase: Some(PipelinePhase::AwaitingDevFix),
            failed_phase_for_recovery: None,
            dev_fix_triggered: false,
            recovery_escalation_level: 0,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        match effect {
            Effect::TriggerDevFixFlow { failed_phase, .. } => {
                assert_ne!(failed_phase, PipelinePhase::AwaitingDevFix);
            }
            other => panic!("expected TriggerDevFixFlow, got: {other:?}"),
        }
    }

    #[test]
    fn awaiting_dev_fix_completion_marker_pending_requires_safety_check_first() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::AwaitingDevFix,
            completion_marker_pending: true,
            completion_marker_is_failure: true,
            completion_marker_reason: Some("safety_valve".to_string()),
            interrupted_by_user: false,
            pre_termination_commit_checked: false,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(effect, Effect::CheckUncommittedChangesBeforeTermination),
            "expected safety check to preempt completion marker emission, got: {effect:?}"
        );
    }

    #[test]
    fn awaiting_dev_fix_completion_marker_pending_emits_after_safety_check() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::AwaitingDevFix,
            completion_marker_pending: true,
            completion_marker_is_failure: true,
            completion_marker_reason: Some("safety_valve".to_string()),
            interrupted_by_user: false,
            pre_termination_commit_checked: true,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(
                effect,
                Effect::EmitCompletionMarkerAndTerminate {
                    is_failure: true,
                    ref reason
                } if reason.as_deref() == Some("safety_valve")
            ),
            "expected EmitCompletionMarkerAndTerminate after safety check, got: {effect:?}"
        );
    }

    // A user-initiated interrupt is the one exemption from the safety check, so a pending
    // completion marker is emitted immediately even though the check never ran.
    #[test]
    fn awaiting_dev_fix_completion_marker_skips_safety_check_on_user_interrupt() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::AwaitingDevFix,
            completion_marker_pending: true,
            completion_marker_is_failure: false,
            completion_marker_reason: None,
            interrupted_by_user: true,
            pre_termination_commit_checked: false,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(
                effect,
                Effect::EmitCompletionMarkerAndTerminate {
                    is_failure: false,
                    reason: None
                }
            ),
            "expected EmitCompletionMarkerAndTerminate without safety check on user interrupt, got: {effect:?}"
        );
    }

    // Escalation level 1 is a plain retry.
    #[test]
    fn awaiting_dev_fix_level_one_escalation_attempts_recovery() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::AwaitingDevFix,
            completion_marker_pending: false,
            dev_fix_triggered: true,
            recovery_escalation_level: 1,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(effect, Effect::AttemptRecovery { level: 1, .. }),
            "expected AttemptRecovery at escalation level 1, got: {effect:?}"
        );
    }

    // Escalation level 2 restarts the phase recorded in `failed_phase_for_recovery`.
    #[test]
    fn awaiting_dev_fix_level_two_escalation_resets_to_failed_phase() {
        use crate::reducer::effect::RecoveryResetType;

        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::AwaitingDevFix,
            completion_marker_pending: false,
            dev_fix_triggered: true,
            recovery_escalation_level: 2,
            failed_phase_for_recovery: Some(PipelinePhase::Review),
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(
                effect,
                Effect::EmitRecoveryReset {
                    reset_type: RecoveryResetType::PhaseStart,
                    target_phase: PipelinePhase::Review
                }
            ),
            "expected PhaseStart reset targeting the failed phase, got: {effect:?}"
        );
    }

    // Dev-fix agent selection is enforced by the TriggerDevFixFlow handler.
    // Orchestration intentionally preserves the original failed role in the effect params.

    #[test]
    fn user_interrupt_skips_pre_termination_safety_check() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Interrupted,
            interrupted_by_user: true,
            pre_termination_commit_checked: false,
            prompt_permissions: PromptPermissionsState {
                restored: true,
                ..Default::default()
            },
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        // Should skip safety check and go directly to SaveCheckpoint
        match effect {
            Effect::SaveCheckpoint { trigger } => {
                assert_eq!(trigger, crate::reducer::event::CheckpointTrigger::Interrupt);
            }
            other => panic!(
                "Expected SaveCheckpoint effect when interrupted_by_user=true, got {other:?}. \
                 User interrupt should skip pre-termination safety check."
            ),
        }
    }

    // On a user interrupt, PROMPT.md restoration is derived before the checkpoint while
    // `prompt_permissions.restored` is still false.
    #[test]
    fn user_interrupt_restores_prompt_permissions_before_checkpoint() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Interrupted,
            interrupted_by_user: true,
            pre_termination_commit_checked: false,
            prompt_permissions: PromptPermissionsState {
                restored: false,
                ..Default::default()
            },
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(effect, Effect::RestorePromptPermissions),
            "expected RestorePromptPermissions before checkpoint on user interrupt, got: {effect:?}"
        );
    }

    #[test]
    fn programmatic_interrupt_requires_pre_termination_safety_check() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Interrupted,
            interrupted_by_user: false,
            pre_termination_commit_checked: false,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        // Should derive safety check effect BEFORE proceeding to termination
        match effect {
            Effect::CheckUncommittedChangesBeforeTermination => {
                // Expected - safety check required
            }
            other => panic!(
                "Expected CheckUncommittedChangesBeforeTermination when interrupted_by_user=false, got {other:?}. \
                 Programmatic interrupts must commit uncommitted work before terminating."
            ),
        }
    }

    // After the safety check has passed, a programmatic interrupt still restores PROMPT.md
    // permissions before saving the checkpoint.
    #[test]
    fn programmatic_interrupt_restores_prompt_permissions_after_safety_check() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Interrupted,
            interrupted_by_user: false,
            pre_termination_commit_checked: true,
            prompt_permissions: PromptPermissionsState {
                restored: false,
                ..Default::default()
            },
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(effect, Effect::RestorePromptPermissions),
            "expected RestorePromptPermissions after safety check on programmatic interrupt, got: {effect:?}"
        );
    }

    #[test]
    fn complete_phase_requires_pre_termination_safety_check() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Complete,
            interrupted_by_user: false,
            pre_termination_commit_checked: false,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        // Should derive safety check before completion
        match effect {
            Effect::CheckUncommittedChangesBeforeTermination => {
                // Expected - safety check required
            }
            other => panic!(
                "Expected CheckUncommittedChangesBeforeTermination before Complete, got {other:?}"
            ),
        }
    }

    // The user-interrupt exemption is keyed on `phase == Interrupted`. A `Complete` state
    // that carries `interrupted_by_user=true` (e.g. a mis-set checkpoint) must still be
    // checked.
    #[test]
    fn complete_phase_with_interrupted_by_user_flag_still_requires_safety_check() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Complete,
            interrupted_by_user: true,
            pre_termination_commit_checked: false,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        assert!(
            matches!(effect, Effect::CheckUncommittedChangesBeforeTermination),
            "expected safety check for Complete even when interrupted_by_user=true, got: {effect:?}"
        );
    }

    #[test]
    fn final_validation_requires_pre_termination_safety_check() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::FinalValidation,
            pre_termination_commit_checked: false,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        // Should derive safety check before final validation
        match effect {
            Effect::CheckUncommittedChangesBeforeTermination => {
                // Expected - safety check required
            }
            other => panic!(
                "Expected CheckUncommittedChangesBeforeTermination before FinalValidation, got {other:?}"
            ),
        }
    }

    #[test]
    fn safety_check_allows_proceed_after_checked() {
        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Interrupted,
            interrupted_by_user: false,
            pre_termination_commit_checked: true,
            prompt_permissions: PromptPermissionsState {
                restored: true,
                ..Default::default()
            },
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        // Should proceed to checkpoint save
        match effect {
            Effect::SaveCheckpoint { .. } => {
                // Expected - proceed after safety check and restoration
            }
            other => panic!(
                "Expected SaveCheckpoint after safety check and restoration complete, got {other:?}"
            ),
        }
    }

    #[test]
    fn complete_saves_checkpoint_with_phase_transition_trigger_after_safety_check() {
        use crate::reducer::event::CheckpointTrigger;

        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Complete,
            pre_termination_commit_checked: true,
            interrupted_by_user: false,
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        match effect {
            Effect::SaveCheckpoint { trigger } => {
                assert_eq!(trigger, CheckpointTrigger::PhaseTransition);
            }
            other => panic!("expected SaveCheckpoint, got: {other:?}"),
        }
    }

    #[test]
    fn programmatic_interrupt_saves_checkpoint_with_phase_transition_trigger_after_safety_check() {
        use crate::reducer::event::CheckpointTrigger;

        let state = PipelineState::initial(1, 0);
        let state = PipelineState {
            phase: PipelinePhase::Interrupted,
            pre_termination_commit_checked: true,
            interrupted_by_user: false,
            prompt_permissions: PromptPermissionsState {
                restored: true,
                ..Default::default()
            },
            ..state
        };

        let effect = determine_next_effect_for_phase(&state);

        match effect {
            Effect::SaveCheckpoint { trigger } => {
                assert_eq!(trigger, CheckpointTrigger::PhaseTransition);
            }
            other => panic!("expected SaveCheckpoint, got: {other:?}"),
        }
    }
}