ralph-workflow 0.7.18

PROMPT-driven multi-agent orchestrator for git repos
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
use super::common::TestFixture;
use crate::executor::MockProcessExecutor;
use crate::reducer::boundary::MainEffectHandler;
use crate::reducer::event::{AgentEvent, PipelineEvent};
use crate::reducer::state::{ContinuationState, PipelineState, SameAgentRetryReason};
use crate::workspace::{MemoryWorkspace, Workspace};
use std::sync::Arc;

#[test]
fn test_invoke_analysis_agent_gracefully_handles_missing_plan_and_diff() {
    // Regression guard: a missing PLAN.md or an ungeneratable git diff must not abort
    // the analysis step; placeholders are substituted for those inputs instead.
    const ANALYSIS_HEADER: &str =
        "Your task is to VERIFY whether the code changes satisfy the PLAN";
    const DIFF_PLACEHOLDER: &str = "[DIFF unavailable";

    // Only the scratch directory is seeded: no plan file, no diff backup.
    let bare_workspace = MemoryWorkspace::new_test().with_dir(".agent/tmp");
    let mut fx = TestFixture::with_workspace(bare_workspace);
    let mut phase_ctx = fx.ctx();
    phase_ctx.developer_agent = "claude";

    let development_state = PipelineState {
        phase: crate::reducer::event::PipelinePhase::Development,
        iteration: 0,
        ..PipelineState::initial(1, 0)
    };
    let mut effect_handler = MainEffectHandler::new(development_state);

    let outcome = effect_handler.invoke_analysis_agent(&mut phase_ctx, 0);
    outcome.expect("invoke_analysis_agent should not fail when PLAN/DIFF inputs are missing");

    // Exactly one agent invocation must have been recorded, carrying the analysis
    // task structure.
    //
    // The in-memory workspace has no on-disk .git, so diff generation errors out
    // immediately and the diff section carries the placeholder. Checking for the
    // placeholder verifies the graceful fallback without depending on real git state.
    let invocations = fx.executor.agent_calls();
    assert_eq!(invocations.len(), 1);
    let prompt = &invocations[0].prompt;

    let has_analysis_header = prompt.contains(ANALYSIS_HEADER);
    assert!(
        has_analysis_header,
        "expected analysis prompt header in prompt, got: {prompt}"
    );

    let has_diff_placeholder = prompt.contains(DIFF_PLACEHOLDER);
    assert!(
        has_diff_placeholder,
        "expected diff placeholder in prompt for MemoryWorkspace (no .git on disk), got: {prompt}"
    );
}

#[test]
fn test_invoke_analysis_agent_xsd_retry_uses_existing_xsd_retry_template() {
    // With an XSD retry pending, the analysis invocation must reuse the XSD retry
    // prompt and materialize the previous (invalid) output as last_output.xml.
    let malformed_xml = "<invalid xml";

    let seeded_workspace = MemoryWorkspace::new_test()
        .with_dir(".agent/tmp")
        .with_file(".agent/PLAN.md", "# Plan\n")
        .with_file(".agent/tmp/development_result.xml", malformed_xml)
        .with_file(
            ".agent/tmp/development_xsd_error.txt",
            "missing closing tag",
        );

    let mut fx = TestFixture::with_workspace(seeded_workspace);
    let mut phase_ctx = fx.ctx();
    phase_ctx.developer_agent = "claude";

    // Development phase with the XSD-retry flag raised; everything else is default.
    let retry_continuation = ContinuationState {
        xsd_retry_pending: true,
        ..ContinuationState::new()
    };
    let retry_state = PipelineState {
        phase: crate::reducer::event::PipelinePhase::Development,
        iteration: 0,
        continuation: retry_continuation,
        ..PipelineState::initial(1, 0)
    };
    let mut effect_handler = MainEffectHandler::new(retry_state);

    let outcome = effect_handler.invoke_analysis_agent(&mut phase_ctx, 0);
    outcome.expect("invoke_analysis_agent should succeed");

    let invocations = fx.executor.agent_calls();
    assert_eq!(invocations.len(), 1);
    let prompt = &invocations[0].prompt;

    let uses_retry_template = prompt.contains("XSD VALIDATION FAILED - FIX XML ONLY");
    assert!(
        uses_retry_template,
        "expected existing XSD retry template, got: {prompt}"
    );
    let has_guardrail = prompt.contains("THIS IS A SUBMISSION-FIX-ONLY RETRY");
    assert!(
        has_guardrail,
        "expected XML-only retry guardrail, got: {prompt}"
    );

    // The invalid XML must be copied verbatim so the retry prompt can point at it.
    let last_output_path = std::path::Path::new(".agent/tmp/last_output.xml");
    let last_output = fx
        .workspace
        .read(last_output_path)
        .expect("last_output.xml should be materialized for analysis XSD retry");
    assert_eq!(last_output, malformed_xml);
}

#[test]
fn test_invoke_analysis_agent_same_agent_retry_timeout_with_context_includes_context_file_guidance()
{
    // A same-agent retry caused by a timeout-with-context must prepend the retry note
    // and direct the agent to the saved timeout context file.
    let context_path = ".agent/tmp/timeout-context-analysis_1.md";

    let seeded_workspace = MemoryWorkspace::new_test()
        .with_dir(".agent/tmp")
        .with_file(".agent/PLAN.md", "# Plan\n")
        .with_file(".agent/DIFF.backup", "DIFF_BACKUP_MARKER")
        .with_file(context_path, "TIMEOUT_CONTEXT_MARKER");

    let mut fx = TestFixture::with_workspace(seeded_workspace);
    let mut phase_ctx = fx.ctx();
    phase_ctx.developer_agent = "claude";

    // Continuation state describing the pending same-agent retry.
    let retry_continuation = ContinuationState {
        same_agent_retry_pending: true,
        same_agent_retry_reason: Some(SameAgentRetryReason::TimeoutWithContext),
        timeout_context_file_path: Some(String::from(context_path)),
        ..ContinuationState::new()
    };
    let retry_state = PipelineState {
        phase: crate::reducer::event::PipelinePhase::Development,
        iteration: 0,
        continuation: retry_continuation,
        ..PipelineState::initial(1, 0)
    };
    let mut effect_handler = MainEffectHandler::new(retry_state);

    let outcome = effect_handler.invoke_analysis_agent(&mut phase_ctx, 0);
    outcome.expect("invoke_analysis_agent should succeed");

    let invocations = fx.executor.agent_calls();
    assert_eq!(invocations.len(), 1);
    let prompt = &invocations[0].prompt;

    // (substring that must be present, description used in the failure message)
    let expectations = [
        (
            "## Retry Note",
            "expected same-agent retry preamble in analysis prompt",
        ),
        (
            "timed out with partial progress",
            "expected timeout-with-context retry guidance in analysis prompt",
        ),
        (
            context_path,
            "expected analysis prompt to reference timeout context file path",
        ),
        (
            "Read that file first to continue from where you left off.",
            "expected analysis prompt to instruct reading timeout context file",
        ),
    ];
    for &(needle, expectation) in &expectations {
        assert!(prompt.contains(needle), "{expectation}, got: {prompt}");
    }
}

#[test]
fn test_invoke_analysis_agent_writes_diff_backup_when_git_diff_succeeds() {
    // The handler should write/update `.agent/DIFF.backup` as a best-effort fallback
    // for prompt materialization, replacing whatever stale content was there.
    //
    // The workspace is seeded with a sentinel backup. The content assertions below
    // accept either a real diff or the diff-unavailable placeholder, so the test does
    // not depend on whether diff generation succeeds for the fixture's repo root.
    // What it pins is that the sentinel is gone afterwards.
    let workspace = MemoryWorkspace::new_test()
        .with_dir(".agent/tmp")
        .with_file(".agent/PLAN.md", "# Plan\n")
        .with_file(".agent/DIFF.backup", "DIFF_BACKUP_MARKER");

    let mut fixture = TestFixture::with_workspace(workspace);
    let mut ctx = fixture.ctx();
    ctx.developer_agent = "claude";

    let mut handler = MainEffectHandler::new(PipelineState {
        phase: crate::reducer::event::PipelinePhase::Development,
        iteration: 0,
        ..PipelineState::initial(1, 0)
    });

    handler
        .invoke_analysis_agent(&mut ctx, 0)
        .expect("invoke_analysis_agent should succeed");

    let calls = fixture.executor.agent_calls();
    assert_eq!(calls.len(), 1);
    let prompt = &calls[0].prompt;
    // Include the actual prompt in the failure output, as the sibling tests do, so a
    // failure can be diagnosed without re-running under a debugger.
    assert!(
        prompt.contains("diff --git") || prompt.contains("[DIFF unavailable"),
        "expected a git diff or a diff-unavailable placeholder in prompt, got: {prompt}"
    );

    let backup = fixture
        .workspace
        .read(std::path::Path::new(".agent/DIFF.backup"))
        .expect("expected .agent/DIFF.backup to exist");
    assert!(
        backup.contains("diff --git") || backup.contains("[DIFF unavailable"),
        "expected .agent/DIFF.backup to contain a git diff or placeholder, got: {backup:?}"
    );
    // The seeded sentinel must have been overwritten by the handler.
    assert_ne!(
        backup, "DIFF_BACKUP_MARKER",
        "expected .agent/DIFF.backup to be refreshed"
    );
}

#[test]
fn test_invoke_analysis_agent_uses_repo_root_for_diff_not_start_commit_baseline() {
    // TDD regression: the analysis diff must come from `ctx.repo_root` via
    // `git_diff_in_repo`, not from the workspace-based `.agent/start_commit` baseline.
    //
    // Deterministic setup: an isolated on-disk git repo gets one commit plus a known
    // working-tree modification; the analysis prompt must then contain a diff for
    // that repo, including a unique marker.
    //
    // IMPORTANT: never change the process CWD here. It is process-global and Rust
    // runs tests in parallel by default.
    use std::path::Path;

    let marker_name = "ralph_test_repo_root_diff_marker.txt";
    let unique_marker = "UNIQUE_REPO_ROOT_MARKER";

    let scratch = tempfile::TempDir::new().expect("create temp git repo");
    let git_repo = git2::Repository::init(scratch.path()).expect("init git repo");

    // Initial commit, so HEAD exists and serves as the diff baseline.
    let marker_path = scratch.path().join(marker_name);
    std::fs::write(&marker_path, "initial\n").expect("write marker file");

    let mut staging = git_repo.index().expect("open index");
    staging
        .add_path(Path::new(marker_name))
        .expect("add marker file");
    staging.write().expect("write index");
    let root_tree_id = staging.write_tree().expect("write tree");
    let root_tree = git_repo.find_tree(root_tree_id).expect("find tree");
    let author = git2::Signature::now("test", "test@test.com").expect("signature");
    git_repo
        .commit(Some("HEAD"), &author, &author, "init", &root_tree, &[])
        .expect("create initial commit");

    // Dirty the tracked file to produce a deterministic patch against HEAD.
    let dirty_contents = format!("initial\nmodified\n{unique_marker}\n");
    std::fs::write(&marker_path, dirty_contents).expect("modify marker file");

    let seeded_workspace = MemoryWorkspace::new_test()
        .with_dir(".agent/tmp")
        .with_file(".agent/PLAN.md", "# Plan\n");

    let mut fx = TestFixture::with_workspace(seeded_workspace);
    fx.repo_root = scratch.path().to_path_buf();

    let mut phase_ctx = fx.ctx();
    phase_ctx.developer_agent = "claude";

    let development_state = PipelineState {
        phase: crate::reducer::event::PipelinePhase::Development,
        iteration: 0,
        ..PipelineState::initial(1, 0)
    };
    let mut effect_handler = MainEffectHandler::new(development_state);

    let outcome = effect_handler.invoke_analysis_agent(&mut phase_ctx, 0);
    outcome.expect("invoke_analysis_agent should succeed");

    let invocations = fx.executor.agent_calls();
    assert_eq!(invocations.len(), 1);
    let prompt = &invocations[0].prompt;

    // Key assertion: the prompt carries a patch for the repo located at repo_root.
    let expected_patch_header = format!("diff --git a/{marker_name} b/{marker_name}");
    assert!(
        prompt.contains(expected_patch_header.as_str()),
        "expected analysis prompt to include diff for marker file from ctx.repo_root; got: {prompt}"
    );
    assert!(
        prompt.contains(unique_marker),
        "expected analysis prompt to include unique marker from ctx.repo_root diff; got: {prompt}"
    );
    let fell_back_to_placeholder = prompt.contains("[DIFF unavailable");
    assert!(
        !fell_back_to_placeholder,
        "expected diff generation to succeed; got: {prompt}"
    );
}

#[test]
fn test_invoke_analysis_agent_uses_head_baseline_not_start_commit() {
    // TDD regression: the analysis diff is working-tree vs HEAD (last commit),
    // NOT working-tree vs `.agent/start_commit` (the pipeline-start baseline).
    //
    // Three-step proof:
    //   A — initial commit (baseline)
    //   B — a committed change; already in history, so it must NOT be in the diff
    //   C — an uncommitted change with a unique marker; it MUST be in the diff
    //
    // HEAD baseline         => diff contains only C. ✓
    // start_commit baseline => diff contains both B and C. ✗
    //
    // IMPORTANT: work in an isolated tempdir repo and never touch the process CWD,
    // since tests run in parallel.
    use std::path::Path;

    let sandbox = tempfile::TempDir::new().expect("create temp git repo");
    let git_repo = git2::Repository::init(sandbox.path()).expect("init git repo");
    let author = git2::Signature::now("test", "test@test.com").expect("signature");

    // --- Commit A: two tracked files with identical base content. ---
    // `history_file` receives the change that gets committed in B.
    // `dirty_file` receives the uncommitted working-tree change C.
    let history_file = "analysis_committed_change.txt";
    let dirty_file = "analysis_working_change.txt";
    let history_path = sandbox.path().join(history_file);
    let dirty_path = sandbox.path().join(dirty_file);
    std::fs::write(&history_path, "base content\n").expect("write committed file A");
    std::fs::write(&dirty_path, "base content\n").expect("write working file A");

    let mut index_a = git_repo.index().expect("open index A");
    index_a
        .add_path(Path::new(history_file))
        .expect("stage committed file A");
    index_a
        .add_path(Path::new(dirty_file))
        .expect("stage working file A");
    index_a.write().expect("write index A");
    let tree_a_id = index_a.write_tree().expect("write tree A");
    let tree_a = git_repo.find_tree(tree_a_id).expect("find tree A");
    git_repo
        .commit(Some("HEAD"), &author, &author, "commit A: initial", &tree_a, &[])
        .expect("create commit A");

    // --- Commit B: change `history_file` and commit it. ---
    // Against HEAD this file is clean afterwards (HEAD == workdir); against a
    // start_commit baseline the marker below would show up as an addition.
    let committed_marker = "ANALYSIS_COMMITTED_CHANGE_MUST_NOT_APPEAR_IN_DIFF";
    let committed_contents = format!("base content\n{committed_marker}\n");
    std::fs::write(&history_path, committed_contents).expect("write committed file for commit B");

    let mut index_b = git_repo.index().expect("open index B");
    index_b
        .add_path(Path::new(history_file))
        .expect("stage committed file B");
    index_b.write().expect("write index B");
    let tree_b_id = index_b.write_tree().expect("write tree B");
    let tree_b = git_repo.find_tree(tree_b_id).expect("find tree B");
    let head_after_a = git_repo.head().expect("head after A");
    let commit_a = head_after_a.peel_to_commit().expect("commit A");
    git_repo
        .commit(
            Some("HEAD"),
            &author,
            &author,
            "commit B: committed change",
            &tree_b,
            &[&commit_a],
        )
        .expect("create commit B");

    // --- Change C: edit `dirty_file` without staging it. ---
    // The file is tracked since A and untouched by B, so HEAD still has base content.
    let uncommitted_marker = "ANALYSIS_UNCOMMITTED_CHANGE_MUST_APPEAR_IN_DIFF";
    let uncommitted_contents = format!("base content\n{uncommitted_marker}\n");
    std::fs::write(&dirty_path, uncommitted_contents)
        .expect("write uncommitted change to working file");

    // Point the fixture at the isolated repo. The workspace deliberately has no
    // `.agent/start_commit`, so a start_commit-based path would fail or pick the
    // wrong baseline.
    let seeded_workspace = MemoryWorkspace::new_test()
        .with_dir(".agent/tmp")
        .with_file(".agent/PLAN.md", "# Plan\n");

    let mut fx = TestFixture::with_workspace(seeded_workspace);
    fx.repo_root = sandbox.path().to_path_buf();
    let mut phase_ctx = fx.ctx();
    phase_ctx.developer_agent = "claude";

    let development_state = PipelineState {
        phase: crate::reducer::event::PipelinePhase::Development,
        iteration: 0,
        ..PipelineState::initial(1, 0)
    };
    let mut effect_handler = MainEffectHandler::new(development_state);

    let outcome = effect_handler.invoke_analysis_agent(&mut phase_ctx, 0);
    outcome.expect("invoke_analysis_agent should succeed with isolated repo");

    let invocations = fx.executor.agent_calls();
    assert_eq!(invocations.len(), 1, "expected exactly one agent invocation");
    let prompt = &invocations[0].prompt;

    // C present: the HEAD diff picks up working-tree changes.
    let shows_uncommitted = prompt.contains(uncommitted_marker);
    assert!(
        shows_uncommitted,
        "expected uncommitted change marker in analysis prompt; got: {prompt}"
    );

    // B absent: the baseline is HEAD rather than start_commit.
    let shows_committed = prompt.contains(committed_marker);
    assert!(
        !shows_committed,
        "expected already-committed change to be ABSENT from analysis prompt (HEAD baseline); got: {prompt}"
    );
}

/// Boundary regression: the analysis drain has to set `completion_output_path` to
/// `development_result.xml`, otherwise the valid-result override cannot take effect.
///
/// Root cause of "Bug 2": the analysis drain reported `None` as its completion path,
/// which skipped the executor's valid-result check. Even when the agent had written a
/// valid result file, timeout/error classification saw no completion path and fell
/// through to the generic failure or timeout classification.
///
/// How this test proves the wiring: the mock executor is told to return the
/// proprietary exit code 91 (non-zero, but not a standard error; it mimics an agent
/// that exited abnormally after finishing its work). With `completion_output_path`
/// wired correctly the executor finds the valid XML file and promotes the outcome to
/// `InvocationSucceeded` regardless of the exit code. With `None` the valid-file check
/// is skipped and the outcome would be `InvocationFailed`.
#[test]
fn test_invoke_analysis_agent_completion_output_path_wired_to_development_result_xml() {
    // The workspace already holds a valid development_result.xml, standing in for
    // the output the agent produced.
    let valid_result_xml = "<ralph-development-result>\
             <ralph-status>completed</ralph-status>\
             </ralph-development-result>";
    let workspace = MemoryWorkspace::new_test()
        .with_dir(".agent/tmp")
        .with_file(".agent/PLAN.md", "# Plan\n")
        .with_file(".agent/tmp/development_result.xml", valid_result_xml);

    // Exit code 91 is proprietary and maps to no standard error (no auth failure,
    // no rate limit, no explicit timeout). Lacking a completion path, the executor
    // cannot see the result file and would emit InvocationFailed; with the right
    // path the valid result file wins and InvocationSucceeded is emitted.
    let abnormal_exit = Ok(crate::executor::AgentCommandResult::failure(91, ""));
    let mock_executor =
        Arc::new(MockProcessExecutor::new().with_agent_result("claude", abnormal_exit));

    // Hand-assemble the phase context so the custom executor can be injected.
    let shared_workspace =
        Arc::new(workspace.clone()) as Arc<dyn crate::workspace::Workspace>;
    let colors = crate::logger::Colors { enabled: false };
    let logger = crate::logger::Logger::new(colors);
    let mut stopwatch = crate::pipeline::Timer::new();
    let cfg = crate::config::Config::default();
    let agent_registry = crate::agents::AgentRegistry::new().unwrap();
    let templates = crate::prompts::template_context::TemplateContext::default();
    let run_log = crate::logging::RunLogContext::new(&workspace).unwrap();
    let cloud_cfg = crate::config::types::CloudConfig::disabled();
    let mock_git_env = crate::runtime::environment::mock::MockGitEnvironment::new();

    let mut phase_ctx = crate::phases::PhaseContext {
        config: &cfg,
        registry: &agent_registry,
        logger: &logger,
        colors: &colors,
        timer: &mut stopwatch,
        developer_agent: "claude",
        reviewer_agent: "rev",
        review_guidelines: None,
        template_context: &templates,
        run_context: crate::checkpoint::RunContext::new(),
        execution_history: crate::checkpoint::execution_history::ExecutionHistory::new(),
        executor: mock_executor.as_ref(),
        executor_arc: Arc::clone(&mock_executor) as Arc<dyn crate::executor::ProcessExecutor>,
        repo_root: std::path::Path::new("/mock/repo"),
        workspace: &workspace,
        workspace_arc: Arc::clone(&shared_workspace),
        run_log_context: &run_log,
        cloud_reporter: None,
        cloud: &cloud_cfg,
        env: &mock_git_env,
    };

    let development_state = PipelineState {
        phase: crate::reducer::event::PipelinePhase::Development,
        iteration: 0,
        ..PipelineState::initial(1, 0)
    };
    let mut effect_handler = MainEffectHandler::new(development_state);

    let result = effect_handler
        .invoke_analysis_agent(&mut phase_ctx, 0)
        .expect("invoke_analysis_agent should not fail");

    // Key assertion: valid development_result.xml + exit code 91 must yield
    // InvocationSucceeded among the additional events, which shows that
    // completion_output_path pointed at development_result.xml rather than None.
    //
    // `build_agent_invocation_result` always reports InvocationStarted as the primary
    // event and places the real execution outcome in additional_events, hence the
    // search there (falling back to the primary event if no outcome is found).
    //
    // Were completion_output_path None, the valid-result check would be skipped and
    // exit code 91 would surface as InvocationFailed.
    let outcome_event = result
        .additional_events
        .iter()
        .find(|candidate| {
            matches!(
                candidate,
                PipelineEvent::Agent(
                    AgentEvent::InvocationSucceeded { .. } | AgentEvent::InvocationFailed { .. }
                )
            )
        })
        .unwrap_or(&result.event);

    let promoted_to_success = matches!(
        outcome_event,
        PipelineEvent::Agent(AgentEvent::InvocationSucceeded { .. })
    );
    assert!(
        promoted_to_success,
        "analysis drain with valid development_result.xml and exit code 91 must produce \
         InvocationSucceeded — completion_output_path must be wired to development_result.xml; \
         got primary={:?} additional={:?}",
        result.event,
        result.additional_events
    );
}