ralph-agent-loop 0.3.0

//! CI gate execution for post-run supervision.
//!
//! Responsibilities:
//! - Execute the configured CI gate command (default: make ci).
//! - Capture stdout/stderr for compliance messages.
//! - Detect common error patterns and provide specific guidance.
//! - Provide command label for error messages.
//!
//! Not handled here:
//! - Queue maintenance (see queue_ops.rs).
//! - Git operations (see git_ops.rs).
//!
//! Invariants/assumptions:
//! - CI gate command is configured or defaults to "make ci".
//! - Command output is captured (not inherited) to include in compliance messages.
//! - Error pattern detection is best-effort; undetected patterns fall back to generic guidance.

use super::logging;
#[path = "ci_format.rs"]
mod ci_format;
#[path = "ci_patterns.rs"]
mod ci_patterns;

use crate::constants::limits::{CI_FAILURE_ESCALATION_THRESHOLD, CI_GATE_AUTO_RETRY_LIMIT};
use crate::runutil;
use anyhow::{Context, Result, bail};
use ci_format::{format_ci_output_for_message, format_detected_pattern, truncate_for_log};
#[cfg(test)]
use ci_patterns::{
    DetectedErrorPattern, detect_format_check_error, detect_lint_check_error,
    detect_lock_contention_error, detect_ruff_error, detect_toml_parse_error,
    detect_unknown_variant_error, extract_invalid_value, extract_line_number, extract_valid_values,
    infer_file_path,
};
use ci_patterns::{LOCK_CONTENTION_GUIDANCE, detect_ci_error_pattern};
use std::time::Instant;

/// Derives a comparable key for the error pattern found in a CI gate result.
///
/// Yields `Some` holding the detected pattern's type label, or `None` when the
/// captured stdout/stderr match no known pattern.
#[cfg(test)]
fn get_error_pattern_key(result: &CiGateResult) -> Option<String> {
    let detected = detect_ci_error_pattern(&result.stdout, &result.stderr)?;
    Some(detected.pattern_type.to_string())
}

/// Result of running the CI gate command.
///
/// Built by `capture_ci_gate_result` for both passing and failing runs, and
/// reconstructed from a `CiFailure` when formatting compliance messages.
#[derive(Debug)]
#[allow(dead_code)] // NOTE(review): justified as "success field used in tests only", but `run_ci_gate` also reads `success` — confirm this allow is still needed
pub(crate) struct CiGateResult {
    /// Whether the command's exit status reported success; also `true` when the gate is disabled.
    pub success: bool,
    /// Exit code of the command; `None` when the gate is disabled or no code was reported.
    pub exit_code: Option<i32>,
    /// Captured stdout, decoded lossily as UTF-8 (empty when the gate is disabled).
    pub stdout: String,
    /// Captured stderr, decoded lossily as UTF-8 (empty when the gate is disabled).
    pub stderr: String,
}

/// CI gate failure with captured output for logging.
///
/// This error type is produced by `run_ci_gate` when the CI command does not
/// report success. The `Display` impl includes truncated stdout/stderr and the
/// detected error pattern, allowing `with_scope` to log a rich error message.
/// Callers recover it from an `anyhow` error via `downcast::<CiFailure>()`.
#[derive(Debug)]
pub(crate) struct CiFailure {
    /// Exit code from the CI command (`None` when the process reported no code)
    pub exit_code: Option<i32>,
    /// Full stdout (kept for compliance messages)
    pub stdout: String,
    /// Full stderr (kept for compliance messages)
    pub stderr: String,
    /// Detected error pattern type, if any (the `pattern_type` label of the detected pattern)
    pub error_pattern: Option<&'static str>,
}

impl CiFailure {
    /// Converts this failure into the `BlockingState` describing a CI-blocked run.
    ///
    /// Forwards the exit code together with an owned copy of the detected
    /// pattern label, when one was recorded.
    pub(crate) fn blocking_state(&self) -> crate::contracts::BlockingState {
        let pattern_label = self.error_pattern.map(|label| label.to_string());
        crate::contracts::BlockingState::ci_blocked(self.exit_code, pattern_label)
    }
}

impl std::fmt::Display for CiFailure {
    /// Renders a one-line headline (exit code plus optional pattern label)
    /// followed by truncated stderr and stdout sections, each emitted only
    /// when its preview is non-empty.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A missing exit code is rendered as -1.
        write!(
            f,
            "CI failed with exit code {}",
            self.exit_code.unwrap_or(-1)
        )?;

        if let Some(label) = self.error_pattern {
            write!(f, " [{}]", label)?;
        }

        // Keep log lines readable: only a bounded preview of each stream is shown.
        let stderr_excerpt = truncate_for_log(&self.stderr, 500);
        let stdout_excerpt = truncate_for_log(&self.stdout, 500);

        if !stderr_excerpt.is_empty() {
            write!(f, "\n>>> stderr:\n{}", stderr_excerpt)?;
        }
        if stdout_excerpt.is_empty() {
            return Ok(());
        }
        write!(f, "\n>>> stdout:\n{}", stdout_excerpt)
    }
}

// Marker impl using the trait's default methods; it lets `CiFailure` be converted
// into the `anyhow` error returned by `run_ci_gate` and recovered later via `downcast`.
impl std::error::Error for CiFailure {}

/// Runs the configured CI gate command and hands back whatever it produced.
///
/// A missing or disabled gate is reported as a successful result with no exit
/// code and empty output. Otherwise the command runs against the repository
/// root inside a logging scope, its stdout/stderr are decoded lossily as UTF-8,
/// and the outcome is returned as `Ok` whether or not the command succeeded.
/// A failing run whose output matches the lock-contention pattern additionally
/// logs a warning with the lock-contention guidance.
///
/// # Errors
/// Propagates any error from `runutil::execute_ci_gate`, annotated with the
/// command label and repository root.
pub(crate) fn capture_ci_gate_result(resolved: &crate::config::Resolved) -> Result<CiGateResult> {
    let gate = match resolved.config.agent.ci_gate.as_ref() {
        Some(candidate) if candidate.is_enabled() => candidate,
        _ => {
            log::info!("CI gate disabled; skipping.");
            return Ok(CiGateResult {
                success: true,
                exit_code: None,
                stdout: String::new(),
                stderr: String::new(),
            });
        }
    };

    let command = gate.display_string();
    let scope_label = format!("CI gate ({command})");

    logging::with_scope(&scope_label, || {
        log::info!(
            "CI gate command started (may take several minutes): {}",
            command
        );
        let timer = Instant::now();

        let output = runutil::execute_ci_gate(gate, &resolved.repo_root).with_context(|| {
            format!(
                "run CI gate command '{}' in {}",
                command,
                resolved.repo_root.display()
            )
        })?;

        // Output is captured rather than inherited so it can be quoted in messages.
        let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
        let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
        let success = output.status.success();
        let exit_code = output.status.code();
        let elapsed = timer.elapsed();
        log::info!(
            "CI gate command finished in {:.1}s with exit code {:?}",
            elapsed.as_secs_f64(),
            exit_code
        );

        // Pattern detection only runs for failing commands.
        if !success {
            let is_lock_contention = detect_ci_error_pattern(&stdout, &stderr)
                .is_some_and(|pattern| pattern.pattern_type == "Lock contention");
            if is_lock_contention {
                log::warn!(
                    "CI gate failure indicates lock contention. {}",
                    LOCK_CONTENTION_GUIDANCE
                );
            }
        }

        Ok(CiGateResult {
            success,
            exit_code,
            stdout,
            stderr,
        })
    })
}

/// Runs the CI gate and turns a failing run into an error.
///
/// Returns the captured `CiGateResult` when the gate passed (a disabled gate
/// counts as passing). When the command did not succeed, returns a `CiFailure`
/// error carrying the exit code, the full stdout/stderr, and the detected error
/// pattern type, if any. Errors from `capture_ci_gate_result` propagate as-is.
pub(crate) fn run_ci_gate(resolved: &crate::config::Resolved) -> Result<CiGateResult> {
    let outcome = capture_ci_gate_result(resolved)?;

    if !outcome.success {
        let error_pattern = detect_ci_error_pattern(&outcome.stdout, &outcome.stderr)
            .map(|pattern| pattern.pattern_type);
        let failure = CiFailure {
            exit_code: outcome.exit_code,
            stdout: outcome.stdout,
            stderr: outcome.stderr,
            error_pattern,
        };
        return Err(failure.into());
    }

    Ok(outcome)
}

/// Combines the strict CI compliance message with guidance typed by the user.
///
/// Called when user intervention yields `RevertOutcome::Continue`, so the agent
/// always receives the CI output even alongside a custom message. A blank
/// (whitespace-only) user message yields the strict message on its own.
fn build_ci_failure_message_with_user_input(
    resolved: &crate::config::Resolved,
    result: &CiGateResult,
    user_message: &str,
) -> String {
    let compliance = strict_ci_gate_compliance_message(resolved, result);
    if user_message.trim().is_empty() {
        compliance
    } else {
        format!(
            "{}\n\n---\n\nAgent message from user intervention:\n{}",
            compliance, user_message
        )
    }
}

fn strict_ci_gate_compliance_message(
    resolved: &crate::config::Resolved,
    result: &CiGateResult,
) -> String {
    let cmd = ci_gate_command_label(resolved);

    // Include head (early errors) and tail (test failures) of output in the message
    let output_snippet = format_ci_output_for_message(&result.stdout, &result.stderr, 50, 50);

    // Format exit code as a number, using -1 if unavailable (e.g., killed by signal)
    let exit_code_display = result.exit_code.unwrap_or(-1);

    // Detect error patterns and generate specific guidance
    let detected = detect_ci_error_pattern(&result.stdout, &result.stderr);
    let specific_guidance = detected
        .as_ref()
        .map(format_detected_pattern)
        .unwrap_or_default();

    format!(
        r#"CI gate ({cmd}): CI failed with exit code {exit_code_display}.

{output_snippet}
{specific_guidance}Fix the errors above before continuing. You MUST see the CI gate pass before this turn can end.

COMMON PATTERNS:
- "ruff failed: TOML parse error" -> Check pyproject.toml for invalid values at the mentioned line
- "unknown variant X, expected one of Y" -> X is invalid, use one of Y instead
- "format-check failed" -> Run the formatter to see what needs changing
- "lint-check failed" -> Run the linter directly to see errors

NO skipping tests, half-assed patches, or sloppy shortcuts."#
    )
}

fn ci_gate_result_from_failure(result: &CiFailure) -> CiGateResult {
    CiGateResult {
        success: false,
        exit_code: result.exit_code,
        stdout: result.stdout.clone(),
        stderr: result.stderr.clone(),
    }
}

/// Sends `message` to the agent by resuming the continue session, then reports
/// the resumed run to `on_resume`.
///
/// `on_resume` receives the runner output and the elapsed time of the resumed
/// run. Errors from resuming the session or from `on_resume` are propagated.
fn send_continue_message<F>(
    resolved: &crate::config::Resolved,
    continue_session: &mut super::ContinueSession,
    message: &str,
    on_resume: &mut F,
    plugins: Option<&crate::plugins::registry::PluginRegistry>,
) -> Result<()>
where
    F: FnMut(&crate::runner::RunnerOutput, std::time::Duration) -> Result<()>,
{
    let resumed_run =
        super::resume_continue_session(resolved, continue_session, message, plugins)?;
    let (runner_output, elapsed) = (&resumed_run.output, resumed_run.elapsed);
    on_resume(runner_output, elapsed)
}

/// Executes CI gate with auto-retry and Continue support via a runner session.
///
/// Each loop iteration calls `run_ci_gate`. On success the error-tracking fields
/// on `continue_session` are cleared and the function returns `Ok(())`. On a
/// `CiFailure` the loop does one of the following, checked in this order:
///
/// 1. Escalation: once `consecutive_same_error_count` reaches
///    `CI_FAILURE_ESCALATION_THRESHOLD`, `runutil::apply_git_revert_mode` is
///    invoked. A `RevertOutcome::Continue` sends the combined CI/user message to
///    the agent, resets all counters, and re-runs CI; any other outcome bails.
/// 2. Auto-retry: while `ci_failure_retry_count` is below
///    `CI_GATE_AUTO_RETRY_LIMIT`, the strict compliance message is sent to the
///    agent and CI is re-run.
/// 3. Retry budget exhausted: `runutil::apply_git_revert_mode` is invoked. A
///    `RevertOutcome::Continue` sends the combined message and re-runs CI; any
///    other outcome bails with the CI exit code.
///
/// `on_resume` is invoked (through `send_continue_message`) with the runner
/// output and elapsed time after every message sent to the agent.
///
/// # Errors
/// - An error from `run_ci_gate` that is not a `CiFailure` is returned as-is.
/// - Errors from `apply_git_revert_mode`, from resuming the session, and from
///   `on_resume` are propagated.
/// - A non-`Continue` revert outcome produces a `bail!` error describing the failure.
pub(crate) fn run_ci_gate_with_continue_session<F>(
    resolved: &crate::config::Resolved,
    git_revert_mode: crate::contracts::GitRevertMode,
    revert_prompt: Option<&runutil::RevertPromptHandler>,
    continue_session: &mut super::ContinueSession,
    mut on_resume: F,
    plugins: Option<&crate::plugins::registry::PluginRegistry>,
) -> Result<()>
where
    F: FnMut(&crate::runner::RunnerOutput, std::time::Duration) -> Result<()>,
{
    loop {
        // run_ci_gate returns Ok(CiGateResult) on success, Err(CiFailure) on CI failure
        let result = match run_ci_gate(resolved) {
            Ok(_) => {
                // CI passed - reset error tracking and exit loop
                continue_session.last_ci_error_pattern = None;
                continue_session.consecutive_same_error_count = 0;
                return Ok(());
            }
            Err(err) => {
                // Check if this is a CI failure (retryable) or another error;
                // a failed downcast hands the original error back, which `?` returns.
                err.downcast::<CiFailure>()?
            }
        };

        // Get current error pattern and update consecutive count
        let current_pattern = result.error_pattern.as_ref().map(|p| p.to_string());

        // The count only grows when both the previous and the current failure have a
        // detected pattern and the two are equal.
        match (&continue_session.last_ci_error_pattern, &current_pattern) {
            (Some(last), Some(current)) if last == current => {
                continue_session.consecutive_same_error_count = continue_session
                    .consecutive_same_error_count
                    .saturating_add(1);
            }
            _ => {
                // Different error or no pattern - reset counter
                // (restarts at 1 because this failure is the first of a new streak)
                continue_session.consecutive_same_error_count = 1;
            }
        }
        continue_session.last_ci_error_pattern = current_pattern.clone();

        // Check for escalation threshold (same error repeated N times)
        if continue_session.consecutive_same_error_count >= CI_FAILURE_ESCALATION_THRESHOLD {
            log::error!(
                "CI gate failed {} times with same error pattern '{}'; escalating",
                continue_session.consecutive_same_error_count,
                current_pattern.as_deref().unwrap_or("unknown")
            );

            // Prepared up front: `gate_result` feeds the Continue branch,
            // `specific_guidance` feeds the bail message.
            let gate_result = ci_gate_result_from_failure(&result);

            let detected = detect_ci_error_pattern(&result.stdout, &result.stderr);
            let specific_guidance = detected
                .as_ref()
                .map(format_detected_pattern)
                .unwrap_or_default();

            let outcome = runutil::apply_git_revert_mode(
                &resolved.repo_root,
                git_revert_mode,
                "CI failure escalation",
                revert_prompt,
            )?;

            // Both arms leave this iteration (continue or bail), so the retry logic
            // below is never reached once the threshold has been hit.
            match outcome {
                runutil::RevertOutcome::Continue { message } => {
                    let combined_message =
                        build_ci_failure_message_with_user_input(resolved, &gate_result, &message);
                    send_continue_message(
                        resolved,
                        continue_session,
                        &combined_message,
                        &mut on_resume,
                        plugins,
                    )?;

                    // User intervention supplied new guidance; give the agent a fresh retry window.
                    continue_session.last_ci_error_pattern = None;
                    continue_session.consecutive_same_error_count = 0;
                    continue_session.ci_failure_retry_count = 0;
                    continue;
                }
                _ => {
                    bail!(
                        "{} Error: CI failed {} consecutive times with the same error.\n\n\
                         The agent is not making progress on this issue.\n\n\
                         Error pattern: {}\n\n\
                         {}\n\n\
                         MANUAL INTERVENTION REQUIRED: The automated compliance messages \
                         are not resolving this CI failure. Please investigate the root cause \
                         directly and fix it before re-running.",
                        runutil::format_revert_failure_message(
                            "CI gate repeated failure escalation.",
                            outcome,
                        ),
                        continue_session.consecutive_same_error_count,
                        current_pattern.as_deref().unwrap_or("unrecognized"),
                        specific_guidance
                    );
                }
            }
        }

        // Existing retry logic for attempts below threshold
        if continue_session.ci_failure_retry_count < CI_GATE_AUTO_RETRY_LIMIT {
            continue_session.ci_failure_retry_count =
                continue_session.ci_failure_retry_count.saturating_add(1);
            let attempt = continue_session.ci_failure_retry_count;

            log::warn!(
                "CI gate failed; auto-sending strict compliance Continue message to agent (attempt {}/{})",
                attempt,
                CI_GATE_AUTO_RETRY_LIMIT
            );

            // Include the CI output in the compliance message
            // Build CiGateResult from CiFailure for message formatting
            let gate_result = ci_gate_result_from_failure(&result);
            let message = strict_ci_gate_compliance_message(resolved, &gate_result);
            send_continue_message(
                resolved,
                continue_session,
                &message,
                &mut on_resume,
                plugins,
            )?;
            continue;
        }

        // Auto-retry budget is spent: defer to the configured git revert mode.
        let outcome = runutil::apply_git_revert_mode(
            &resolved.repo_root,
            git_revert_mode,
            "CI failure",
            revert_prompt,
        )?;

        match outcome {
            runutil::RevertOutcome::Continue { message } => {
                // Prepend strict CI compliance message to ensure agent sees CI output
                // (unlike the escalation path, no counters are reset here)
                let gate_result = ci_gate_result_from_failure(&result);
                let combined_message =
                    build_ci_failure_message_with_user_input(resolved, &gate_result, &message);
                send_continue_message(
                    resolved,
                    continue_session,
                    &combined_message,
                    &mut on_resume,
                    plugins,
                )?;
                continue;
            }
            _ => {
                // A missing exit code is reported as -1.
                let exit_code_display = result.exit_code.unwrap_or(-1);
                bail!(
                    "{} Error: CI failed with exit code {exit_code_display}",
                    runutil::format_revert_failure_message(
                        "CI gate failed after changes. Fix issues reported by CI and rerun.",
                        outcome,
                    ),
                );
            }
        }
    }
}

/// Produces the human-readable label of the CI gate command.
///
/// Uses the gate's display string when a gate is configured, and the literal
/// `"disabled"` when no gate configuration is present.
pub(crate) fn ci_gate_command_label(resolved: &crate::config::Resolved) -> String {
    match resolved.config.agent.ci_gate.as_ref() {
        Some(gate) => gate.display_string(),
        None => "disabled".to_string(),
    }
}

#[cfg(test)]
#[path = "ci_tests.rs"]
mod tests;