forjar 1.4.2

Rust-native Infrastructure as Code — bare-metal first, BLAKE3 state, provenance tracing
Documentation
//! FJ-010/011/021/230: Transport abstraction — local, SSH, container, and pepita execution.

pub mod container;
pub mod local;
pub mod pepita;
pub mod ssh;

#[cfg(test)]
mod tests_container;
#[cfg(test)]
mod tests_container_b;
#[cfg(test)]
mod tests_container_c;
#[cfg(test)]
mod tests_container_d;
#[cfg(test)]
mod tests_dispatch;
#[cfg(test)]
mod tests_dispatch_b;
#[cfg(test)]
mod tests_ssh;

use crate::core::types::Machine;

/// Result of running a script on a target: the exit status plus both
/// captured output streams.
#[derive(Clone, Debug)]
pub struct ExecOutput {
    /// Exit code reported by the process.
    pub exit_code: i32,
    /// Everything the process wrote to standard output.
    pub stdout: String,
    /// Everything the process wrote to standard error.
    pub stderr: String,
}

impl ExecOutput {
    /// Reports whether the process finished with exit code 0.
    pub fn success(&self) -> bool {
        matches!(self.exit_code, 0)
    }
}

/// FJ-1357: I8 enforcement gate — run the script through bashrs validation
/// before it is allowed to execute.
///
/// FJ-29: opaque data payloads are removed first via `strip_data_payloads`,
/// because the linter would otherwise read them as shell syntax:
/// 1. Base64 blobs from `source:` file resources (binary data in single quotes)
/// 2. Heredoc payloads from `content:` file resources (user content between
///    delimiters)
///
/// That data is written to files via pipe or heredoc redirection and is never
/// executed as shell.
///
/// # Errors
///
/// Returns an `I8 violation` message wrapping the validator's error when the
/// sanitised script is rejected.
fn validate_before_exec(script: &str) -> Result<(), String> {
    let lintable = strip_data_payloads(script);
    match crate::core::purifier::validate_script(&lintable) {
        Ok(()) => Ok(()),
        Err(e) => Err(format!(
            "I8 violation — script failed bashrs validation: {e}"
        )),
    }
}

/// Strip opaque data payloads that bashrs should not lint.
///
/// Handles four forjar codegen patterns:
/// 1. `echo '<base64>' | base64 -d > '<path>'` — binary file deployment
/// 2. `cat > '<path>' <<'FORJAR_EOF'\n...\nFORJAR_EOF` — text file deployment
/// 3. Copia delta patch temp file operations (`rm -f "$TMPFILE"`, `mv "$TMPFILE" ...`)
///    which use absolute paths that trigger bashrs SEC010 false positives
/// 4. Cargo cache staging operations (`cp`, `mkdir -p`, `rm -rf` with `_STAGING`/`_CACHE_DIR`)
///    which are safe by construction but trigger SEC010/SEC011 false positives
///
/// Returns a copy of `script` with those spans replaced by inert placeholders;
/// everything else is passed through unchanged.
///
/// The heredoc terminator is matched as a whole line (`^FORJAR_EOF$`), which is
/// how the shell itself recognises it. This keeps the stripped span in step
/// with what the shell treats as data: an empty payload (terminator directly
/// after the opener) ends immediately instead of running on to a later
/// `FORJAR_EOF`, and a payload line that merely starts with `FORJAR_EOF` does
/// not end the span early.
///
/// Each pattern is compiled once per process and cached.
fn strip_data_payloads(script: &str) -> String {
    use std::sync::OnceLock;

    static RE_B64: OnceLock<regex::Regex> = OnceLock::new();
    static RE_HEREDOC: OnceLock<regex::Regex> = OnceLock::new();
    static RE_COPIA_RM: OnceLock<regex::Regex> = OnceLock::new();
    static RE_COPIA_MV: OnceLock<regex::Regex> = OnceLock::new();
    static RE_CARGO_OPS: OnceLock<regex::Regex> = OnceLock::new();

    // Phase 1: strip base64 blobs, keeping the destination path (`$2`).
    let re_b64 = RE_B64.get_or_init(|| {
        regex::Regex::new(r"echo '([A-Za-z0-9+/=\n]+)' \| base64 -d > '([^']+)'")
            .expect("base64 regex is valid")
    });
    let pass1 = re_b64.replace_all(script, "echo 'FORJAR_BASE64_STRIPPED' > '$2'");

    // Phase 2: strip heredoc payloads (FORJAR_EOF delimiters).
    // `(?s)` lets `.` span lines; `(?m)` makes `^`/`$` line anchors so the
    // terminator must be a line consisting of exactly `FORJAR_EOF`.
    let re_heredoc = RE_HEREDOC.get_or_init(|| {
        regex::Regex::new(r"(?ms)<<'FORJAR_EOF'\n.*?^FORJAR_EOF$")
            .expect("heredoc regex is valid")
    });
    let pass2 = re_heredoc.replace_all(
        &pass1,
        "<<'FORJAR_EOF'\n# payload stripped for lint\nFORJAR_EOF",
    );

    // Phase 3: strip forjar-generated copia delta temp file operations.
    // These use the TMPFILE variable for atomic file replacement and always
    // reference absolute paths, which triggers bashrs SEC010 false positives.
    let re_copia_rm = RE_COPIA_RM.get_or_init(|| {
        regex::Regex::new(r#"rm -f "\$TMPFILE""#).expect("copia rm regex is valid")
    });
    let pass3 = re_copia_rm.replace_all(&pass2, "# forjar-copia: tmpfile cleanup stripped");

    let re_copia_mv = RE_COPIA_MV.get_or_init(|| {
        regex::Regex::new(r#"mv "\$TMPFILE" '[^']+'"#).expect("copia mv regex is valid")
    });
    let pass4 = re_copia_mv.replace_all(&pass3, "# forjar-copia: atomic replace stripped");

    // Phase 4: strip forjar-generated cargo cache staging operations.
    // These use _STAGING, _CACHE_DIR, _CARGO_BIN variables that are safe by
    // construction (mktemp -d, derived from $HOME) but trigger SEC010/SEC011.
    // Matches any line starting with cp/mkdir -p/rm -r[f] that mentions one of
    // these internal variables.
    // NOTE(review): the whole line is dropped from linting as soon as one of
    // the variable names appears anywhere on it — confirm that only
    // forjar-generated lines can reach this pattern.
    let re_cargo_ops = RE_CARGO_OPS.get_or_init(|| {
        regex::Regex::new(
            r#"(?m)^\s*(?:cp|mkdir -p|rm -rf?)\s+.*\$_(?:STAGING|CACHE_DIR|CARGO_BIN).*$"#,
        )
        .expect("cargo ops regex is valid")
    });
    re_cargo_ops
        .replace_all(&pass4, "# forjar-cargo: cache op stripped")
        .into_owned()
}

/// Run a purified shell script on `machine`, choosing the transport from the
/// machine's transport flags and address.
///
/// Transport priority: pepita > container > local > SSH. A machine is treated
/// as local when its address is `127.0.0.1`, `localhost`, or anything
/// `is_local_addr` accepts; every remaining machine goes over SSH.
///
/// I8 invariant: the script is validated via bashrs before any transport is
/// invoked.
///
/// # Errors
///
/// Returns the validation error if the script is rejected, otherwise whatever
/// error the selected transport reports.
pub fn exec_script(machine: &Machine, script: &str) -> Result<ExecOutput, String> {
    validate_before_exec(script)?;

    if machine.is_pepita_transport() {
        // Pepita (kernel namespace) transport wins over everything else.
        pepita::exec_pepita(machine, script)
    } else if machine.is_container_transport() {
        // Container transport outranks local/SSH.
        container::exec_container(machine, script)
    } else if machine.addr == "127.0.0.1"
        || machine.addr == "localhost"
        || is_local_addr(&machine.addr)
    {
        local::exec_local(script)
    } else {
        ssh::exec_ssh(machine, script)
    }
}

/// Execute a script with an optional timeout (in seconds).
///
/// With `timeout_secs == None` this is a plain call to `exec_script`.
/// With `Some(secs)` the script runs on a detached worker thread and this
/// function waits at most `secs` seconds for its result. The worker is not
/// cancelled when the wait expires; it keeps running in the background and
/// its eventual result is discarded.
///
/// # Errors
///
/// * `transport timeout: ...` when no result arrives within the limit.
/// * `transport failure: ...` when the worker ends without sending a result
///   (the channel is disconnected, e.g. because the worker panicked). This is
///   kept distinct from a timeout so it is neither misreported nor mistaken
///   for a retryable timeout.
/// * Any error returned by `exec_script` itself.
pub fn exec_script_timeout(
    machine: &Machine,
    script: &str,
    timeout_secs: Option<u64>,
) -> Result<ExecOutput, String> {
    use std::sync::mpsc::RecvTimeoutError;

    match timeout_secs {
        Some(secs) => {
            // Owned copies are required because the worker thread may outlive
            // this call.
            let hostname = machine.hostname.clone();
            let machine = machine.clone();
            let script = script.to_string();
            let (tx, rx) = std::sync::mpsc::channel();
            std::thread::spawn(move || {
                let result = exec_script(&machine, &script);
                // The receiver is gone if we already timed out; nothing to do.
                let _ = tx.send(result);
            });
            match rx.recv_timeout(std::time::Duration::from_secs(secs)) {
                Ok(result) => result,
                Err(RecvTimeoutError::Timeout) => Err(format!(
                    "transport timeout: script on '{hostname}' exceeded {secs}s limit"
                )),
                Err(RecvTimeoutError::Disconnected) => Err(format!(
                    "transport failure: worker for script on '{hostname}' exited without a result"
                )),
            }
        }
        None => exec_script(machine, script),
    }
}

/// Reports whether `machine` would be reached over SSH, i.e. it is not a
/// pepita machine, not a container machine, and its address is not local.
pub fn is_ssh_transport(machine: &Machine) -> bool {
    // Any of these conditions routes execution away from SSH.
    let handled_without_ssh = machine.is_pepita_transport()
        || machine.is_container_transport()
        || machine.addr == "127.0.0.1"
        || machine.addr == "localhost"
        || is_local_addr(&machine.addr);

    !handled_without_ssh
}

/// FJ-261: Execute a script, retrying SSH runs that fail with a transient error.
///
/// `ssh_retries` is the total number of attempts (1 = no retry, 3 = up to 3
/// attempts) and is clamped to the range 1..=4. Retries apply only when
/// `is_ssh_transport` is true; every other transport gets exactly one attempt.
///
/// Before each retry the thread sleeps for 200ms × 2^(retry - 1), i.e. 200ms,
/// 400ms, 800ms, and then prints a progress line to stderr.
///
/// # Errors
///
/// Returns the error of the last attempt made: either the first non-transient
/// error, or the transient error of the final allowed attempt.
pub fn exec_script_retry(
    machine: &Machine,
    script: &str,
    timeout_secs: Option<u64>,
    ssh_retries: u32,
) -> Result<ExecOutput, String> {
    let max_attempts = if is_ssh_transport(machine) {
        ssh_retries.clamp(1, 4)
    } else {
        1
    };

    let mut attempt: u32 = 0;
    loop {
        if attempt > 0 {
            let backoff_ms = 200u64 * (1u64 << (attempt - 1));
            std::thread::sleep(std::time::Duration::from_millis(backoff_ms));
            eprintln!(
                "  [retry {}/{}] retrying SSH to {} after {}ms backoff",
                attempt,
                max_attempts - 1,
                machine.addr,
                backoff_ms
            );
        }

        let failure = match exec_script_timeout(machine, script, timeout_secs) {
            Ok(out) => return Ok(out),
            Err(e) => e,
        };

        // Give up when the attempt budget is spent or the error is not the
        // kind that another try could fix.
        let may_retry = attempt + 1 < max_attempts && is_transient_ssh_error(&failure);
        if !may_retry {
            return Err(failure);
        }
        attempt += 1;
    }
}

/// Decide whether an SSH error message describes a transient failure that is
/// worth retrying.
///
/// The message is lowercased and searched for a fixed set of substrings, so
/// the match is case-insensitive.
fn is_transient_ssh_error(err: &str) -> bool {
    const TRANSIENT_MARKERS: [&str; 7] = [
        "connection refused",
        "connection reset",
        "connection timed out",
        "broken pipe",
        "no route to host",
        "transport timeout",
        "failed to spawn ssh",
    ];

    let haystack = err.to_lowercase();
    TRANSIENT_MARKERS
        .iter()
        .any(|&marker| haystack.contains(marker))
}

/// Run a read-only query command on `machine` (used for plan/drift — doesn't
/// need tripwire).
///
/// I8 invariant: the command is validated via bashrs before execution.
///
/// # Errors
///
/// Returns the validation error if the command is rejected, otherwise any
/// error from `exec_script`.
pub fn query(machine: &Machine, cmd: &str) -> Result<ExecOutput, String> {
    // `exec_script` validates again; this explicit gate is deliberate
    // defense-in-depth in case `query` ever takes a different execution path.
    validate_before_exec(cmd).and_then(|()| exec_script(machine, cmd))
}

/// Decide whether `addr` refers to the machine this process is running on.
///
/// An address is local when it is one of the loopback literals `127.0.0.1`,
/// `localhost` or `::1`, or when it equals the trimmed contents of
/// `/etc/hostname`. If that file cannot be read, only the loopback literals
/// count as local.
fn is_local_addr(addr: &str) -> bool {
    if matches!(addr, "127.0.0.1" | "localhost" | "::1") {
        return true;
    }

    // Fall back to comparing against this host's configured name.
    match std::fs::read_to_string("/etc/hostname") {
        Ok(contents) => contents.trim() == addr,
        Err(_) => false,
    }
}