// envseal 0.3.8 - Docs.rs

//! Secret-aware process supervisor — runtime dataflow monitoring.
//!
//! Uses a fork+exec+monitor model to watch the child process's I/O for
//! secret leakage. This is the most aggressive defense tier: every write
//! to stdout/stderr is inspected in real-time and any chunk containing the
//! secret is redacted before forwarding.
//!
//! # Architecture
//!
//! ```text
//! envseal supervisor (parent)
//!   │
//!   ├── fork child
//!   │     ├── apply sandbox (Linux/macOS in `pre_exec`; Windows via Job Object at spawn)
//!   │     ├── apply hardening (RLIMIT_CORE, PR_SET_DUMPABLE, NO_NEW_PRIVS)
//!   │     └── exec <command>
//!   │
//!   ├── capture stdout/stderr via pipes
//!   │     ├── scan each write for secret bytes
//!   │     ├── if found: log BLOCKED event, write `[ENVSEAL:REDACTED]`
//!   │     └── if clean: pass through to real stdout/stderr
//!   │
//!   └── wait for child exit
//! ```
//!
//! # Difference from `inject::execute`
//!
//! - [`crate::execution::inject::execute`]: replaces process; fast, no overhead.
//! - [`supervised_execute`]: fork+exec+monitor. Adds pipe overhead but
//!   catches leaks. Use for Lockdown tier or when runtime detection is
//!   needed.

use std::io::{Read, Write};
use std::process::{Command, Stdio};

#[cfg(unix)]
use std::os::unix::process::CommandExt;

use zeroize::{Zeroize, Zeroizing};

use crate::error::Error;
use crate::sandbox::SandboxTier;

use super::context::{prepare_execution, PreparedExecution};

/// Result of a supervised execution — includes dataflow events.
///
/// Returned by [`supervised_execute`] after both output streams have been
/// drained and the child has been waited on.
#[derive(Debug, Clone)]
pub struct SupervisedResult {
    /// Child exit code, or `-1` when the platform reports no exit code
    /// for the child.
    pub exit_code: i32,
    /// Number of reads (stdout and stderr combined) in which the secret was
    /// detected. A single read that contains the secret several times still
    /// counts once.
    pub leak_events: usize,
    /// Dataflow events recorded during execution, starting with the
    /// injection event and ending with the process-exit event.
    pub events: Vec<DataflowEvent>,
}

/// A single dataflow event observed during supervised execution.
///
/// Events are collected by [`supervised_execute`] and rendered by
/// [`print_dataflow_report`].
#[derive(Debug, Clone)]
pub struct DataflowEvent {
    /// What happened.
    pub kind: DataflowEventKind,
    /// Milliseconds elapsed on the monotonic clock that `supervised_execute`
    /// starts just before spawning the child. The injection event is always
    /// recorded as `0`; values that do not fit in a `u64` saturate to
    /// `u64::MAX`.
    pub timestamp_ms: u64,
    /// Human-readable description of the event (printed verbatim in the
    /// dataflow report and appended to the leak alert).
    pub detail: String,
}

/// Types of dataflow events.
///
/// The two `Leaked*` variants are the ones counted as leaks and listed in
/// the leak alert; the other variants are informational.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataflowEventKind {
    /// Secret injected into the child process. Recorded once, before the
    /// child is spawned.
    SecretInjected,
    /// Secret detected in stdout — output redacted.
    LeakedStdout,
    /// Secret detected in stderr — output redacted.
    LeakedStderr,
    /// Child process exited. Recorded once, after the child has been waited on.
    ProcessExited,
}

/// Execute a command under full supervision with secret leak detection.
///
/// Uses [`prepare_execution`] for all security checks, then wraps the child
/// in a pipe monitor that scans for secret leakage.
///
/// # Errors
///
/// Refuses non-`None` tiers on platforms where the `pre_exec` sandbox path
/// is not available (returns [`Error::CryptoFailure`]). Also bubbles up any
/// error from [`prepare_execution`] or the child spawn.
#[allow(clippy::too_many_lines)]
pub fn supervised_execute(
    vault: &crate::vault::Vault,
    secret_name: &str,
    env_var: &str,
    command: &[String],
    tier: SandboxTier,
) -> Result<SupervisedResult, Error> {
    // PLATFORM GUARD: sandbox tiers other than `None` are applied via
    // platform-specific paths (Linux/macOS in `pre_exec`, Windows via
    // post-spawn Job Object assignment). If we ever land on a platform
    // without any backend, refuse rather than silently spawn unsandboxed.
    #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))]
    if tier.any_isolation() {
        return Err(Error::CryptoFailure(format!(
            "supervised mode with sandbox tier '{}' is not supported on this platform",
            tier.as_str()
        )));
    }

    const MAX_SUPERVISABLE_SECRET_LEN: usize = 16383;

    let mappings = [(secret_name, env_var)];
    let prepared = prepare_execution(vault, &mappings, command)?;

    // SECURITY: an empty secret value makes `contains_secret` return false
    // for every chunk, silently disabling all leak detection. Refuse to
    // supervise rather than give the caller a false sense of security.
    //
    // Additionally, the overlap-buffer approach in `monitor_stream` can only
    // reliably detect secrets that fit within two consecutive 8192-byte reads
    // (16383 bytes max). For longer secrets, a split across three+ chunks
    // would evade detection.
    if let Some((_, ref val)) = prepared.env_pairs.first() {
        if val.len() > MAX_SUPERVISABLE_SECRET_LEN {
            return Err(Error::CryptoFailure(format!(
                "secret '{secret_name}' is {len} bytes — supervised mode cannot reliably \
                 detect leaks for secrets longer than {max} bytes. Use inject mode, split \
                 the secret, or store a shorter value.",
                len = val.len(),
                max = MAX_SUPERVISABLE_SECRET_LEN
            )));
        }
        if val.is_empty() {
            return Err(Error::CryptoFailure(format!(
                "secret '{secret_name}' has an empty value — supervised mode cannot detect \
                 leaks of an empty string. Store a non-empty secret or use inject mode."
            )));
        }
    }

    let start_time = std::time::Instant::now();
    let mut events: Vec<DataflowEvent> = Vec::new();

    events.push(DataflowEvent {
        kind: DataflowEventKind::SecretInjected,
        timestamp_ms: 0,
        detail: format!(
            "secret '{secret_name}' injected as ${env_var} into {}",
            command[0]
        ),
    });

    let mut cmd = build_supervised_command(&prepared, tier);

    // Windows: spawn the child suspended, assign it to the job, then
    // resume — eliminates the post-spawn race window where a brand-new
    // child could `CreateProcessW` a grandchild that escapes the job.
    // Other platforms apply sandbox limits in `pre_exec`, so the
    // ordinary `spawn()` already places the child inside the namespace
    // / SBPL profile before `execve`.
    #[cfg(windows)]
    let (mut child, _job_handle) = crate::sandbox::windows::spawn_in_job(&mut cmd, tier)
        .map_err(|e| Error::CryptoFailure(format!("windows sandbox spawn failed: {e}")))?;
    #[cfg(not(windows))]
    let mut child = cmd.spawn().map_err(Error::ExecFailed)?;

    let child_stdout = child.stdout.take();
    let child_stderr = child.stderr.take();

    let secret_bytes = match prepared.env_pairs.first() {
        Some((_, val)) => val.as_bytes().to_vec(),
        None => Vec::new(),
    };

    let secret_for_stdout = Zeroizing::new(secret_bytes.clone());
    let stdout_start = start_time;
    let stdout_handle = std::thread::spawn(move || -> (usize, Vec<DataflowEvent>) {
        monitor_stream(
            child_stdout,
            &secret_for_stdout,
            &DataflowEventKind::LeakedStdout,
            true,
            stdout_start,
        )
    });

    let secret_for_stderr = Zeroizing::new(secret_bytes);
    let (stderr_leaks, mut stderr_events) = monitor_stream(
        child_stderr,
        &secret_for_stderr,
        &DataflowEventKind::LeakedStderr,
        false,
        start_time,
    );

    let (stdout_leaks, mut stdout_events) = stdout_handle.join().unwrap_or((0, Vec::new()));

    let leak_count = stdout_leaks + stderr_leaks;
    events.append(&mut stdout_events);
    events.append(&mut stderr_events);

    let status = child.wait().map_err(Error::ExecFailed)?;
    let exit_code = status.code().unwrap_or(-1);

    events.push(DataflowEvent {
        kind: DataflowEventKind::ProcessExited,
        timestamp_ms: u64::try_from(start_time.elapsed().as_millis()).unwrap_or(u64::MAX),
        detail: format!("child exited with code {exit_code}"),
    });

    if leak_count > 0 {
        log_leak_alerts(&events, leak_count, secret_name, &prepared.binary_path)?;
    }

    Ok(SupervisedResult {
        exit_code,
        leak_events: leak_count,
        events,
    })
}

/// Assemble the child `Command`: piped stdout/stderr, a scrubbed environment
/// plus the secret variables, and (on Unix) a `pre_exec` hook that hardens
/// and sandboxes the child.
///
/// For [`SandboxTier::Lockdown`] on Linux this spawns the envseal binary in
/// `__sandbox_helper` mode instead of the target directly: the helper finishes
/// the namespace setup (private `tmpfs` over `/tmp`, `MS_PRIVATE` propagation)
/// — operations that aren't async-signal-safe and can't run in `pre_exec` —
/// then `execve`s the target via the inherited fd so TOCTOU pinning is preserved.
fn build_supervised_command(prepared: &PreparedExecution, tier: SandboxTier) -> Command {
    // Lockdown on Linux is delegated entirely to the helper-mode builder.
    #[cfg(target_os = "linux")]
    if matches!(tier, SandboxTier::Lockdown) {
        return build_lockdown_helper_command(prepared, tier);
    }

    let mut child_cmd = Command::new(&prepared.exec_path);
    // On Linux the child sees the original binary path as argv[0] rather
    // than the path that is actually executed.
    #[cfg(target_os = "linux")]
    child_cmd.arg0(&prepared.binary_path);
    child_cmd.args(&prepared.args).env_clear();

    // Start from an empty environment: sanitized variables first, then the
    // secret-bearing ones.
    for (name, value) in &prepared.clean_env {
        child_cmd.env(name, value);
    }
    for (name, secret) in &prepared.env_pairs {
        child_cmd.env(name, secret.as_str());
    }

    // Both output streams are piped so the supervisor can scan them.
    child_cmd.stdout(Stdio::piped()).stderr(Stdio::piped());

    #[cfg(unix)]
    {
        // SAFETY: the hook runs in the forked child right before exec and
        // only invokes the hardening and sandbox entry points.
        // NOTE(review): soundness relies on both callees being
        // async-signal-safe — confirm against their implementations.
        unsafe {
            child_cmd.pre_exec(move || {
                super::context::harden_child_process_inner()?;
                crate::sandbox::apply_sandbox(tier)?;
                Ok(())
            });
        }
    }
    // No `pre_exec` off Unix; consume `tier` so it does not trip the
    // unused-variable lint.
    #[cfg(not(unix))]
    let _ = tier;

    child_cmd
}

/// Build the Lockdown helper-mode Command (Linux-only).
///
/// The resulting command runs `envseal __sandbox_helper --target-fd <N>
/// --arg0 <binary> -- <args…>` and hands the helper:
///
/// - The target binary's pinned fd (its `FD_CLOEXEC` flag is cleared in
///   `pre_exec` so the descriptor survives the exec into the helper).
/// - The decrypted secret env-vars and sanitized environment.
/// - Stdout/stderr pipes from the supervisor.
///
/// The helper performs the not-async-signal-safe mount setup, then `execve`s
/// the target via `/proc/self/fd/<N>`.
#[cfg(target_os = "linux")]
fn build_lockdown_helper_command(prepared: &PreparedExecution, tier: SandboxTier) -> Command {
    // Re-exec ourselves; fall back to the procfs self-link when the current
    // executable path cannot be determined.
    let helper_exe = match std::env::current_exe() {
        Ok(path) => path,
        Err(_) => std::path::PathBuf::from("/proc/self/exe"),
    };

    // The pinned-file fd is exposed as a typed accessor so the
    // Lockdown helper can inherit the binary across `execve`. The
    // `PreparedExecution` keeps the file alive until after spawn —
    // see [`crate::execution::context::PreparedExecution`].
    let pinned_fd: std::os::fd::RawFd = prepared.pinned_target_fd();

    let mut helper_cmd = Command::new(&helper_exe);
    helper_cmd
        .arg("__sandbox_helper")
        .arg("--target-fd")
        .arg(pinned_fd.to_string())
        .arg("--arg0")
        .arg(&prepared.binary_path)
        .arg("--")
        .args(&prepared.args)
        .env_clear();

    // Sanitized environment first, then the secret-bearing variables.
    for (name, value) in &prepared.clean_env {
        helper_cmd.env(name, value);
    }
    for (name, secret) in &prepared.env_pairs {
        helper_cmd.env(name, secret.as_str());
    }

    helper_cmd.stdout(Stdio::piped()).stderr(Stdio::piped());

    // SAFETY: the hook runs in the forked child right before exec; besides
    // the hardening and sandbox entry points it only issues `fcntl` calls on
    // an fd owned by `prepared`.
    // NOTE(review): soundness relies on the two callees being
    // async-signal-safe — confirm against their implementations.
    unsafe {
        helper_cmd.pre_exec(move || {
            super::context::harden_child_process_inner()?;
            crate::sandbox::apply_sandbox(tier)?;

            // A negative fd means there is nothing to hand over.
            if pinned_fd < 0 {
                return Ok(());
            }

            // Clear FD_CLOEXEC on the target fd so it survives the upcoming
            // execve into the helper. Without this the helper would see a
            // closed fd and have no way to reach the target binary.
            let fd_flags = libc::fcntl(pinned_fd, libc::F_GETFD);
            if fd_flags < 0 {
                return Err(std::io::Error::last_os_error());
            }
            if libc::fcntl(pinned_fd, libc::F_SETFD, fd_flags & !libc::FD_CLOEXEC) < 0 {
                return Err(std::io::Error::last_os_error());
            }
            Ok(())
        });
    }

    helper_cmd
}

/// Monitor a piped stream for secret leakage, forwarding clean output.
///
/// Bytes are forwarded to the supervisor's own stdout (`is_stdout == true`)
/// or stderr as soon as it is certain they are not part of an occurrence of
/// `secret`. The only bytes ever held back are a trailing run that is a
/// proper prefix of the secret — i.e. the possible beginning of a secret
/// that the child has not finished writing yet. Those bytes are joined with
/// the next read, so:
///
/// - a secret split across any number of reads is still detected, and
/// - the *whole* occurrence is replaced by the redaction marker; no half of
///   it has already been forwarded by the time the match is recognized.
///
/// At end of stream any held-back bytes are forwarded unchanged (by
/// construction they are shorter than the secret).
///
/// Returns the number of reads in which at least one occurrence was redacted
/// together with one [`DataflowEvent`] of kind `leak_kind` per such read.
/// A `None` stream yields `(0, [])`. Write errors on the forwarding side are
/// ignored (best effort); a read error other than `Interrupted` ends
/// monitoring.
fn monitor_stream(
    stream: Option<impl Read>,
    secret: &[u8],
    leak_kind: &DataflowEventKind,
    is_stdout: bool,
    start_time: std::time::Instant,
) -> (usize, Vec<DataflowEvent>) {
    let mut leaks = 0;
    let mut events = Vec::new();

    let Some(mut reader) = stream else {
        return (leaks, events);
    };

    let mut buf = [0u8; 8192];
    // Held-back bytes from earlier reads followed by the current read. The
    // held-back part is always shorter than the secret, so this capacity is
    // never exceeded and the buffer is never reallocated (which would leave
    // an un-wiped copy behind).
    let mut window = Zeroizing::new(Vec::<u8>::with_capacity(buf.len() + secret.len()));

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // A signal interrupted the read; retrying keeps the pipe drained
            // instead of abandoning supervision mid-stream.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(_) => break,
        };
        window.extend_from_slice(&buf[..n]);

        // Everything before `settled_len` can be judged now; the remainder
        // is a possible secret prefix and waits for more input.
        let settled_len = settled_prefix_len(&window, secret);
        let settled = &window[..settled_len];

        if contains_secret(settled, secret) {
            let redacted = redact_bytes(settled, secret);
            forward_output(is_stdout, &redacted);
            leaks += 1;
            events.push(DataflowEvent {
                kind: leak_kind.clone(),
                timestamp_ms: u64::try_from(start_time.elapsed().as_millis())
                    .unwrap_or(u64::MAX),
                detail: format!(
                    "secret detected in {} output ({n} bytes) — redacted",
                    if is_stdout { "stdout" } else { "stderr" }
                ),
            });
        } else {
            forward_output(is_stdout, settled);
        }

        // Keep only the held-back tail for the next round.
        drop(window.drain(..settled_len));
    }

    // End of stream: whatever is still held back is an incomplete prefix of
    // the secret, not the secret itself — pass it through.
    forward_output(is_stdout, &window);

    buf.fill(0);
    window.zeroize();

    (leaks, events)
}

/// Write `bytes` to the supervisor's stdout or stderr and flush, ignoring
/// I/O errors (forwarding is best effort).
fn forward_output(is_stdout: bool, bytes: &[u8]) {
    if bytes.is_empty() {
        return;
    }
    if is_stdout {
        let mut out = std::io::stdout();
        let _ = out.write_all(bytes);
        // Flush so partial lines (prompts, progress output) are not stuck in
        // the line buffer.
        let _ = out.flush();
    } else {
        let mut err = std::io::stderr();
        let _ = err.write_all(bytes);
        let _ = err.flush();
    }
}

/// Return how many leading bytes of `data` can be scanned and forwarded now.
///
/// Walks `data` left to right, skipping over complete occurrences of
/// `secret`. The walk stops at the first position where the remaining bytes
/// are shorter than the secret but match its beginning: those bytes may turn
/// into a full occurrence once more input arrives, so they must be held
/// back. Returns `data.len()` when nothing needs to be held back (always the
/// case for an empty `secret`).
fn settled_prefix_len(data: &[u8], secret: &[u8]) -> usize {
    if secret.is_empty() {
        return data.len();
    }

    let mut pos = 0;
    while pos < data.len() {
        let rest = &data[pos..];
        if rest.starts_with(secret) {
            // Complete occurrence: it is redacted as a unit, skip past it.
            pos += secret.len();
        } else if rest.len() < secret.len() && secret.starts_with(rest) {
            // Possible start of an occurrence that continues in the next read.
            return pos;
        } else {
            pos += 1;
        }
    }
    data.len()
}

/// Raise the leak alert: emit one `Hostile` signal summarizing every leak
/// event, then write the mandatory structured audit record.
///
/// # Errors
///
/// Propagates a failure of the required audit log write. The outcome of the
/// signal emission itself is ignored.
fn log_leak_alerts(
    events: &[DataflowEvent],
    leak_count: usize,
    secret_name: &str,
    binary_path: &str,
) -> Result<(), Error> {
    // One multi-line detail string keeps all leaks under a single warning
    // in the unified renderer instead of N separately formatted stderr lines.
    let mut detail = format!(
        "{leak_count} secret leak(s) detected and REDACTED — the secret was NOT \
         exposed to the calling process"
    );
    let leak_events = events.iter().filter(|event| {
        matches!(
            event.kind,
            DataflowEventKind::LeakedStdout | DataflowEventKind::LeakedStderr
        )
    });
    for leak in leak_events {
        detail.push_str("\n    └─ ");
        detail.push_str(&leak.detail);
    }

    let signal = crate::guard::Signal::new(
        crate::guard::SignalId::scoped("execution.supervisor.leak", secret_name),
        crate::guard::Category::SupervisorLeak,
        crate::guard::Severity::Hostile,
        "supervised child leaked secret",
        detail,
        "review the child binary; tighten the rule set to require Lockdown sandbox tier",
    );
    let _ = crate::guard::emit_signal_inline(
        signal,
        &crate::security_config::load_system_defaults(),
    );

    // The typed audit event is kept next to the Signal-flavored entry: its
    // fields serve forensic analytics beyond what the SignalId captures.
    let audit_event = crate::audit::AuditEvent::SupervisorLeakDetected {
        secret: secret_name.to_owned(),
        binary: binary_path.to_owned(),
        leak_count,
    };
    crate::audit::log_required(&audit_event)?;
    Ok(())
}

/// Report whether `needle` occurs anywhere inside `haystack`.
///
/// An empty `needle` never matches, and neither does one longer than the
/// haystack.
fn contains_secret(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        // `windows(0)` would panic, and an empty secret is not a leak.
        0 => false,
        // A width larger than the haystack simply yields no windows.
        width => haystack.windows(width).any(|candidate| candidate == needle),
    }
}

/// Produce a copy of `data` in which every non-overlapping occurrence of
/// `secret` (found scanning left to right) is replaced by the marker
/// `[ENVSEAL:REDACTED]`.
///
/// An empty `secret` leaves the data unchanged. The result is wrapped in
/// [`Zeroizing`] so the copied bytes are wiped when dropped.
fn redact_bytes(data: &[u8], secret: &[u8]) -> Zeroizing<Vec<u8>> {
    const MARKER: &[u8] = b"[ENVSEAL:REDACTED]";

    let mut output = Zeroizing::new(Vec::with_capacity(data.len()));
    if secret.is_empty() {
        output.extend_from_slice(data);
        return output;
    }

    // `remaining` is the not-yet-processed tail of `data`.
    let mut remaining = data;
    while let Some((&byte, tail)) = remaining.split_first() {
        match remaining.strip_prefix(secret) {
            // The secret starts here: emit the marker and jump past it.
            Some(after_secret) => {
                output.extend_from_slice(MARKER);
                remaining = after_secret;
            }
            // Ordinary byte: copy it through and advance by one.
            None => {
                output.push(byte);
                remaining = tail;
            }
        }
    }

    output
}

/// Print a dataflow report to stderr.
pub fn print_dataflow_report(result: &SupervisedResult, secret_name: &str) {
    eprintln!();
    eprintln!("╔═══════════════════════════════════════════════╗");
    eprintln!("║          envseal dataflow report              ║");
    eprintln!("╚═══════════════════════════════════════════════╝");
    eprintln!();

    for event in &result.events {
        let icon = match event.kind {
            DataflowEventKind::SecretInjected => "🔑",
            DataflowEventKind::LeakedStdout | DataflowEventKind::LeakedStderr => "🚨",
            DataflowEventKind::ProcessExited => "✓",
        };
        eprintln!("  {icon} [{:>6}ms] {}", event.timestamp_ms, event.detail);
    }

    eprintln!();
    if result.leak_events > 0 {
        eprintln!(
            "  ⚠️  {} leak(s) detected and REDACTED for secret '{}'",
            result.leak_events, secret_name
        );
    } else {
        eprintln!("  ✅ no leaks detected for secret '{secret_name}'");
    }
    eprintln!("  exit code: {}", result.exit_code);
    eprintln!();
}