// python_script_runner 1.7.10

#![warn(clippy::pedantic)]

// FILE:    python_script_runner.rs
// PURPOSE: Execute Python scripts from Rust with path traversal prevention,
//          environment isolation, exponential backoff retry, timeout enforcement,
//          and output size limits.
// GOAL:    Expose run_python_script and PythonExecutor as the public API, backed by
//          project-root path management, a configurable retry loop, and security
//          hardening against path traversal, env-var leakage, and resource exhaustion.
// RUNS TO: PythonExecutor::execute_script() / run_python_script()
// FILES:   (None - standalone module)

// NOTE:   Script execution benchmarks not included — depend on external Python interpreter

// ═══════════════════════════════════════════════════════════════════════════════
// KEY FEATURES & DESIGN DECISIONS
// ═══════════════════════════════════════════════════════════════════════════════

// 1. Path Traversal Prevention (CRITICAL)
//    - Rejects paths containing ".." or starting with "/"
//    - Canonicalises the resolved path and verifies it stays inside project_dir
//    - WHY: An attacker controlling relative_path could escape to read any file
//           on the system (e.g. "../../.env"); canonicalise+prefix-check closes
//           both "../" traversal and symlink attacks simultaneously.

// 2. Environment Isolation (CRITICAL)
//    - Clears the inherited environment with env_clear(), then allowlists only
//      PATH, TZ, and PROJECT_DIRECTORY
//    - WHY: Without isolation the child Python process inherits ENCRYPTION_KEY,
//           DATABASE_URL, AWS_ACCESS_KEY_ID, and every other secret loaded into
//           the parent process — a single compromised script leaks them all.
//           PYTHONPATH is intentionally NOT forwarded: allowing it would let
//           scripts import attacker-controlled modules from arbitrary paths.

// 3. Static Python Binary Detection
//    - Resolved lazily on first use and cached in a OnceLock; respects PYTHON_EXECUTABLE override
//    - WHY: Probing python3 --version on every execute_script call wastes a process
//           spawn per call and introduces a race (interpreter removed between probe
//           and use); OnceLock pays the cost exactly once.

// 4. Exponential Backoff with Jitter (RULE retry-backoff)
//    - Uses the `backoff` crate; formula: min(1s, 10ms * 2^attempt) ± 25% jitter
//    - WHY: Fixed delays cause thundering herd when many callers retry
//           simultaneously; jitter spreads load; exponential growth avoids hammering
//           a service that needs time to recover.

// 5. Per-call Timeout
//    - tokio::time::timeout wraps every attempt; 5 s per attempt
//    - WHY: Without a timeout an infinite loop in a Python script leaves the
//           awaiting task (and every caller waiting on it) pending forever.

// 6. Output Size Cap
//    - Streams stdout/stderr via AsyncRead; kills the child if output exceeds
//      MAX_OUTPUT_BYTES (64 KB)
//    - WHY: A misbehaving script writing unbounded output allocates memory until
//           the process is OOM-killed, taking down the whole service.

// 7. Early File Validation
//    - Validates path before entering the retry loop
//    - WHY: A missing script will never succeed on retry; failing fast avoids
//           multiple expensive process spawn attempts and gives a clearer error.

// ═══════════════════════════════════════════════════════════════════════════════
// SECTION MAP:
//   1. CONSTANTS            — Tuneable limits and defaults
//   2. ENVIRONMENT SETUP    — ScriptPaths, PROJECT_DIRECTORY resolution
//   3. PYTHON BINARY        — OnceLock-based interpreter detection
//   4. EXECUTOR             — PythonExecutor public struct
//   5. SCRIPT RUNNER        — run_python_script with retry, timeout, output cap
//   TESTS                   — per-function test modules (directly below each fn)
// ═══════════════════════════════════════════════════════════════════════════════

// ── IMPORTS ──────────────────────────────────────────────────────────────────
// Three groups: stdlib · third-party · internal

// ── stdlib ───────────────────────────────────────────────────────────────────
use std::env;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use std::time::Duration;

// ── third-party ──────────────────────────────────────────────────────────────
use backoff::future::retry;
use backoff::{Error as BackoffError, ExponentialBackoffBuilder};
use dotenvy::dotenv;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::process::Command;
use tokio::time::timeout;

// ── internal ─────────────────────────────────────────────────────────────────
// No internal imports in this module

// ── SECTION 1 · CONSTANTS ────────────────────────────────────────────────────
// Goal: Define all tuneable limits in one place so they can be adjusted without
//       hunting through business logic.
//
//       Values are intentionally small so tests run fast both locally and in CI
//       with no env-var overrides or per-environment branching needed.
//       WHY: One set of constants for everywhere keeps the codebase simple;
//            the test scripts are written to trip the limits cheaply so raising
//            these for production just means updating numbers here.

/// Byte budget for script output captured during a single run.
///
/// The reader in `run_single_attempt` compares its running byte total against
/// this value and fails the attempt once the total exceeds it.
///
/// WHY: 64 KB is sufficient for any legitimate script output in this project.
///      The output-size test writes 9 × 8192 = 73728 bytes to trip this cheaply.
const MAX_OUTPUT_BYTES: usize = 64 * 1024; // 64 KB

/// Hard wall-clock timeout per attempt (seconds).
///
/// Passed to `tokio::time::timeout` around every call to `run_single_attempt`.
///
/// WHY: 5 s is enough for any script we run today and keeps CI fast;
///      a hung script is abandoned well before the job timeout.
const SCRIPT_TIMEOUT_SECS: u64 = 5;

/// Maximum number of attempts (the first run included) before the retry loop
/// returns a permanent error.
///
/// The loop in `run_python_script` keeps a 1-based attempt counter and marks a
/// failure as permanent once `counter >= MAX_RETRY_ATTEMPTS`, so a value of 1
/// means "run once, never retry".
///
/// WHY: 1 attempt means no retry waits at all; the exhaustion test completes
///      in milliseconds with no backoff accumulation.
const MAX_RETRY_ATTEMPTS: u32 = 1;

/// Initial backoff interval (milliseconds) fed to the exponential strategy via
/// `with_initial_interval`.
///
/// WHY: 10 ms keeps any retry delay negligible.
const INITIAL_BACKOFF_MS: u64 = 10;

/// Upper bound (seconds) on a single backoff interval, fed to the exponential
/// strategy via `with_max_interval` per RULE retry-backoff.
///
/// WHY: 1 s cap is consistent with the small initial interval.
const MAX_BACKOFF_SECS: u64 = 1;

// ── SECTION 2 · ENVIRONMENT SETUP ────────────────────────────────────────────
// Goal: Resolve PROJECT_DIRECTORY from .env and build safe, canonicalised
//       absolute paths from project-relative inputs.

/// Resolves script paths relative to the project root and enforces path safety.
///
/// WHY: Centralises PROJECT_DIRECTORY resolution and all traversal-prevention
///      logic so that every path constructed in this module shares one source of
///      truth; no caller can accidentally bypass the safety checks.
struct ScriptPaths {
    project_dir: PathBuf,
}

impl ScriptPaths {
    /// Load PROJECT_DIRECTORY from .env and initialise the path resolver.
    ///
    /// WHY: dotenv().ok() is called here so this module is self-contained —
    ///      callers need not load .env themselves.  Panics on missing
    ///      PROJECT_DIRECTORY because an uninitialised project root makes every
    ///      downstream path invalid; failing loudly at startup is safer than
    ///      silently producing wrong paths later.
    fn new() -> Self {
        dotenv().ok();
        let project_dir =
            env::var("PROJECT_DIRECTORY").expect("PROJECT_DIRECTORY must be set in .env file");
        // WHY: Canonicalise so that symlinked PROJECT_DIRECTORY values (deploy
        //      symlinks, container volume mounts, NFS paths) resolve correctly.
        //      Without this, a file canonicalised through the real path fails
        //      starts_with() against the symlinked project_dir, rejecting every script.
        let canonical_dir = PathBuf::from(&project_dir)
            .canonicalize()
            .unwrap_or_else(|_| PathBuf::from(&project_dir));
        ScriptPaths {
            project_dir: canonical_dir,
        }
    }

    /// Resolve a project-relative path to a safe, canonicalised absolute path.
    ///
    /// Returns `Err` when:
    /// - `relative_path` contains ".." (traversal attempt)
    /// - `relative_path` starts with '/' (absolute path bypass)
    /// - The resolved path does not exist on disk
    /// - The canonicalised path escapes `project_dir` (symlink attack)
    ///
    /// WHY: Three-layer defence — string-level rejection catches obvious "../.."
    ///      before any filesystem call; canonicalise resolves symlinks; prefix
    ///      check ensures the final target is inside the project tree even if a
    ///      symlink pointed elsewhere.
    fn get_safe_script_path(&self, relative_path: &str) -> io::Result<PathBuf> {
        // WHY: Reject traversal patterns before touching the filesystem;
        //      string-level check is fast and catches the common attack vector.
        if relative_path.contains("..") {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("Path traversal not allowed: {}", relative_path),
            ));
        }
        if relative_path.starts_with('/') {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("Absolute paths not allowed: {}", relative_path),
            ));
        }

        let full_path = self.project_dir.join(relative_path);

        // WHY: canonicalize() resolves symlinks and normalises ".." components
        //      injected by the OS layer; a path that doesn't exist fails here
        //      with NotFound rather than silently returning a wrong path.
        let canonical = full_path.canonicalize().map_err(|_| {
            io::Error::new(
                io::ErrorKind::NotFound,
                format!("Script not found: {}", relative_path),
            )
        })?;

        // WHY: Even after string-level checks, a symlink inside the project tree
        //      could point outside it; the prefix check is the final guarantee
        //      that the resolved file lives under project_dir.
        if !canonical.starts_with(&self.project_dir) {
            return Err(io::Error::new(
                io::ErrorKind::PermissionDenied,
                format!(
                    "Script must be within project directory. Got: {}",
                    canonical.display()
                ),
            ));
        }

        Ok(canonical)
    }
}

// ── TESTS ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod script_paths_tests {
    use super::*;
    use serial_test::serial;
    use tempfile::{tempdir, NamedTempFile};

    /// Root a resolver at `dir` directly, bypassing `.env` loading so the test
    /// decides what the project directory is.
    fn make_paths_for(dir: &Path) -> ScriptPaths {
        let project_dir = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
        ScriptPaths { project_dir }
    }

    /// Walks every branch of the resolver: the rejected inputs (traversal,
    /// embedded traversal, absolute, missing), the happy path, and a symlink
    /// that escapes the project directory.
    ///
    /// WHY: get_safe_script_path is the security boundary, so each outcome it
    ///      can produce is pinned here.
    #[test]
    #[serial]
    fn test_get_safe_script_path() {
        let sandbox = tempdir().unwrap();
        let resolver = make_paths_for(sandbox.path());

        // Inputs that must be refused, with the error kind each one yields and
        // (where the message matters) a fragment the message must contain.
        // Order matters only for the last row: it runs before any file exists.
        let refused: [(&str, io::ErrorKind, Option<&str>); 4] = [
            ("../etc/passwd", io::ErrorKind::InvalidInput, Some("traversal")),
            (
                "scripts/../../etc/passwd",
                io::ErrorKind::InvalidInput,
                None,
            ),
            ("/etc/passwd", io::ErrorKind::InvalidInput, Some("Absolute")),
            ("no_such_file.py", io::ErrorKind::NotFound, None),
        ];
        for (input, expected_kind, fragment) in refused {
            let failure = resolver.get_safe_script_path(input).unwrap_err();
            assert_eq!(failure.kind(), expected_kind);
            if let Some(fragment) = fragment {
                assert!(failure.to_string().contains(fragment));
            }
        }

        // Happy path: an existing file inside the project resolves to its
        // canonical absolute location.
        let script = sandbox.path().join("ok.py");
        std::fs::write(&script, "print('ok')").unwrap();
        let resolved = resolver.get_safe_script_path("ok.py").unwrap();
        assert_eq!(resolved, script.canonicalize().unwrap());

        // A link that lives inside the project but targets a file outside it
        // must be caught by the prefix check.
        #[cfg(unix)]
        {
            let outside = NamedTempFile::new().unwrap();
            let link = sandbox.path().join("escape_link.py");
            std::os::unix::fs::symlink(outside.path(), &link).unwrap();
            let escaped = resolver.get_safe_script_path("escape_link.py");
            assert_eq!(
                escaped.unwrap_err().kind(),
                io::ErrorKind::PermissionDenied
            );
        }
    }
}

// ── SECTION 3 · PYTHON BINARY ────────────────────────────────────────────────
// Goal: Detect the Python interpreter once, on first use, and cache it in a
//       module-level static to avoid per-call probe overhead.

/// Process-wide cache for the Python interpreter command (name or path).
///
/// Starts empty; `get_python_bin` fills it on first call through
/// `get_or_init(resolve_python_bin)` and every later call reads the stored
/// value.  Because the value is cached, changes made to `PYTHON_EXECUTABLE`
/// after the first call are not observed.
///
/// WHY: A OnceLock pays the detection cost once; subsequent calls read from
///      cache.  An env override (PYTHON_EXECUTABLE) lets CI and venv users
///      pin an exact interpreter without modifying code.
static PYTHON_BIN: OnceLock<String> = OnceLock::new();

/// Return the Python interpreter binary to use.
///
/// WHY: Separated from PYTHON_BIN initialisation so tests can call the
///      detection logic without touching global state.
fn resolve_python_bin() -> String {
    // WHY: PYTHON_EXECUTABLE env var is checked first so venv and conda users
    //      can override detection without changing code; a missing or invalid
    //      path fails loudly at initialisation, not mid-execution.
    if let Ok(explicit) = env::var("PYTHON_EXECUTABLE") {
        return explicit;
    }
    // WHY: `which` crate searches PATH correctly on all platforms; avoids
    //      spawning a child process just to check binary existence.
    if which::which("python3").is_ok() {
        return "python3".to_string();
    }
    if which::which("python").is_ok() {
        return "python".to_string();
    }
    panic!("No Python interpreter found on PATH. Set PYTHON_EXECUTABLE env var to override.");
}

/// Hand out the cached interpreter command, running detection on first use.
///
/// The first caller triggers `resolve_python_bin`; everyone afterwards gets a
/// borrow of the same stored string.
fn get_python_bin() -> &'static str {
    let cached: &'static String = PYTHON_BIN.get_or_init(resolve_python_bin);
    cached.as_str()
}

// ── TESTS ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod python_bin_tests {
    use super::*;
    use serial_test::serial;
    use std::ffi::OsString;

    /// Name of the override variable exercised by this module.
    const OVERRIDE_VAR: &str = "PYTHON_EXECUTABLE";

    /// Remembers the override's value at construction and puts it back on drop.
    ///
    /// WHY: The test mutates process-global state.  Restoring in `Drop` means a
    ///      value supplied by the developer or CI survives the test even when
    ///      an assertion fails part-way through.
    struct OverrideGuard {
        saved: Option<OsString>,
    }

    impl OverrideGuard {
        fn capture() -> Self {
            Self {
                saved: env::var_os(OVERRIDE_VAR),
            }
        }
    }

    impl Drop for OverrideGuard {
        fn drop(&mut self) {
            // SAFETY: test-only env mutation, guarded by serial_test
            match self.saved.take() {
                Some(previous) => unsafe { env::set_var(OVERRIDE_VAR, previous) },
                None => unsafe { env::remove_var(OVERRIDE_VAR) },
            }
        }
    }

    /// Verifies that resolve_python_bin honours PYTHON_EXECUTABLE and falls
    /// back to PATH detection when the override is absent.
    ///
    /// WHY: The env-var override is the primary integration point for CI
    ///      pipelines and venvs — it must be tested to guarantee it works.
    #[test]
    #[serial]
    fn test_resolve_python_bin() {
        let _restore = OverrideGuard::capture();

        // ── PYTHON_EXECUTABLE override is respected ───────────────────────────
        // WHY: CI environments may use /usr/bin/python3.11 explicitly; the
        //      override must take precedence over PATH detection.
        {
            // SAFETY: test-only env mutation, guarded by serial_test
            unsafe { env::set_var(OVERRIDE_VAR, "/custom/python") };
            assert_eq!(resolve_python_bin(), "/custom/python");
        }

        // ── falls back to PATH detection when override absent ─────────────────
        // WHY: Normal runtime path; result must be a non-empty string.
        //      resolve_python_bin panics when no interpreter exists, so the
        //      call is guarded and this sub-section is skipped on such hosts.
        {
            // SAFETY: test-only env mutation, guarded by serial_test
            unsafe { env::remove_var(OVERRIDE_VAR) };
            if which::which("python3").is_ok() || which::which("python").is_ok() {
                assert!(!resolve_python_bin().is_empty());
            }
        }
    }
}

// ── SECTION 4 · EXECUTOR ─────────────────────────────────────────────────────
// Goal: Provide the public struct callers use to execute scripts via
//       project-relative paths.

/// [TIER 1] Public interface for executing Python scripts with automatic path
/// resolution, path-traversal prevention, and environment isolation.
///
/// WHY: Wrapping ScriptPaths inside PythonExecutor keeps path resolution an
///      implementation detail; callers interact only with execute_script().
pub struct PythonExecutor {
    /// Resolver that turns project-relative inputs into validated paths.
    script_paths: ScriptPaths,
}

impl PythonExecutor {
    /// Create a new executor, loading PROJECT_DIRECTORY from .env.
    ///
    /// # Panics
    /// Panics when `PROJECT_DIRECTORY` is not set, because the underlying
    /// `ScriptPaths::new` refuses to run without a project root.
    ///
    /// WHY: Explicit constructor keeps initialisation visible at the call site;
    ///      no hidden global state is mutated on import.
    #[must_use]
    pub fn new() -> Self {
        PythonExecutor {
            script_paths: ScriptPaths::new(),
        }
    }

    /// [TIER 1] Execute a Python script given a project-relative path.
    ///
    /// # Errors
    /// - Whatever `ScriptPaths::get_safe_script_path` reports for an unsafe or
    ///   missing path.
    /// - `InvalidInput` when the resolved path is not valid UTF-8.
    /// - Whatever `run_python_script` reports once its attempts are exhausted.
    ///
    /// WHY: Delegates path resolution + safety to ScriptPaths and execution to
    ///      run_python_script so each concern lives in exactly one place.
    pub async fn execute_script(&self, relative_path: &str) -> io::Result<()> {
        let canonical = self.script_paths.get_safe_script_path(relative_path)?;
        // WHY: run_python_script takes &str, so a path that is not valid UTF-8
        //      cannot be forwarded; report it instead of lossily converting.
        let Some(path_str) = canonical.to_str() else {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("Non-UTF-8 script path: {relative_path}"),
            ));
        };
        run_python_script(path_str).await
    }
}

impl Default for PythonExecutor {
    /// Same as [`PythonExecutor::new`], including its panic condition.
    fn default() -> Self {
        Self::new()
    }
}

// ── TESTS ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod execute_script_tests {
    use super::*;
    use serial_test::serial;
    use tempfile::tempdir;

    /// Assemble an executor whose project root is `dir`, skipping `.env`.
    fn make_executor_for(dir: &std::path::Path) -> PythonExecutor {
        let project_dir = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
        let script_paths = ScriptPaths { project_dir };
        PythonExecutor { script_paths }
    }

    /// Drives the public entry point through its three outcomes: a refused
    /// traversal, a missing script, and (when an interpreter is installed) a
    /// script that really runs.
    ///
    /// WHY: execute_script is the only public door for project-relative
    ///      paths, so the safety checks must be proven to be wired into it.
    #[tokio::test]
    #[serial]
    async fn test_execute_script() {
        let workspace = tempdir().unwrap();
        let runner = make_executor_for(workspace.path());

        // Traversal must be refused before anything is executed.
        let traversal = runner.execute_script("../../etc/passwd").await;
        assert_eq!(traversal.unwrap_err().kind(), io::ErrorKind::InvalidInput);

        // A script that does not exist is reported as NotFound.
        let missing = runner.execute_script("no_such_script.py").await;
        assert_eq!(missing.unwrap_err().kind(), io::ErrorKind::NotFound);

        // End-to-end smoke test; only meaningful when Python is on PATH.
        let python_on_path = which::which("python3").is_ok() || which::which("python").is_ok();
        if !python_on_path {
            return;
        }
        let hello = workspace.path().join("hello.py");
        std::fs::write(&hello, "print('hello from executor')").unwrap();
        let outcome = runner.execute_script("hello.py").await;
        assert!(outcome.is_ok(), "expected Ok, got: {:?}", outcome);
    }
}

// ── SECTION 5 · SCRIPT RUNNER ────────────────────────────────────────────────
// Goal: Execute a Python script at an absolute, pre-validated path with
//       environment isolation, exponential-backoff retry, per-attempt timeout,
//       and output size enforcement.

/// [TIER 1] Execute a Python script at the given pre-validated absolute path.
///
/// Streams stdout and stderr to the process's stdout/stderr in real time.
/// Retries up to MAX_RETRY_ATTEMPTS times with exponential backoff and jitter.
/// Each attempt is bounded by SCRIPT_TIMEOUT_SECS.
/// Kills the child and returns an error if output exceeds MAX_OUTPUT_BYTES.
///
/// WHY: Separated from PythonExecutor so callers with an already-validated
///      absolute path do not pay for path resolution overhead; single
///      responsibility keeps each function independently testable.
pub async fn run_python_script(script_path: &str) -> io::Result<()> {
    // WHY: Pre-validate before entering the retry loop — a missing file will
    //      never succeed on retry; fail fast to avoid backoff waits and give a
    //      clear NotFound error instead of a process launch failure.
    if !Path::new(script_path).exists() {
        return Err(io::Error::new(
            io::ErrorKind::NotFound,
            format!("Script not found at: {}", script_path),
        ));
    }

    // WHY: Capture script_path as an owned String so the closure can be 'static;
    //      the backoff crate requires the future factory to own its captured data.
    let path = script_path.to_owned();

    // WHY: ExponentialBackoffBuilder gives us jitter + cap in one call;
    //      satisfies RULE retry-backoff without hand-rolling the formula.
    let backoff_policy = ExponentialBackoffBuilder::new()
        .with_initial_interval(Duration::from_millis(INITIAL_BACKOFF_MS))
        .with_max_interval(Duration::from_secs(MAX_BACKOFF_SECS))
        .with_max_elapsed_time(None) // WHY: we control attempts via max_tries, not wall time
        .build();

    let mut attempt = 0u32;

    retry(backoff_policy, || {
        let path = path.clone();
        attempt += 1;
        let current = attempt;

        async move {
            let result = timeout(
                Duration::from_secs(SCRIPT_TIMEOUT_SECS),
                run_single_attempt(&path),
            )
            .await;

            match result {
                // WHY: timeout() wraps the inner Result in an outer Result;
                //      flatten the two layers into one error value.
                Err(_) => {
                    let msg = format!(
                        "Script timed out after {} s (attempt {}/{}): {}",
                        SCRIPT_TIMEOUT_SECS, current, MAX_RETRY_ATTEMPTS, path
                    );
                    eprintln!("WARN  [python_runner] {}", msg);
                    if current >= MAX_RETRY_ATTEMPTS {
                        Err(BackoffError::permanent(io::Error::new(
                            io::ErrorKind::TimedOut,
                            msg,
                        )))
                    } else {
                        Err(BackoffError::transient(io::Error::new(
                            io::ErrorKind::TimedOut,
                            msg,
                        )))
                    }
                }
                Ok(Ok(())) => Ok(()),
                Ok(Err(e)) => {
                    let msg = format!(
                        "Script failed (attempt {}/{}): {} — {}",
                        current, MAX_RETRY_ATTEMPTS, path, e
                    );
                    if current >= MAX_RETRY_ATTEMPTS {
                        eprintln!("ERROR [python_runner] {}", msg);
                        Err(BackoffError::permanent(e))
                    } else {
                        eprintln!("WARN  [python_runner] {}", msg);
                        Err(BackoffError::transient(e))
                    }
                }
            }
        }
    })
    .await
}

/// Forward a child's stdout and stderr to this process while enforcing one
/// shared byte budget.
///
/// Returns `Ok(true)` when both streams reached end-of-file within `cap`
/// bytes in total, and `Ok(false)` as soon as the combined total exceeds `cap`
/// (the chunk that crossed the limit is not forwarded).
///
/// # Errors
/// Returns the underlying error when reading either child stream or writing to
/// this process's stdout fails.  Failures writing to this process's stderr are
/// ignored (best-effort diagnostics).
///
/// NOTE: uses `tokio::select!`, i.e. tokio's `macros` feature — the same
///       feature the `#[tokio::test]` attributes in this file rely on.
async fn forward_capped_output<O, E>(
    mut child_out: O,
    mut child_err: E,
    cap: usize,
) -> io::Result<bool>
where
    O: tokio::io::AsyncRead + Unpin,
    E: tokio::io::AsyncRead + Unpin,
{
    let mut parent_out = tokio::io::stdout();
    let mut parent_err = tokio::io::stderr();
    let mut out_buf = vec![0u8; 8192];
    let mut err_buf = vec![0u8; 8192];
    let mut out_open = true;
    let mut err_open = true;
    let mut total: usize = 0;

    // WHY: Both pipes are drained from one loop so neither can fill up and
    //      block the child while the other is being read, and so a single
    //      counter covers stdout and stderr together.
    while out_open || err_open {
        tokio::select! {
            read = child_out.read(&mut out_buf), if out_open => {
                let n = read?;
                if n == 0 {
                    out_open = false;
                } else {
                    total = total.saturating_add(n);
                    if total > cap {
                        return Ok(false);
                    }
                    parent_out.write_all(&out_buf[..n]).await?;
                }
            }
            read = child_err.read(&mut err_buf), if err_open => {
                let n = read?;
                if n == 0 {
                    err_open = false;
                } else {
                    total = total.saturating_add(n);
                    if total > cap {
                        return Ok(false);
                    }
                    // Best-effort: a failure to echo diagnostics must not
                    // mask the script's own outcome.
                    parent_err.write_all(&err_buf[..n]).await.ok();
                }
            }
        }
    }

    parent_out.flush().await?;
    parent_err.flush().await.ok();
    Ok(true)
}

/// Execute the script exactly once with environment isolation and output cap.
///
/// # Errors
/// - The spawn error when the interpreter cannot be started.
/// - An `Other` error mentioning the KB limit when stdout + stderr together
///   exceed MAX_OUTPUT_BYTES; the child is killed first.
/// - An I/O error when forwarding output fails; the child is killed first.
/// - An `Other` error carrying the exit code (or -1 when there is none) when
///   the script exits unsuccessfully.
///
/// WHY: Single-attempt logic lives here so run_python_script can focus on
///      retry orchestration; isolated function is easier to reason about.
async fn run_single_attempt(script_path: &str) -> io::Result<()> {
    // WHY: kill_on_drop makes sure the interpreter does not outlive this
    //      future.  When the caller's timeout fires, this future is dropped
    //      mid-await; without the flag the child would keep running.
    let mut child = build_isolated_command(script_path)
        .kill_on_drop(true)
        .spawn()?;

    // WHY: take() moves the pipe handles out of Child so they can be read
    //      while `child` itself stays available for kill()/wait().
    let stdout = child
        .stdout
        .take()
        .ok_or_else(|| io::Error::other("Failed to capture stdout"))?;
    let stderr = child
        .stderr
        .take()
        .ok_or_else(|| io::Error::other("Failed to capture stderr"))?;

    // WHY: Output is drained before waiting on the child so that a breach of
    //      the limit is noticed while the child is still running and can be
    //      stopped immediately.
    let within_cap = match forward_capped_output(stdout, stderr, MAX_OUTPUT_BYTES).await {
        Ok(within_cap) => within_cap,
        Err(e) => {
            child.kill().await.ok();
            return Err(e);
        }
    };
    if !within_cap {
        child.kill().await.ok();
        return Err(io::Error::other(format!(
            "Script output exceeded {} KB limit: {}",
            MAX_OUTPUT_BYTES / 1024,
            script_path
        )));
    }

    let status = child.wait().await?;
    if status.success() {
        Ok(())
    } else {
        // code() is None when the process was ended by a signal; report -1.
        Err(io::Error::other(format!(
            "Script exited with status {}: {}",
            status.code().unwrap_or(-1),
            script_path
        )))
    }
}

/// Assemble the interpreter invocation for `script_path` with piped output and
/// an allowlist-only environment.
///
/// The child sees exactly: a fixed `PATH`, `TZ=UTC`, and — only when it is set
/// in this process — `PROJECT_DIRECTORY`.  Everything else is wiped.
///
/// WHY: Starting from an empty environment means secrets held by the parent
///      (ENCRYPTION_KEY, DATABASE_URL, API keys, etc.) can never reach the
///      script by accident; each variable the script gets is a deliberate grant.
fn build_isolated_command(script_path: &str) -> Command {
    use std::process::Stdio;

    let mut command = Command::new(get_python_bin());
    command.arg(script_path);

    // Both streams are piped so the caller can forward and meter them.
    command.stdout(Stdio::piped());
    command.stderr(Stdio::piped());

    // WHY: Wipe first, then add — the allowlist is built on a clean slate
    //      instead of trying to subtract every known secret.
    command.env_clear();
    // WHY: Standard system directories only; no user additions that could
    //      shadow system tools or reveal directory layout.
    command.env("PATH", "/usr/local/bin:/usr/bin:/bin");
    // WHY: Pinning UTC keeps datetime output identical on every machine.
    command.env("TZ", "UTC");

    // WHY: Scripts use PROJECT_DIRECTORY to find sibling resources without
    //      hardcoded paths; it is forwarded only when the parent has it.
    if let Ok(project_dir) = env::var("PROJECT_DIRECTORY") {
        command.env("PROJECT_DIRECTORY", project_dir);
    }

    command
}

// ── TESTS ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod run_python_script_tests {
    use super::*;
    use serial_test::serial;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// True when either interpreter name resolves on PATH.
    fn python_available() -> bool {
        ["python3", "python"]
            .iter()
            .any(|name| which::which(name).is_ok())
    }

    /// Write `source` plus a trailing newline into a fresh temp file.
    ///
    /// Returns the file handle (dropping it deletes the file, so callers keep
    /// it alive for the duration of the run) together with its path as a String.
    fn temp_script(source: &str) -> (NamedTempFile, String) {
        let mut file = NamedTempFile::new().unwrap();
        writeln!(file, "{}", source).unwrap();
        let location = file.path().to_str().unwrap().to_string();
        (file, location)
    }

    /// Covers, in order: NotFound for a missing file, the path appearing in
    /// the error text, Ok for a valid script, failure of an always-failing
    /// script, and enforcement of the output-size limit.
    ///
    /// WHY: run_python_script is the critical execution path — every branch
    ///      (security + resilience) must be verified end-to-end.
    #[tokio::test]
    #[serial]
    async fn test_run_python_script() {
        // A missing script fails straight away with NotFound and names itself.
        let absent = "/tmp/__nonexistent_abc123__.py";
        let not_found = run_python_script(absent).await.unwrap_err();
        assert_eq!(not_found.kind(), io::ErrorKind::NotFound);
        assert!(not_found.to_string().contains(absent));

        // Operators must be able to see which script was the problem.
        let elsewhere = "/nonexistent/path/script.py";
        let reported = run_python_script(elsewhere).await.unwrap_err();
        assert!(reported.to_string().contains(elsewhere));

        // Everything below needs a real interpreter.
        if !python_available() {
            return;
        }

        // Happy path through retry + timeout + streaming.
        {
            let (_keep, location) = temp_script("print('hello')");
            let outcome = run_python_script(&location).await;
            assert!(outcome.is_ok(), "expected Ok, got: {:?}", outcome);
        }

        // A script that always exits non-zero surfaces as an Other error once
        // the (single) allowed attempt is used up.
        {
            let (_keep, location) = temp_script("import sys; sys.exit(1)");
            let failure = run_python_script(&location).await.unwrap_err();
            assert_eq!(failure.kind(), io::ErrorKind::Other);
        }

        // 9 × 8192 = 73728 bytes is just over the 65536-byte limit, so the
        // run must fail with the KB-limit message.
        {
            let (_keep, location) = temp_script(
                "import sys\nfor _ in range(9):\n    sys.stdout.write('x' * 8192)\n    sys.stdout.flush()",
            );
            let overflow = run_python_script(&location).await.unwrap_err();
            assert_eq!(overflow.kind(), io::ErrorKind::Other);
            assert!(
                overflow.to_string().contains("KB"),
                "expected KB-limit message, got: {}",
                overflow
            );
        }
    }
}

// ── TESTS ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod build_isolated_command_tests {
    use super::*;
    use serial_test::serial;
    use std::ffi::OsString;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Sets an environment variable for as long as the guard is alive and, on
    /// drop, puts the variable back exactly as it was (previous value or unset).
    ///
    /// WHY: Cleaning up with plain statements after the spawn leaks the fake
    ///      secrets into every later test if the spawn panics, and silently
    ///      wipes a value the parent process already had. Drop runs on both
    ///      the normal and the panicking path.
    struct ScopedEnvVar {
        key: &'static str,
        previous: Option<OsString>,
    }

    impl ScopedEnvVar {
        /// Records the current state of `key`, then sets it to `value`.
        fn set(key: &'static str, value: &str) -> Self {
            let previous = env::var_os(key);
            // SAFETY: the only caller is a #[serial] test, so no other
            // serialised test mutates the process environment concurrently.
            // NOTE(review): tests that are not #[serial] could still read the
            // environment in parallel — confirm none do.
            unsafe { env::set_var(key, value) };
            Self { key, previous }
        }
    }

    impl Drop for ScopedEnvVar {
        fn drop(&mut self) {
            // SAFETY: same reasoning as in `ScopedEnvVar::set`.
            match self.previous.take() {
                Some(value) => unsafe { env::set_var(self.key, value) },
                None => unsafe { env::remove_var(self.key) },
            }
        }
    }

    /// Verifies environment isolation end-to-end: spawn a trivial script that
    /// prints its env, then assert parent secrets are absent and allowlisted
    /// keys are present.
    ///
    /// WHY: Command's Debug format is not a stable API — it may not include
    ///      env vars on some platforms, so a passing Debug test does not prove
    ///      isolation. Spawning a real child process and inspecting its actual
    ///      environment is the only correct verification.
    #[tokio::test]
    #[serial]
    async fn test_env_isolation_end_to_end() {
        if which::which("python3").is_err() && which::which("python").is_err() {
            return; // Skip when no Python interpreter is available.
        }

        // The script dumps the child's environment as sorted KEY=value lines.
        let mut script = NamedTempFile::new().expect("failed to create temp script");
        writeln!(
            script,
            r#"import os
for _k, _v in sorted(os.environ.items()): print(str(_k) + "=" + str(_v))"#
        )
        .expect("failed to write temp script");
        let script_path = script
            .path()
            .to_str()
            .expect("temp script path must be valid UTF-8");

        let output = {
            // Parent secrets that must NOT reach the child. The guards restore
            // the parent environment when this scope ends, even on panic.
            let _encryption_key = ScopedEnvVar::set("ENCRYPTION_KEY", "super_secret");
            let _database_url = ScopedEnvVar::set("DATABASE_URL", "postgres://secret");

            // build_isolated_command() is handed the temp script directly; this
            // test only adds piped stdio so the child's output can be captured.
            build_isolated_command(script_path)
                .stdout(std::process::Stdio::piped())
                .stderr(std::process::Stdio::piped())
                .output()
                .await
                .expect("failed to run isolated Python child")
        };

        let stdout_str = String::from_utf8_lossy(&output.stdout);
        let stderr_str = String::from_utf8_lossy(&output.stderr);

        // A failed child prints nothing useful; surface stderr instead of a
        // confusing "PATH must be forwarded" failure further down.
        assert!(
            output.status.success(),
            "env-dump script failed ({}); stderr: {stderr_str}",
            output.status
        );

        // Exact variable names seen by the child. Substring checks such as
        // contains("PATH=") would also be satisfied by e.g. "PYTHONPATH=".
        let child_keys: Vec<&str> = stdout_str
            .lines()
            .filter_map(|line| line.split_once('=').map(|(key, _)| key))
            .collect();
        let child_has = |key: &str| child_keys.contains(&key);

        // Parent secrets must be absent, both by name and by value.
        assert!(
            !child_has("ENCRYPTION_KEY") && !stdout_str.contains("super_secret"),
            "ENCRYPTION_KEY must not reach child; env output: {stdout_str}"
        );
        assert!(
            !child_has("DATABASE_URL") && !stdout_str.contains("postgres://secret"),
            "DATABASE_URL must not reach child; env output: {stdout_str}"
        );

        // Allowlisted keys must be present.
        assert!(
            child_has("PATH"),
            "PATH must be forwarded; env output: {stdout_str}"
        );
        assert!(
            child_has("TZ"),
            "TZ must be forwarded; env output: {stdout_str}"
        );
        // PROJECT_DIRECTORY is conditionally forwarded — only if set in parent.
        if env::var("PROJECT_DIRECTORY").is_ok() {
            assert!(
                child_has("PROJECT_DIRECTORY"),
                "PROJECT_DIRECTORY must be forwarded; env output: {stdout_str}"
            );
        }
    }
}

// ══════════════════════════════════════════════════════════════════════════════
// TEST COVERAGE MATRIX
// ══════════════════════════════════════════════════════════════════════════════
//
// Tier 1 = Public API      → must be exhaustive; called by application code
// Tier 2 = Internal Logic  → correctness + resilience; helpers & infrastructure
//
// Legend: ✅ covered  ⚠️ partial  ❌ not covered
//
// ┌──────────────────────────────────────────┬──────┬────────────────────────────────┬──────┬───────────────────────────────────────────────────────────────────────────────────┐
// │ Function / Component                     │ Tier │ Test Module                    │Tests │ Sub-sections covered                                                              │
// ├──────────────────────────────────────────┼──────┼────────────────────────────────┼──────┼───────────────────────────────────────────────────────────────────────────────────┤
// │ PythonExecutor::execute_script()         │  1   │ execute_script_tests           │ 1 ✅ │ traversal rejected, missing → NotFound, valid script → Ok, python-skip guard      │
// │ run_python_script()                      │  1   │ run_python_script_tests        │ 1 ✅ │ NotFound, path in error, Ok, exhaustion error, output size cap                    │
// ├──────────────────────────────────────────┼──────┼────────────────────────────────┼──────┼───────────────────────────────────────────────────────────────────────────────────┤
// │ ScriptPaths::get_safe_script_path()      │  2   │ script_paths_tests             │ 1 ✅ │ ".." rejected, embedded ".." rejected, "/" rejected, missing → NotFound,          │
// │                                          │      │                                │      │ valid → canonical path, symlink escape → PermissionDenied (unix only)             │
// │ resolve_python_bin()                     │  2   │ python_bin_tests               │ 1 ✅ │ PYTHON_EXECUTABLE override respected, PATH fallback non-empty                     │
// │ build_isolated_command()                 │  2   │ build_isolated_command_tests   │ 1 ✅ │ secrets absent from child env, PATH + TZ forwarded, PROJECT_DIRECTORY if set      │
// ├──────────────────────────────────────────┼──────┼────────────────────────────────┼──────┼───────────────────────────────────────────────────────────────────────────────────┤
// │ TOTALS                                   │      │                                │      │                                                                                   │
// │   Tier 1 — Public API                    │                2 functions            │ 2 ✅ │ All public functions fully covered                                                │
// │   Tier 2 — Internal Logic                │               3 components            │ 3 ✅ │ Path safety, interpreter detection, env isolation all tested                      │
// │   TOTAL                                  │                                       │ 5 ✅ │                                                                                   │
// └──────────────────────────────────────────┴──────┴────────────────────────────────┴──────┴───────────────────────────────────────────────────────────────────────────────────┘

// ═══════════════════════════════════════════════════════════════════════════════
// USAGE GUIDE
// ═══════════════════════════════════════════════════════════════════════════════
//
// 1. Set PROJECT_DIRECTORY in .env (required):
//    PROJECT_DIRECTORY=/path/to/project
//
// 2. Optional overrides in .env:
//    PYTHON_EXECUTABLE=/path/to/venv/bin/python   # pin interpreter
//
// 3. Use PythonExecutor (project-relative path):
//    use python_script_runner::PythonExecutor;
//    let executor = PythonExecutor::new();
//    executor.execute_script("scripts/my_script.py").await?;
//
// 4. Use run_python_script (pre-validated absolute path):
//    use python_script_runner::run_python_script;
//    run_python_script("/absolute/path/to/script.py").await?;
//
// Key points:
// - PythonExecutor validates + resolves paths; rejects any path containing ".." or starting with "/"
// - Environment is always isolated: only PATH, TZ, PROJECT_DIRECTORY forwarded
// - Both entry points retry up to MAX_RETRY_ATTEMPTS with exponential backoff + jitter
// - Each attempt is bounded by SCRIPT_TIMEOUT_SECS
// - Output exceeding MAX_OUTPUT_BYTES kills the child and returns an error
//
// Required Cargo.toml additions:
//   [dependencies]
//   backoff    = { version = "0.4", features = ["tokio"] }
//   dotenvy    = "0.15"
//   tokio      = { version = "1", features = ["full"] }
//   which      = "6"
//
//   [dev-dependencies]
//   serial_test = "3"
//   tempfile    = "3"

// ═══════════════════════════════════════════════════════════════════════════════
// EXAMPLE MAIN
// ═══════════════════════════════════════════════════════════════════════════════
// See src/main.rs for a complete runnable example binary.