haz-exec 0.1.0

Async task execution engine for haz.
Documentation
//! Exit-code mapping for `EXEC-021`.
//!
//! Maps a finished [`crate::run_graph::RunGraphOutcome`] (and an
//! optional [`CancellationSignal`] recorded by the binary's OS
//! signal handler) to the numeric exit status the `haz run`
//! process MUST report.
//!
//! Per locked decision D5 of the parent haz-exec plan
//! ("Pure library. No `process::exit`, stdio writes, or signal
//! handlers inside haz-exec."), this module computes the number
//! and nothing else. The actual `process::exit(code)` call lives
//! in the binary that consumes haz-exec (haz-cli).
//!
//! # Spec contract (`EXEC-021`)
//!
//! The spec requires four distinguishable classes:
//!
//! 1. Success: every requested task reached the succeeded state
//!    and no runtime invariant (`EXEC-019` cycle, `EXEC-020`
//!    overlap) was violated. Maps to `0`.
//! 2. Task failure: at least one task reached the failed state
//!    under `EXEC-009`, or the lookup-then-spawn pipeline
//!    surfaced an error, or a runtime invariant was violated.
//!    Maps to `1` in this implementation. The spec leaves the
//!    specific number to implementation v1; the class-from-class
//!    distinguishability is the load-bearing requirement.
//! 3. Signal interruption: the run was cancelled because an OS
//!    signal flipped the cancellation token (`EXEC-012`). Maps
//!    to `128 + signal_number` per POSIX convention (`130` for
//!    `SIGINT`, `143` for `SIGTERM`).
//! 4. Workspace-load / internal error: handled by the consuming
//!    binary (haz-cli), which by definition never invokes the
//!    scheduler if workspace load fails. This module does NOT
//!    produce a number for class 4; the binary picks a value
//!    distinct from those this module returns.
//!
//! # Precedence
//!
//! Signal interruption wins over task failure. A run where the
//! user pressed Ctrl+C while a task was failing exits with the
//! signal code, mirroring POSIX semantics for a process killed
//! by signal.
//!
//! # Cancellation without a recorded signal
//!
//! A [`crate::run_task::RunOutcome::Cancelled`] entry without
//! an accompanying [`CancellationSignal`] argument indicates an
//! internal cancellation (e.g. the scheduler tripped its own
//! child token after detecting a runtime cycle per `EXEC-019`).
//! In that case the runtime-cycle diagnostic lives in
//! [`crate::run_graph::RunGraphOutcome::invariant_violations`],
//! which by itself maps to the task-failure class. Treating
//! bare-Cancelled-without-signal as a third failure source
//! would double-count the cycle, so the helper does not.
//!
//! # Skipped tasks
//!
//! [`crate::run_task::RunOutcome::Skipped`] entries are effects,
//! not causes: a skip records that the cascade prevented a task
//! from running, never that the task itself failed. The original
//! failure or cancellation that caused the cascade appears in
//! the same `outcomes` map (as a `Completed` with state `Failed`
//! or a `Cancelled` entry), or in `task_errors`, or in
//! `invariant_violations`. The helper consults those sources
//! directly; it does not re-count skips.

use crate::run_graph::RunGraphOutcome;
use crate::run_task::{RunOutcome, RunState};

/// An operating-system signal that asked the run to cancel
/// (`EXEC-012`).
///
/// The consuming binary's signal handler records which signal
/// arrived and hands it to [`exit_code_for`], which turns it into
/// the POSIX-style `128 + signal_number` exit status required by
/// `EXEC-021`.
///
/// Not to be confused with [`crate::process::Signal`]: that type
/// is the vocabulary the EXECUTOR uses when signalling a child
/// (and includes `Kill`), whereas `CancellationSignal` describes
/// a signal travelling INTO the haz binary from the OS. Only
/// `SIGINT` and `SIGTERM` are modelled because they are the
/// cancellation sources a handler can be installed for
/// (`SIGKILL` cannot be caught).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CancellationSignal {
    /// `SIGINT` (POSIX signal number 2): the interactive
    /// interrupt, usually produced by Ctrl+C in a foreground
    /// shell.
    Interrupt,
    /// `SIGTERM` (POSIX signal number 15): the polite request to
    /// terminate.
    Terminate,
}

impl CancellationSignal {
    /// Numeric POSIX identifier of the signal.
    ///
    /// [`Self::Interrupt`] yields `2`; [`Self::Terminate`] yields
    /// `15`.
    #[must_use]
    pub const fn posix_number(self) -> i32 {
        // Named locally so the match reads in terms of the POSIX
        // signal names rather than bare literals.
        const SIGINT: i32 = 2;
        const SIGTERM: i32 = 15;

        match self {
            Self::Terminate => SIGTERM,
            Self::Interrupt => SIGINT,
        }
    }
}

/// Exit code reported when at least one task failed, surfaced a
/// pipeline error, or a runtime invariant was violated, AND the
/// run was not cancelled by an OS signal.
///
/// The spec (`EXEC-021`) leaves the specific number for the
/// task-failure class to implementation v1; `1` is the natural
/// POSIX "something went wrong" sentinel. The class is what is
/// load-bearing, not the number.
///
/// The value is distinct from `0` (success) and from the
/// `128 + signal_number` codes (`130`, `143`) that
/// [`exit_code_for`] returns for signal interruption, so the
/// classes stay distinguishable.
pub const EXIT_TASK_FAILURE: i32 = 1;

/// Translate a finished [`RunGraphOutcome`] into the exit status
/// the process should report (`EXEC-021`).
///
/// Resolution order:
///
/// 1. A recorded `signal` takes precedence over everything else.
///    The result is `128 + signal.posix_number()`, i.e. `130`
///    for SIGINT and `143` for SIGTERM, regardless of what
///    `outcome` contains (POSIX convention: the user's
///    cancellation intent dominates a concurrent task failure).
/// 2. With no signal, [`EXIT_TASK_FAILURE`] is returned if the
///    outcome shows a failure, meaning at least one of:
///     - `outcome.task_errors` has an entry;
///     - `outcome.invariant_violations` has an entry;
///     - some entry of `outcome.outcomes` is a
///       [`RunOutcome::Completed`] in state
///       [`RunState::Failed`].
/// 3. Anything else yields `0`.
///
/// `Skipped` and `Cancelled` entries are deliberately not
/// treated as failure indicators of their own; the module-level
/// documentation explains why.
#[must_use]
pub fn exit_code_for(outcome: &RunGraphOutcome, signal: Option<CancellationSignal>) -> i32 {
    match signal {
        // The outcome is not inspected at all once a signal is known.
        Some(sig) => 128 + sig.posix_number(),
        None if is_failure(outcome) => EXIT_TASK_FAILURE,
        None => 0,
    }
}

/// Report whether `outcome` belongs to the task-failure class.
///
/// True when `task_errors` or `invariant_violations` is
/// non-empty, or when any recorded outcome is a `Completed`
/// record whose state is `Failed`. `Skipped` and `Cancelled`
/// entries never count on their own.
fn is_failure(outcome: &RunGraphOutcome) -> bool {
    // Kept as a closure so the scan over `outcomes` only runs when
    // the two cheap emptiness checks have not already decided.
    let any_task_failed = || {
        outcome.outcomes.values().any(|entry| match entry {
            RunOutcome::Skipped(_) | RunOutcome::Cancelled(_) => false,
            RunOutcome::Completed(record) => record.state == RunState::Failed,
        })
    };

    !outcome.task_errors.is_empty()
        || !outcome.invariant_violations.is_empty()
        || any_task_failed()
}

#[cfg(test)]
mod tests {
    use std::collections::{BTreeMap, BTreeSet};
    use std::str::FromStr;

    use haz_domain::name::{ProjectName, TaskName};
    use haz_domain::task_id::TaskId;

    use crate::exit_code::{CancellationSignal, EXIT_TASK_FAILURE, exit_code_for};
    use crate::run_graph::{RunGraphOutcome, RuntimeInvariantViolation};
    use crate::run_task::{CompletedRecord, RunOutcome, RunSource, RunState};

    /// Build a `TaskId` from plain project / task names; panics
    /// if either name is rejected by its parser.
    fn task_id(project: &str, task: &str) -> TaskId {
        let project = ProjectName::from_str(project).unwrap();
        let task = TaskName::from_str(task).unwrap();
        TaskId { project, task }
    }

    /// A fresh-run `Completed` outcome for `task` in the given
    /// `state`, with placeholder hashes and no outputs.
    fn completed(task: TaskId, state: RunState) -> RunOutcome {
        let record = CompletedRecord {
            task,
            source: RunSource::FreshRun,
            state,
            exit_status: None,
            stdout_hash: [0; 32],
            stderr_hash: [0; 32],
            materialised_outputs: Vec::new(),
        };
        RunOutcome::Completed(record)
    }

    /// A run outcome with no task outcomes, no task errors and no
    /// invariant violations.
    fn empty_outcome() -> RunGraphOutcome {
        RunGraphOutcome {
            outcomes: BTreeMap::new(),
            task_errors: BTreeMap::new(),
            invariant_violations: Vec::new(),
        }
    }

    /// A run outcome whose `outcomes` map holds one `Completed`
    /// entry per `(task, state)` pair and nothing else.
    fn outcome_with_completed(entries: Vec<(TaskId, RunState)>) -> RunGraphOutcome {
        let mut graph = empty_outcome();
        graph.outcomes.extend(
            entries
                .into_iter()
                .map(|(task, state)| (task.clone(), completed(task, state))),
        );
        graph
    }

    /// A `RuntimeCycle` violation over the two tasks, with
    /// `from -> to` as the offending edge.
    fn runtime_cycle(from: TaskId, to: TaskId) -> RuntimeInvariantViolation {
        RuntimeInvariantViolation::RuntimeCycle {
            nodes: BTreeSet::from([from.clone(), to.clone()]),
            offending_edge: (from, to),
        }
    }

    #[test]
    fn exec_021_empty_outcome_returns_zero() {
        let outcome = empty_outcome();
        assert_eq!(exit_code_for(&outcome, None), 0);
    }

    #[test]
    fn exec_021_all_succeeded_returns_zero() {
        let entries = vec![
            (task_id("p", "a"), RunState::Succeeded),
            (task_id("p", "b"), RunState::Succeeded),
        ];
        let outcome = outcome_with_completed(entries);
        assert_eq!(exit_code_for(&outcome, None), 0);
    }

    #[test]
    fn exec_021_failed_task_returns_task_failure_code() {
        let entries = vec![
            (task_id("p", "a"), RunState::Succeeded),
            (task_id("p", "b"), RunState::Failed),
        ];
        let outcome = outcome_with_completed(entries);
        assert_eq!(exit_code_for(&outcome, None), EXIT_TASK_FAILURE);
    }

    #[test]
    fn exec_021_invariant_violation_alone_returns_task_failure_code() {
        // The spec treats a runtime cycle as a run-level
        // diagnostic: every cycle member still reports
        // `Succeeded`, and the violation is visible only through
        // `invariant_violations`. That alone must be enough to
        // make the exit code non-zero.
        let producer = task_id("lib", "produce");
        let consumer = task_id("app", "consume");
        let mut outcome = outcome_with_completed(vec![
            (producer.clone(), RunState::Succeeded),
            (consumer.clone(), RunState::Succeeded),
        ]);
        outcome
            .invariant_violations
            .push(runtime_cycle(producer, consumer));
        assert_eq!(exit_code_for(&outcome, None), EXIT_TASK_FAILURE);
    }

    #[test]
    fn exec_021_signal_interrupt_returns_130() {
        let code = exit_code_for(&empty_outcome(), Some(CancellationSignal::Interrupt));
        assert_eq!(code, 130);
    }

    #[test]
    fn exec_021_signal_terminate_returns_143() {
        let code = exit_code_for(&empty_outcome(), Some(CancellationSignal::Terminate));
        assert_eq!(code, 143);
    }

    #[test]
    fn exec_021_signal_wins_over_task_failure() {
        let outcome = outcome_with_completed(vec![(task_id("p", "a"), RunState::Failed)]);
        for (signal, expected) in [
            (CancellationSignal::Interrupt, 130),
            (CancellationSignal::Terminate, 143),
        ] {
            assert_eq!(exit_code_for(&outcome, Some(signal)), expected);
        }
    }

    #[test]
    fn exec_021_signal_wins_over_invariant_violation() {
        let mut outcome = empty_outcome();
        outcome
            .invariant_violations
            .push(runtime_cycle(task_id("p", "a"), task_id("p", "b")));
        let code = exit_code_for(&outcome, Some(CancellationSignal::Interrupt));
        assert_eq!(code, 130);
    }

    #[test]
    fn cancellation_signal_posix_number_matches_spec() {
        for (signal, number) in [
            (CancellationSignal::Interrupt, 2),
            (CancellationSignal::Terminate, 15),
        ] {
            assert_eq!(signal.posix_number(), number);
        }
    }

    #[test]
    fn exit_task_failure_constant_is_one() {
        assert_eq!(EXIT_TASK_FAILURE, 1);
    }
}