haz-exec 0.1.0 - Docs.rs

//! Shared scaffolding for the multi-project integration tests
//! in this crate.
//!
//! Each test file in `tests/` is its own Cargo integration target;
//! Cargo bundles a `common/` subdirectory by convention into every
//! target that does `mod common;`. The items below are visible to
//! every scenario file in `tests/` and only to them.
//!
//! The scope is deliberately small: a `Fixture<F>` over either
//! `MemFilesystem` or `StdFilesystem`, a `Recorder` impl of
//! `RunObserver`, and the workspace / project / task / graph
//! builders the scenario files need. No assertion DSL: scenario
//! files match on `RunOutcome` / `CompletedRecord` directly.

#![allow(dead_code)] // not every helper is used by every scenario

use std::collections::{BTreeMap, BTreeSet};
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Mutex;

use haz_cache::Cache;
use haz_dag::edge::{Edge, EdgeKind};
use haz_dag::graph::TaskGraph;
use haz_domain::action::TaskAction;
use haz_domain::env::{EnvSettings, EnvVarName};
use haz_domain::mutex::{Mutex as DomainMutex, MutexMode, MutexScope};
use haz_domain::name::{MutexName, ProjectName, TagName, TaskName};
use haz_domain::path::{CanonicalPath, InputSpec, OutputSpec, ProjectRoot, WorkspaceRootPath};
use haz_domain::project::Project;
use haz_domain::settings::WorkspaceSettings;
use haz_domain::settings::cache::HashAlgo;
use haz_domain::settings::concurrency::ConcurrencyDefault;
use haz_domain::task::Task;
use haz_domain::task_id::TaskId;
use haz_domain::task_ref::TaskRef;
use haz_domain::workspace::Workspace;
use haz_exec::mock_impl::MockProcessSpawner;
use haz_exec::process::ProcessSpawner;
use haz_exec::run_task::{
    CancelledRecord, CompletedRecord, RunContext, RunObserver, RunOutcome, RunSource, RunState,
    SkipCause, SkipRecord,
};
use haz_vfs::{MemFilesystem, StdFilesystem, WritableFilesystem};
use nonempty::NonEmpty;
use tokio_util::sync::CancellationToken;

// ============================================================
// Identifier and reference builders
// ============================================================

/// Parse `s` into a [`ProjectName`].
///
/// # Panics
///
/// Panics if `ProjectName::from_str` rejects `s`.
pub fn project_name(s: &str) -> ProjectName {
    let parsed = ProjectName::from_str(s);
    parsed.unwrap()
}

/// Parse `s` into a [`TaskName`].
///
/// # Panics
///
/// Panics if `TaskName::from_str` rejects `s`.
pub fn task_name(s: &str) -> TaskName {
    let parsed = TaskName::from_str(s);
    parsed.unwrap()
}

/// Parse `s` into a [`TagName`].
///
/// # Panics
///
/// Panics if `TagName::from_str` rejects `s`.
pub fn tag_name(s: &str) -> TagName {
    let parsed = TagName::from_str(s);
    parsed.unwrap()
}

/// Build a [`TaskId`] from a project name and a task name.
///
/// # Panics
///
/// Panics if either string fails to parse (project first, then task).
pub fn tid(project: &str, task: &str) -> TaskId {
    let project = project_name(project);
    let task = task_name(task);
    TaskId { project, task }
}

/// Parse `s` into an [`InputSpec`].
///
/// # Panics
///
/// Panics if `InputSpec::parse` rejects `s`.
pub fn input_spec(s: &str) -> InputSpec {
    let parsed = InputSpec::parse(s);
    parsed.unwrap()
}

/// Parse `s` into an [`OutputSpec`].
///
/// # Panics
///
/// Panics if `OutputSpec::parse` rejects `s`.
pub fn output_spec(s: &str) -> OutputSpec {
    let parsed = OutputSpec::parse(s);
    parsed.unwrap()
}

/// Parse `s` into a [`TaskRef`].
///
/// # Panics
///
/// Panics if `TaskRef::parse` rejects `s`.
pub fn task_ref(s: &str) -> TaskRef {
    let parsed = TaskRef::parse(s);
    parsed.unwrap()
}

pub fn nested_project_root(path: &str) -> ProjectRoot {
    ProjectRoot::Nested(CanonicalPath::parse_workspace_absolute(path).unwrap())
}

/// Parse a workspace-absolute path string into a [`CanonicalPath`].
///
/// # Panics
///
/// Panics if `CanonicalPath::parse_workspace_absolute` rejects `path`.
pub fn workspace_absolute_canonical(path: &str) -> CanonicalPath {
    let parsed = CanonicalPath::parse_workspace_absolute(path);
    parsed.unwrap()
}

// ============================================================
// Action / task / project / graph builders
// ============================================================

/// The default action used by [`TaskBuilder`]: a command whose argv is
/// the single element `"true"`.
///
/// The argv is built with `NonEmpty::new`, which is infallible for a
/// single head element, so no `unwrap` is needed.
pub fn command_true() -> TaskAction {
    TaskAction::Command(NonEmpty::new("true".to_owned()))
}

/// Fluent builder for the [`Task`] values scenario files need.
///
/// `TaskBuilder::new` seeds every field with a default and
/// `TaskBuilder::build` converts the accumulated state into a `Task`.
pub struct TaskBuilder {
    // Raw task name; parsed into a `TaskName` by `build`.
    name: String,
    // Action to run; defaults to `command_true()`.
    action: TaskAction,
    // Input specs, in the order they were added.
    inputs: Vec<InputSpec>,
    // Output specs, in the order they were added.
    outputs: Vec<OutputSpec>,
    // Dependency references, forwarded to `Task::deps`.
    deps: Vec<TaskRef>,
    // Weak dependency references, forwarded to `Task::weak_deps`.
    weak_deps: Vec<TaskRef>,
    // Starts empty in `new`; `build` does not copy it into the `Task`.
    tags: BTreeSet<TagName>,
    // Optional mutex declaration, forwarded to `Task::mutex`.
    mutex: Option<DomainMutex>,
}

impl TaskBuilder {
    /// Start a builder for a task called `name`.
    ///
    /// The action defaults to [`command_true`]; inputs, outputs,
    /// dependencies and tags start empty and no mutex is declared.
    /// `name` is stored as-is and only validated by [`Self::build`].
    pub fn new(name: &str) -> Self {
        Self {
            name: name.to_owned(),
            action: command_true(),
            inputs: Vec::new(),
            outputs: Vec::new(),
            deps: Vec::new(),
            weak_deps: Vec::new(),
            tags: BTreeSet::new(),
            mutex: None,
        }
    }

    /// Override the default `command: ["true"]` action with an
    /// explicit argv. Used by scenarios that need each task to
    /// carry a distinct cache key (`CACHE-001`'s content-
    /// addressing de-duplicates byte-identical work across
    /// tasks).
    ///
    /// # Panics
    ///
    /// Panics if `argv` is empty.
    pub fn command(mut self, argv: &[&str]) -> Self {
        let argv: Vec<String> = argv.iter().map(|s| (*s).to_owned()).collect();
        self.action = TaskAction::Command(NonEmpty::from_vec(argv).expect("non-empty argv"));
        self
    }

    /// Append an input spec parsed from `pattern`.
    ///
    /// # Panics
    ///
    /// Panics if `pattern` is not a valid [`InputSpec`].
    pub fn input(mut self, pattern: &str) -> Self {
        self.inputs.push(input_spec(pattern));
        self
    }

    /// Append an output spec parsed from `pattern`.
    ///
    /// # Panics
    ///
    /// Panics if `pattern` is not a valid [`OutputSpec`].
    pub fn output(mut self, pattern: &str) -> Self {
        self.outputs.push(output_spec(pattern));
        self
    }

    /// Append a dependency on the task referenced by `reference`;
    /// it ends up in `Task::deps`.
    ///
    /// # Panics
    ///
    /// Panics if `reference` is not a valid [`TaskRef`].
    pub fn dep(mut self, reference: &str) -> Self {
        self.deps.push(task_ref(reference));
        self
    }

    /// Append a weak dependency on the task referenced by
    /// `reference`; it ends up in `Task::weak_deps`. Mirrors
    /// [`Self::dep`] so scenarios can populate the weak list too.
    ///
    /// # Panics
    ///
    /// Panics if `reference` is not a valid [`TaskRef`].
    pub fn weak_dep(mut self, reference: &str) -> Self {
        self.weak_deps.push(task_ref(reference));
        self
    }

    /// Attach a mutex declaration. Workspace-global vs project-
    /// scoped is selected by `scope` (matches `MUTEX-001`);
    /// `mode` selects exclusive vs shared (`MUTEX-002`).
    /// A later call replaces any earlier declaration.
    ///
    /// # Panics
    ///
    /// Panics if `name` is not a valid [`MutexName`].
    pub fn mutex(mut self, scope: MutexScope, name: &str, mode: MutexMode) -> Self {
        self.mutex = Some(DomainMutex {
            scope,
            name: MutexName::from_str(name).unwrap(),
            mode,
        });
        self
    }

    /// Consume the builder and produce the [`Task`].
    ///
    /// The task's `env` is always `EnvSettings::default()`; the
    /// builder's `tags` set is not part of the produced `Task`.
    ///
    /// # Panics
    ///
    /// Panics if the name given to [`Self::new`] is not a valid
    /// [`TaskName`].
    pub fn build(self) -> Task {
        Task {
            name: task_name(&self.name),
            action: self.action,
            inputs: self.inputs,
            outputs: self.outputs,
            deps: self.deps,
            weak_deps: self.weak_deps,
            mutex: self.mutex,
            env: EnvSettings::default(),
        }
    }
}

/// Build a project with no project-level tags.
///
/// Thin wrapper over [`make_project_with_tags`] that passes an empty
/// tag set.
pub fn make_project(name: &str, root: ProjectRoot, tasks: Vec<Task>) -> Project {
    let no_tags = BTreeSet::new();
    make_project_with_tags(name, root, tasks, no_tags)
}

/// Build a project carrying an explicit tag set. Project-level
/// tags are inherited by every task the project owns and feed the
/// per-tag concurrency cap (`CFG-014`, `EXEC-005`).
///
/// Tasks are keyed by their own `name`; if two tasks share a name
/// the later one in `tasks` wins.
///
/// # Panics
///
/// Panics if `name` is not a valid project name.
pub fn make_project_with_tags(
    name: &str,
    root: ProjectRoot,
    tasks: Vec<Task>,
    tags: BTreeSet<TagName>,
) -> Project {
    let mut by_name: BTreeMap<TaskName, Task> = BTreeMap::new();
    for task in tasks {
        by_name.insert(task.name.clone(), task);
    }
    Project {
        name: project_name(name),
        root,
        tags,
        tasks: by_name,
    }
}

/// Produce default [`WorkspaceSettings`] (the `CFG-027`..`CFG-031`
/// defaults) with one change: the global concurrency default is a
/// fixed cap of `n`. Handy when a scenario must ensure the global
/// cap is not what limits parallelism (e.g. proving mutex
/// serialisation or per-tag-cap behaviour).
///
/// # Panics
///
/// Panics if `n` is zero.
pub fn workspace_settings_with_fixed_cap(n: usize) -> WorkspaceSettings {
    let mut settings = WorkspaceSettings::default();
    let cap = NonZeroUsize::new(n).expect("cap must be non-zero");
    settings.concurrency.default = ConcurrencyDefault::Fixed(cap);
    settings
}

/// Produce [`WorkspaceSettings`] with a fixed global cap plus one
/// per-tag cap (`EXEC-005`). Builds on
/// [`workspace_settings_with_fixed_cap`] for scenarios that prove
/// tag-cap behaviour.
///
/// # Panics
///
/// Panics if `default_cap` or `tag_cap` is zero, or if `tag` is not
/// a valid tag name.
pub fn workspace_settings_with_tag_cap(
    default_cap: usize,
    tag: &str,
    tag_cap: usize,
) -> WorkspaceSettings {
    let mut settings = workspace_settings_with_fixed_cap(default_cap);
    let key = tag_name(tag);
    let limit = NonZeroUsize::new(tag_cap).expect("tag cap must be non-zero");
    settings.concurrency.tags.insert(key, limit);
    settings
}

pub fn make_workspace_at(
    workspace_host: &Path,
    projects: Vec<Project>,
    settings: WorkspaceSettings,
) -> Workspace {
    let projects: BTreeMap<ProjectName, Project> =
        projects.into_iter().map(|p| (p.name.clone(), p)).collect();
    Workspace {
        root: WorkspaceRootPath::try_new(workspace_host.to_path_buf()).unwrap(),
        projects,
        overlays: BTreeMap::new(),
        settings,
    }
}

/// Build a [`TaskGraph`] by collecting `nodes` and `edges` into the
/// graph's own collection types.
pub fn make_graph(nodes: Vec<TaskId>, edges: Vec<Edge>) -> TaskGraph {
    let node_set = nodes.into_iter().collect();
    let edge_set = edges.into_iter().collect();
    TaskGraph {
        nodes: node_set,
        edges: edge_set,
    }
}

/// Build an [`Edge`] of kind [`EdgeKind::Hard`] between `from` and `to`.
pub fn hard_edge(from: TaskId, to: TaskId) -> Edge {
    let kind = EdgeKind::Hard;
    Edge { from, to, kind }
}

// ============================================================
// Fixture (filesystem-parameterised)
// ============================================================

/// Everything a scenario needs to drive a run, parameterised over the
/// filesystem implementation `F`.
///
/// Constructed by `Fixture::new_mem` (in-memory) or `Fixture::new_std`
/// (host filesystem); `make_ctx` borrows its fields to build a
/// `RunContext`.
pub struct Fixture<F: WritableFilesystem> {
    /// Cache over the fixture's filesystem; the constructors create it
    /// with `HashAlgo::Blake3`.
    pub cache: Cache<F>,
    /// Workspace under test.
    pub workspace: Workspace,
    /// Task graph under test.
    pub graph: TaskGraph,
    /// Host environment handed to the run; the constructors leave it empty.
    pub host_env: BTreeMap<EnvVarName, String>,
    /// Cancellation token handed to the run; the constructors create a
    /// fresh one.
    pub cancel: CancellationToken,
}

impl Fixture<MemFilesystem> {
    /// Create a fixture backed by a fresh [`MemFilesystem`] rooted at
    /// `workspace_host`. The host directory itself and the directory
    /// of every `ProjectRoot::Nested` project in `workspace` are
    /// created in the simulated filesystem up front.
    ///
    /// # Panics
    ///
    /// Panics if any `add_dir` call fails.
    pub fn new_mem(workspace_host: &Path, workspace: Workspace, graph: TaskGraph) -> Self {
        let mut mem_fs = MemFilesystem::new();
        mem_fs.add_dir(workspace_host).unwrap();

        // Only nested roots map to a sub-directory of the host root.
        let nested_roots = workspace
            .projects
            .values()
            .filter_map(|project| match &project.root {
                ProjectRoot::Nested(canonical) => Some(canonical),
                _ => None,
            });
        for canonical in nested_roots {
            let relative = workspace_absolute_to_relative(canonical);
            let host_path = workspace_host.join(relative);
            mem_fs.add_dir(&host_path).unwrap();
        }

        Self {
            cache: Cache::new(mem_fs, workspace_host, HashAlgo::Blake3),
            workspace,
            graph,
            host_env: BTreeMap::new(),
            cancel: CancellationToken::new(),
        }
    }
}

impl Fixture<StdFilesystem> {
    /// Create a fixture backed by the host filesystem and rooted at
    /// `workspace_host`, which MUST already exist (typically a
    /// [`tempfile::TempDir`] path). The directory of every
    /// `ProjectRoot::Nested` project in `workspace` is created
    /// (`mkdir -p` style) beneath `workspace_host`.
    ///
    /// # Panics
    ///
    /// Panics if creating any project directory fails.
    pub fn new_std(workspace_host: &Path, workspace: Workspace, graph: TaskGraph) -> Self {
        // Only nested roots map to a sub-directory of the host root.
        let nested_roots = workspace
            .projects
            .values()
            .filter_map(|project| match &project.root {
                ProjectRoot::Nested(canonical) => Some(canonical),
                _ => None,
            });
        for canonical in nested_roots {
            let relative = workspace_absolute_to_relative(canonical);
            let host_path = workspace_host.join(relative);
            std::fs::create_dir_all(&host_path).unwrap();
        }

        let std_fs = StdFilesystem::new();
        Self {
            cache: Cache::new(std_fs, workspace_host, HashAlgo::Blake3),
            workspace,
            graph,
            host_env: BTreeMap::new(),
            cancel: CancellationToken::new(),
        }
    }
}

/// Render `canonical` via `to_string` and strip every leading `/`, so
/// the result can be joined onto a host directory.
fn workspace_absolute_to_relative(canonical: &CanonicalPath) -> PathBuf {
    let rendered = canonical.to_string();
    let relative = rendered.trim_start_matches('/');
    PathBuf::from(relative)
}

/// Build a [`RunContext`] over `fixture`, `spawner`, and `observer`.
///
/// The context borrows everything from `fixture` for its lifetime;
/// it is consumed by `run_graph` / `run_task`.
pub fn make_ctx<'a, F, S, O>(
    fixture: &'a Fixture<F>,
    spawner: &'a S,
    observer: &'a O,
) -> RunContext<'a, F, S, O>
where
    F: WritableFilesystem,
    S: ProcessSpawner,
    O: RunObserver,
{
    RunContext {
        fs: fixture.cache.fs(),
        cache: &fixture.cache,
        spawner,
        observer,
        workspace: &fixture.workspace,
        graph: &fixture.graph,
        host_env: &fixture.host_env,
        algo: HashAlgo::Blake3,
        cancel: &fixture.cancel,
    }
}

/// Map a workspace-absolute path (e.g. `/lib/out.o`) to the
/// host-filesystem path under `workspace_host` that the cache reads /
/// writes. Every leading `/` is stripped before joining, so the
/// result always stays beneath `workspace_host`. Scenario files use
/// this to pre-write inputs and outputs in either `MemFilesystem` or
/// `StdFilesystem`.
pub fn host_path(workspace_host: &Path, workspace_absolute: &str) -> PathBuf {
    let relative = workspace_absolute.trim_start_matches('/');
    let mut resolved = workspace_host.to_path_buf();
    resolved.push(relative);
    resolved
}

// ============================================================
// Spawner helpers
// ============================================================

/// Queue `n` `MockSpec::default()` specs on `spawner`.
pub fn push_n_default_specs(spawner: &MockProcessSpawner, n: usize) {
    use haz_exec::mock_impl::MockSpec;
    std::iter::repeat_with(MockSpec::default)
        .take(n)
        .for_each(|spec| {
            spawner.push_spec(spec);
        });
}

/// Queue one spec on `spawner` that is `MockSpec::default()` except
/// for its `exit_code`.
pub fn push_spec_with_exit(spawner: &MockProcessSpawner, exit_code: i32) {
    use haz_exec::mock_impl::MockSpec;
    let spec = MockSpec {
        exit_code,
        ..Default::default()
    };
    spawner.push_spec(spec);
}

/// Queue a spec that blocks until SIGTERM (or SIGKILL), emitting the
/// supplied pre-canned `stdout` / `stderr` bytes before the blocking
/// wait starts. Scenarios use it to keep a task in flight long
/// enough for cancellation to trigger (`EXEC-013`).
///
/// The spec uses `MockBehaviour::OnTerminate` with a graceful exit
/// code of 0 and an `exit_code` of 137.
// NOTE(review): 137 presumably models a SIGKILL exit (128 + 9) — confirm
// against `MockSpec`.
pub fn push_spec_terminate_with_streams(
    spawner: &MockProcessSpawner,
    stdout: &[u8],
    stderr: &[u8],
) {
    use haz_exec::mock_impl::{MockBehaviour, MockSpec};
    let blocking = MockSpec {
        behaviour: MockBehaviour::OnTerminate {
            graceful_exit_code: 0,
        },
        exit_code: 137,
        stdout: Vec::from(stdout),
        stderr: Vec::from(stderr),
        ..MockSpec::default()
    };
    spawner.push_spec(blocking);
}

// ============================================================
// Observer
// ============================================================

/// One observer callback as captured by [`Recorder`].
///
/// Stdout / stderr callbacks have no variant here; `Recorder` ignores
/// them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Event {
    /// `on_task_started` fired for the task.
    Started(TaskId),
    /// `on_task_finished` fired; carries the record's `state` and `source`.
    Finished(TaskId, RunState, RunSource),
    /// `on_task_skipped` fired; carries the record's `cause`.
    Skipped(TaskId, SkipCause),
    /// `on_task_cancelled` fired; carries a clone of the whole record.
    Cancelled(TaskId, CancelledRecord),
}

/// [`RunObserver`] implementation that appends every lifecycle
/// callback to an in-memory event log for later inspection.
#[derive(Default)]
pub struct Recorder {
    // Events in the order the callbacks fired; the mutex lets the
    // `&self` observer callbacks append to it.
    events: Mutex<Vec<Event>>,
}

impl Recorder {
    /// Snapshot (clone) of every event recorded so far, in order.
    pub fn events(&self) -> Vec<Event> {
        let log = self.events.lock().unwrap();
        Vec::clone(&log)
    }

    /// Task ids of all `Started` events, in the order they were recorded.
    pub fn started_order(&self) -> Vec<TaskId> {
        let mut order = Vec::new();
        for event in self.events() {
            if let Event::Started(task) = event {
                order.push(task);
            }
        }
        order
    }

    /// Index of the first `Started(task)` event. Panics if absent.
    pub fn pos_started(&self, task: &TaskId) -> usize {
        let found = self.events().iter().position(|event| match event {
            Event::Started(t) => t == task,
            _ => false,
        });
        match found {
            Some(index) => index,
            None => panic!("expected Started({task:?}) in {:?}", self.events()),
        }
    }

    /// Index of the first `Finished(task, _, _)` event. Panics if absent.
    pub fn pos_finished(&self, task: &TaskId) -> usize {
        let found = self.events().iter().position(|event| match event {
            Event::Finished(t, _, _) => t == task,
            _ => false,
        });
        match found {
            Some(index) => index,
            None => panic!("expected Finished({task:?}) in {:?}", self.events()),
        }
    }

    /// Number of `Started(task)` events recorded for `task`. Used to
    /// assert the `MUTEX-005` yield-on-contention invariant: a task
    /// that loses the mutex check returns to ready WITHOUT a
    /// duplicate `Started` event.
    pub fn count_started(&self, task: &TaskId) -> usize {
        let snapshot = self.events();
        let mut seen = 0;
        for event in &snapshot {
            if let Event::Started(t) = event {
                if t == task {
                    seen += 1;
                }
            }
        }
        seen
    }
}

impl RunObserver for Recorder {
    /// Record a `Started` event for `task`.
    fn on_task_started(&self, task: &TaskId) {
        let mut log = self.events.lock().unwrap();
        log.push(Event::Started(task.clone()));
    }

    // Output chunks are ignored; `Event` has no variant for them.
    fn on_stdout(&self, _: &TaskId, _: &[u8]) {}
    fn on_stderr(&self, _: &TaskId, _: &[u8]) {}

    /// Record a `Finished` event carrying the record's state and source.
    fn on_task_finished(&self, task: &TaskId, record: &CompletedRecord) {
        let mut log = self.events.lock().unwrap();
        let event = Event::Finished(task.clone(), record.state, record.source);
        log.push(event);
    }

    /// Record a `Skipped` event carrying a clone of the skip cause.
    fn on_task_skipped(&self, task: &TaskId, record: &SkipRecord) {
        let mut log = self.events.lock().unwrap();
        let event = Event::Skipped(task.clone(), record.cause.clone());
        log.push(event);
    }

    /// Record a `Cancelled` event carrying a clone of the whole record.
    fn on_task_cancelled(&self, task: &TaskId, record: &CancelledRecord) {
        let mut log = self.events.lock().unwrap();
        let event = Event::Cancelled(task.clone(), record.clone());
        log.push(event);
    }
}

// ============================================================
// Outcome unwrappers
// ============================================================

/// Borrow the [`CompletedRecord`] stored for `task` in `outcomes`.
///
/// # Panics
///
/// Panics if `task` has no outcome, or if its outcome is `Skipped`
/// or `Cancelled`.
pub fn completed_for<'a>(
    outcomes: &'a BTreeMap<TaskId, RunOutcome>,
    task: &TaskId,
) -> &'a CompletedRecord {
    let outcome = outcomes
        .get(task)
        .unwrap_or_else(|| panic!("no outcome for {task:?}"));
    match outcome {
        RunOutcome::Completed(r) => r,
        RunOutcome::Skipped(s) => {
            panic!("expected Completed for {task:?}, got Skipped({s:?})")
        }
        RunOutcome::Cancelled(c) => {
            panic!("expected Completed for {task:?}, got Cancelled({c:?})")
        }
    }
}

/// Borrow the [`SkipRecord`] stored for `task` in `outcomes`.
///
/// # Panics
///
/// Panics if `task` has no outcome, or if its outcome is `Completed`
/// or `Cancelled`.
pub fn skipped_for<'a>(
    outcomes: &'a BTreeMap<TaskId, RunOutcome>,
    task: &TaskId,
) -> &'a SkipRecord {
    let outcome = outcomes
        .get(task)
        .unwrap_or_else(|| panic!("no outcome for {task:?}"));
    match outcome {
        RunOutcome::Skipped(s) => s,
        RunOutcome::Completed(r) => {
            panic!("expected Skipped for {task:?}, got Completed({r:?})")
        }
        RunOutcome::Cancelled(c) => {
            panic!("expected Skipped for {task:?}, got Cancelled({c:?})")
        }
    }
}

/// Borrow the [`CancelledRecord`] stored for `task` in `outcomes`.
///
/// # Panics
///
/// Panics if `task` has no outcome, or if its outcome is `Completed`
/// or `Skipped`.
pub fn cancelled_for<'a>(
    outcomes: &'a BTreeMap<TaskId, RunOutcome>,
    task: &TaskId,
) -> &'a CancelledRecord {
    let outcome = outcomes
        .get(task)
        .unwrap_or_else(|| panic!("no outcome for {task:?}"));
    match outcome {
        RunOutcome::Cancelled(c) => c,
        RunOutcome::Completed(r) => {
            panic!("expected Cancelled for {task:?}, got Completed({r:?})")
        }
        RunOutcome::Skipped(s) => {
            panic!("expected Cancelled for {task:?}, got Skipped({s:?})")
        }
    }
}