haz-exec 0.1.0

Async task execution engine for haz.
Documentation
//! Shared kernel for resolving a task's [`InputSpec`] / [`OutputSpec`]
//! patterns against the filesystem.
//!
//! Two distinct consumers exist:
//!
//! - [`crate::cache_key`]'s [`crate::build_cache_key`] resolves each
//!   declared [`InputSpec`](haz_domain::path::InputSpec), reading the
//!   bytes of every match and folding them into the cache-key contents.
//! - [`crate::run_task`]'s output-store path resolves each declared
//!   [`OutputSpec`](haz_domain::path::OutputSpec) after a successful
//!   run, recording the mode and host path of every match for the
//!   cache library to ingest.
//!
//! Both consumers project workspace-anchored patterns onto the host
//! filesystem the same way (`literal_workspace_segments` +
//! `host_path_from_segments` for literals, `glob_walk_origin` + a
//! recursive walker for globs); the only difference is what each one
//! does once a regular-file match has been identified. This module
//! captures the common projection helpers and the recursive walker,
//! and parameterises the per-match work behind the [`GlobMatchAction`]
//! trait.
//!
//! Visibility is `pub(crate)`: the abstraction is internal to
//! `haz-exec` and not part of its public surface.
//!
//! [`InputSpec`]: haz_domain::path::InputSpec
//! [`OutputSpec`]: haz_domain::path::OutputSpec

use std::path::{Path, PathBuf};

use haz_domain::path::{HazPath, PathAnchor, PathSegment, ProjectRoot};
use haz_vfs::{DirEntry, EntryKind, Filesystem, FsError};

/// Consumer-supplied callbacks driving [`GlobWalk::walk`].
///
/// An implementor fixes two things for the walker: the owned record
/// it produces per matched file ([`Self::Output`], e.g.
/// `OwnedInputFile` or `OwnedOutputFile`) and the typed error it
/// reports ([`Self::Error`]). The walker calls back at two points:
///
/// - [`Self::map_walk_error`], whenever traversal itself fails with
///   an [`FsError`] (a `read_dir` that failed, or a metadata lookup
///   on a symlink that failed), to translate the failure into
///   [`Self::Error`].
/// - [`Self::on_match`], once per regular-file entry whose
///   walk-relative path matched the glob. This is where the
///   consumer does its own work (reading and hashing bytes, reading
///   mode bits, and so on). It receives the filesystem handle, the
///   entry's host path, and the synthesized workspace-absolute path
///   string, and appends its [`Self::Output`] to the vec the caller
///   passed in.
pub(crate) trait GlobMatchAction<F>
where
    F: Filesystem,
{
    /// Record contributed for each matched file.
    type Output;
    /// Error type returned from both callback points.
    type Error;

    /// Convert a traversal-time [`FsError`] into [`Self::Error`].
    ///
    /// `root` is the host path the walker was working on when the
    /// failure occurred: the directory being listed, or the symlink
    /// being followed. `source` is the underlying filesystem error.
    fn map_walk_error(&self, root: PathBuf, source: FsError) -> Self::Error;

    /// Handle one matched regular file.
    ///
    /// - `fs`: the filesystem handle the walker itself is using.
    /// - `host_path`: host filesystem path of the matched entry. For
    ///   a symlink that resolves to a file this is the symlink's own
    ///   path, not the path of its target.
    /// - `workspace_absolute_path`: the workspace-anchored path
    ///   string (rooted at `/`) to record alongside the output entry.
    /// - `out`: the caller's accumulator; on success the action
    ///   appends one [`Self::Output`] to it.
    fn on_match(
        &self,
        fs: &F,
        host_path: &Path,
        workspace_absolute_path: String,
        out: &mut Vec<Self::Output>,
    ) -> Result<(), Self::Error>;
}

/// Immutable context for walking one glob.
///
/// Everything that stays fixed for the duration of a walk lives
/// here, so the recursive methods only need to be handed the parts
/// that change from level to level.
pub(crate) struct GlobWalk<'a, F, A>
where
    F: Filesystem,
    A: GlobMatchAction<F>,
{
    /// Filesystem used to list directories and to look up metadata
    /// when following symlinks.
    pub fs: &'a F,
    /// Compiled matcher for the glob being resolved.
    pub matcher: &'a globset::GlobMatcher,
    /// Prefix placed in front of the `/`-joined walk-relative
    /// segments to build the string handed to the matcher: `"/"` for
    /// a workspace-absolute glob, `""` for a project-relative glob.
    pub candidate_prefix: &'static str,
    /// Prefix placed (with a `"/"` separator after it) in front of
    /// the `/`-joined walk-relative segments to build an entry's
    /// `workspace_absolute_path`: `""` for a workspace-absolute glob
    /// or an implicit-mode project, `"/seg1/seg2"` for a
    /// project-relative glob in a nested project.
    pub workspace_prefix: String,
    /// Per-match action: produces one [`GlobMatchAction::Output`]
    /// per matched file and defines the error type the walk returns.
    pub action: &'a A,
}

impl<F, A> GlobWalk<'_, F, A>
where
    F: Filesystem,
    A: GlobMatchAction<F>,
{
    /// Walk `walk_dir` recursively and report every matching
    /// regular file to the action.
    ///
    /// Subdirectories are descended into and symlinks are followed.
    /// [`GlobMatchAction::on_match`] is invoked for each regular-file
    /// entry whose `/`-joined walk-relative path matches the
    /// configured glob.
    ///
    /// `walk_rel` holds the walker's position relative to the walk
    /// origin as a stack of UTF-8 segment strings. Each entry's name
    /// is pushed before the entry is visited and popped right after,
    /// even when the visit fails, so one buffer is shared by every
    /// recursion level and is left balanced on return.
    ///
    /// # Errors
    ///
    /// Returns the action's error when listing `walk_dir` fails
    /// (mapped through [`GlobMatchAction::map_walk_error`]) or when
    /// visiting any entry fails; the first failure stops the walk.
    pub fn walk(
        &self,
        walk_dir: &Path,
        walk_rel: &mut Vec<String>,
        out: &mut Vec<A::Output>,
    ) -> Result<(), A::Error> {
        let listing = match self.fs.read_dir(walk_dir) {
            Ok(listing) => listing,
            Err(source) => {
                return Err(self.action.map_walk_error(walk_dir.to_path_buf(), source));
            }
        };
        for child in listing {
            // Entries without a UTF-8 file name are skipped: haz
            // patterns cannot contain such names (PathSegment rejects
            // them at parse time), so no glob can match them and the
            // cache key loses nothing by ignoring them.
            if let Some(segment) = child.path.file_name().and_then(|raw| raw.to_str()) {
                walk_rel.push(segment.to_owned());
                let visited = self.visit_entry(&child, walk_rel, out);
                // Restore the stack before propagating any error.
                walk_rel.pop();
                visited?;
            }
        }
        Ok(())
    }

    /// Dispatch one directory entry according to its kind.
    ///
    /// Directories are walked recursively and regular files are
    /// offered to the matcher. A symlink is classified by the
    /// metadata of what it resolves to, while the symlink's own path
    /// is what gets walked or reported. Every other kind (devices,
    /// FIFOs, sockets, and a symlink whose looked-up kind is still
    /// `Symlink`) is ignored.
    ///
    /// # Errors
    ///
    /// Returns the action's error when the metadata lookup for a
    /// symlink fails, or when the recursive walk or the match
    /// callback fails.
    fn visit_entry(
        &self,
        entry: &DirEntry,
        walk_rel: &mut Vec<String>,
        out: &mut Vec<A::Output>,
    ) -> Result<(), A::Error> {
        let host_path = &entry.path;
        // Declared up front so the looked-up metadata outlives the
        // borrow of its `kind` taken in the symlink branch below.
        let resolved;
        let kind = if matches!(entry.metadata.kind, EntryKind::Symlink) {
            resolved = self
                .fs
                .metadata(host_path)
                .map_err(|source| self.action.map_walk_error(host_path.clone(), source))?;
            &resolved.kind
        } else {
            &entry.metadata.kind
        };
        match kind {
            EntryKind::Dir => self.walk(host_path, walk_rel, out),
            EntryKind::File => self.maybe_match_file(host_path, walk_rel, out),
            EntryKind::Symlink
            | EntryKind::BlockDevice
            | EntryKind::CharDevice
            | EntryKind::Fifo
            | EntryKind::Socket => Ok(()),
        }
    }

    /// Test one regular file against the glob and, on a match, hand
    /// it to the action.
    ///
    /// The matcher sees `candidate_prefix` followed by the
    /// `/`-joined `walk_rel`. The action receives `workspace_prefix`,
    /// a `/`, and the same joined segments as the workspace-absolute
    /// path. Files that do not match yield `Ok(())` without calling
    /// the action.
    fn maybe_match_file(
        &self,
        host_path: &Path,
        walk_rel: &[String],
        out: &mut Vec<A::Output>,
    ) -> Result<(), A::Error> {
        let relative = walk_rel.join("/");
        let candidate = format!("{}{}", self.candidate_prefix, relative);
        if self.matcher.is_match(&candidate) {
            let workspace_absolute_path = format!("{}/{}", self.workspace_prefix, relative);
            self.action
                .on_match(self.fs, host_path, workspace_absolute_path, out)
        } else {
            Ok(())
        }
    }
}

/// Express a literal [`HazPath`] as workspace-absolute segments,
/// given the [`ProjectRoot`] it belongs to.
///
/// The returned vec borrows its segments from the two inputs:
///
/// - A [`HazPath::WorkspaceAbsolute`] path is returned as-is,
///   whatever the project root.
/// - A [`HazPath::ProjectRelative`] path under
///   [`ProjectRoot::WorkspaceRoot`] (implicit-mode project per
///   `DISC-003`) is already workspace-absolute and is returned
///   as-is.
/// - A [`HazPath::ProjectRelative`] path under
///   [`ProjectRoot::Nested`] gets the project root's segments placed
///   in front of its own.
pub(crate) fn literal_workspace_segments<'a>(
    haz_path: &'a HazPath,
    project_root: &'a ProjectRoot,
) -> Vec<&'a PathSegment> {
    match haz_path {
        HazPath::WorkspaceAbsolute(absolute) => absolute.iter().collect(),
        HazPath::ProjectRelative(relative) => match project_root {
            ProjectRoot::WorkspaceRoot => relative.iter().collect(),
            ProjectRoot::Nested(project) => project
                .segments()
                .iter()
                .chain(relative.iter())
                .collect(),
        },
    }
}

/// Work out where a glob walk starts and how its matches are
/// spelled.
///
/// Returns a triple:
///
/// 1. the host directory at which the walk begins;
/// 2. the workspace-absolute prefix to put in front of every matched
///    path: `"/proj_root"` for [`PathAnchor::ProjectRelative`]
///    combined with [`ProjectRoot::Nested`], `""` in every other
///    case;
/// 3. the candidate-string prefix that turns walk-relative segments
///    into the spelling globset matches against: `"/"` for
///    [`PathAnchor::WorkspaceAbsolute`], `""` for
///    [`PathAnchor::ProjectRelative`] (matching each pattern's
///    `Display` form).
pub(crate) fn glob_walk_origin(
    workspace_host: &Path,
    project_root: &ProjectRoot,
    anchor: PathAnchor,
) -> (PathBuf, String, &'static str) {
    match anchor {
        PathAnchor::WorkspaceAbsolute => (workspace_host.to_path_buf(), String::new(), "/"),
        PathAnchor::ProjectRelative => match project_root {
            ProjectRoot::WorkspaceRoot => (workspace_host.to_path_buf(), String::new(), ""),
            ProjectRoot::Nested(project) => {
                // Only this case starts below the workspace root and
                // needs the project's own segments as a prefix.
                let project_segments: Vec<&PathSegment> = project.segments().iter().collect();
                let walk_start = host_path_from_segments(workspace_host, &project_segments);
                let workspace_prefix = workspace_absolute_string_from_segments(&project_segments);
                (walk_start, workspace_prefix, "")
            }
        },
    }
}

/// Append each workspace segment, in order, to `workspace_root` and
/// return the resulting host filesystem path.
///
/// With no segments the result is a copy of `workspace_root`.
pub(crate) fn host_path_from_segments(workspace_root: &Path, segments: &[&PathSegment]) -> PathBuf {
    let mut host = PathBuf::from(workspace_root);
    // `PathBuf::extend` pushes each item in turn, one component per
    // segment.
    host.extend(segments.iter().map(|segment| segment.as_str()));
    host
}

/// Format workspace segments as a workspace-absolute path string,
/// writing a `/` before each segment (`"/seg1/seg2/..."`).
///
/// An empty segment list produces the empty string.
pub(crate) fn workspace_absolute_string_from_segments(segments: &[&PathSegment]) -> String {
    segments.iter().fold(String::new(), |mut rendered, segment| {
        rendered.push('/');
        rendered.push_str(segment.as_str());
        rendered
    })
}