Skip to main content

haz_cache_key/
pattern_walk.rs

//! Shared kernel for resolving a task's [`InputSpec`] / [`OutputSpec`]
//! patterns against the filesystem.
//!
//! Two distinct consumers exist:
//!
//! - Cache-key input resolution resolves each declared
//!   [`InputSpec`](haz_domain::path::InputSpec), reading the bytes of
//!   every match and folding them into the cache-key contents.
//! - The executor's output-store path resolves each declared
//!   [`OutputSpec`](haz_domain::path::OutputSpec) after a successful
//!   run, recording the mode and host path of every match for the
//!   cache library to ingest.
//!
//! Both consumers project workspace-anchored patterns onto the host
//! filesystem the same way: literals go through
//! `literal_workspace_segments` + `host_path_from_segments`, and globs
//! go through `glob_walk_origin` + a recursive walker. The only
//! difference is what each consumer does once a regular-file match
//! has been identified. This module captures the common projection
//! helpers and the recursive walker, and parameterises the per-match
//! work behind the [`GlobMatchAction`] trait.
//!
//! [`InputSpec`]: haz_domain::path::InputSpec
//! [`OutputSpec`]: haz_domain::path::OutputSpec
25
26use std::path::{Path, PathBuf};
27
28use haz_domain::path::{HazPath, PathAnchor, PathSegment, ProjectRoot};
29use haz_vfs::{DirEntry, EntryKind, Filesystem, FsError};
30
31/// Per-consumer hooks for [`GlobWalk::walk`].
32///
33/// A type implementing this trait pairs the consumer's owned output
34/// shape (e.g. `OwnedInputFile`, `OwnedOutputFile`) with its typed
35/// error vocabulary, and supplies the two callback points the walker
36/// invokes:
37///
38/// - [`Self::map_walk_error`] turns an [`FsError`] raised during
39///   directory traversal (a failed `read_dir`, a failed
40///   metadata-follow on a symlink) into the consumer's error type.
41/// - [`Self::on_match`] runs the per-match work for one regular-file
42///   entry whose walk-relative path matched the glob: read bytes and
43///   hash them, read mode bits, etc. The walker hands over the
44///   filesystem handle, the host path, and the synthesized
45///   workspace-absolute path string, and lets the action push the
46///   resulting [`Self::Output`] into the caller-supplied vec.
47pub trait GlobMatchAction<F: Filesystem> {
48    /// One entry the action contributes per matched file.
49    type Output;
50    /// Typed error the action surfaces from its callback points.
51    type Error;
52
53    /// Wrap an [`FsError`] raised while the walker was reading a
54    /// directory or following a symlink. `root` is the host path the
55    /// walker was operating on at the moment of failure.
56    fn map_walk_error(&self, root: PathBuf, source: FsError) -> Self::Error;
57
58    /// Perform the consumer-specific work for one matched regular
59    /// file.
60    ///
61    /// - `host_path` is the host filesystem path of the matched
62    ///   entry (the symlink's own path, not its target, for
63    ///   symlink-to-file matches).
64    /// - `workspace_absolute_path` is the workspace-anchored path
65    ///   string (rooted at `/`) the consumer should record alongside
66    ///   the output entry.
67    /// - `out` is the caller's accumulator; the action appends one
68    ///   [`Self::Output`] on success.
69    ///
70    /// # Errors
71    ///
72    /// Returns the action's [`Self::Error`] if its per-match work
73    /// fails (for example, reading or hashing the matched file).
74    fn on_match(
75        &self,
76        fs: &F,
77        host_path: &Path,
78        workspace_absolute_path: String,
79        out: &mut Vec<Self::Output>,
80    ) -> Result<(), Self::Error>;
81}
82
83/// Per-glob walk state: holds every immutable input the recursion
84/// needs, so the recursive methods take only the changing parts.
85pub struct GlobWalk<'a, F: Filesystem, A: GlobMatchAction<F>> {
86    /// Filesystem the walker reads directory entries and follows
87    /// symlinks through.
88    pub fs: &'a F,
89    /// Compiled glob matcher.
90    pub matcher: &'a globset::GlobMatcher,
91    /// Either `"/"` (workspace-absolute glob) or `""`
92    /// (project-relative glob). Prepended to the joined walk-relative
93    /// segments to form the candidate string fed to the matcher.
94    pub candidate_prefix: &'static str,
95    /// Either `""` (workspace-absolute glob, or implicit-mode project)
96    /// or `"/seg1/seg2"` (project-relative glob in a nested project).
97    /// Prepended (followed by `"/"`) to the joined walk-relative
98    /// segments to form an entry's `workspace_absolute_path`.
99    pub workspace_prefix: String,
100    /// Per-match action: emits one [`GlobMatchAction::Output`] per matched file and
101    /// owns the consumer's error vocabulary.
102    pub action: &'a A,
103}
104
105impl<F: Filesystem, A: GlobMatchAction<F>> GlobWalk<'_, F, A> {
106    /// Recursively walk `walk_dir`, descending into subdirectories
107    /// and following symlinks; invoke
108    /// [`GlobMatchAction::on_match`] for every regular-file entry
109    /// whose joined walk-relative path matches the configured glob.
110    ///
111    /// `walk_rel` is the path of the walker relative to the original
112    /// walk origin, expressed as a stack of UTF-8 segment strings.
113    /// The walker pushes and pops in lock-step with the recursion so
114    /// the same buffer threads through every level.
115    ///
116    /// # Errors
117    ///
118    /// Returns the action's error type (via
119    /// [`GlobMatchAction::map_walk_error`] or
120    /// [`GlobMatchAction::on_match`]) if a directory read, a symlink
121    /// metadata follow, or the per-match work fails.
122    pub fn walk(
123        &self,
124        walk_dir: &Path,
125        walk_rel: &mut Vec<String>,
126        out: &mut Vec<A::Output>,
127    ) -> Result<(), A::Error> {
128        let entries = self
129            .fs
130            .read_dir(walk_dir)
131            .map_err(|source| self.action.map_walk_error(walk_dir.to_path_buf(), source))?;
132        for entry in entries {
133            let Some(name) = entry
134                .path
135                .file_name()
136                .and_then(|n| n.to_str())
137                .map(str::to_owned)
138            else {
139                // Non-UTF-8 names cannot appear in any haz pattern
140                // (PathSegment forbids them at parse time), so they
141                // cannot match any glob; skipping them loses no
142                // information that the cache key could otherwise
143                // capture.
144                continue;
145            };
146            walk_rel.push(name);
147            let r = self.visit_entry(&entry, walk_rel, out);
148            walk_rel.pop();
149            r?;
150        }
151        Ok(())
152    }
153
154    fn visit_entry(
155        &self,
156        entry: &DirEntry,
157        walk_rel: &mut Vec<String>,
158        out: &mut Vec<A::Output>,
159    ) -> Result<(), A::Error> {
160        match entry.metadata.kind {
161            EntryKind::Dir => self.walk(&entry.path, walk_rel, out),
162            EntryKind::File => self.maybe_match_file(&entry.path, walk_rel, out),
163            EntryKind::Symlink => {
164                let target_meta = self
165                    .fs
166                    .metadata(&entry.path)
167                    .map_err(|source| self.action.map_walk_error(entry.path.clone(), source))?;
168                match target_meta.kind {
169                    EntryKind::Dir => self.walk(&entry.path, walk_rel, out),
170                    EntryKind::File => self.maybe_match_file(&entry.path, walk_rel, out),
171                    EntryKind::Symlink
172                    | EntryKind::BlockDevice
173                    | EntryKind::CharDevice
174                    | EntryKind::Fifo
175                    | EntryKind::Socket => Ok(()),
176                }
177            }
178            EntryKind::BlockDevice
179            | EntryKind::CharDevice
180            | EntryKind::Fifo
181            | EntryKind::Socket => Ok(()),
182        }
183    }
184
185    fn maybe_match_file(
186        &self,
187        host_path: &Path,
188        walk_rel: &[String],
189        out: &mut Vec<A::Output>,
190    ) -> Result<(), A::Error> {
191        let candidate = format!("{}{}", self.candidate_prefix, walk_rel.join("/"));
192        if !self.matcher.is_match(&candidate) {
193            return Ok(());
194        }
195        let workspace_absolute_path = format!("{}/{}", self.workspace_prefix, walk_rel.join("/"));
196        self.action
197            .on_match(self.fs, host_path, workspace_absolute_path, out)
198    }
199}
200
201/// Lift a literal [`HazPath`] (under the supplied [`ProjectRoot`]) into
202/// its workspace-absolute segment view.
203///
204/// - [`HazPath::WorkspaceAbsolute`] passes through unchanged.
205/// - [`HazPath::ProjectRelative`] under [`ProjectRoot::Nested`] is
206///   prefixed by the project root's segments.
207/// - [`HazPath::ProjectRelative`] under [`ProjectRoot::WorkspaceRoot`]
208///   (implicit-mode project per `DISC-003`) is workspace-absolute
209///   already.
210#[must_use]
211pub fn literal_workspace_segments<'a>(
212    haz_path: &'a HazPath,
213    project_root: &'a ProjectRoot,
214) -> Vec<&'a PathSegment> {
215    match (haz_path, project_root) {
216        (HazPath::WorkspaceAbsolute(segs), _)
217        | (HazPath::ProjectRelative(segs), ProjectRoot::WorkspaceRoot) => segs.iter().collect(),
218        (HazPath::ProjectRelative(rel), ProjectRoot::Nested(cp)) => {
219            let mut v: Vec<&PathSegment> = cp.segments().iter().collect();
220            v.extend(rel.iter());
221            v
222        }
223    }
224}
225
226/// Compute the glob-walk origin: the host directory at which the walk
227/// starts, the workspace-absolute prefix to prepend to every matched
228/// path (`"/proj_root"` for `ProjectRelative` + [`ProjectRoot::Nested`];
229/// `""` otherwise), and the candidate-string prefix that turns
230/// walk-relative segments into the spelling globset matches against
231/// (`"/"` for [`PathAnchor::WorkspaceAbsolute`]; `""` for
232/// [`PathAnchor::ProjectRelative`], matching each pattern's `Display`
233/// form).
234#[must_use]
235pub fn glob_walk_origin(
236    workspace_host: &Path,
237    project_root: &ProjectRoot,
238    anchor: PathAnchor,
239) -> (PathBuf, String, &'static str) {
240    match (anchor, project_root) {
241        (PathAnchor::WorkspaceAbsolute, _) => (workspace_host.to_path_buf(), String::new(), "/"),
242        (PathAnchor::ProjectRelative, ProjectRoot::WorkspaceRoot) => {
243            (workspace_host.to_path_buf(), String::new(), "")
244        }
245        (PathAnchor::ProjectRelative, ProjectRoot::Nested(cp)) => {
246            let segs: Vec<&PathSegment> = cp.segments().iter().collect();
247            (
248                host_path_from_segments(workspace_host, &segs),
249                workspace_absolute_string_from_segments(&segs),
250                "",
251            )
252        }
253    }
254}
255
256/// Build a host filesystem path by pushing each workspace-segment's
257/// string form onto `workspace_root`.
258#[must_use]
259pub fn host_path_from_segments(workspace_root: &Path, segments: &[&PathSegment]) -> PathBuf {
260    let mut p = workspace_root.to_path_buf();
261    for s in segments {
262        p.push(s.as_str());
263    }
264    p
265}
266
267/// Render a sequence of workspace-segments as a workspace-absolute
268/// path string (`"/seg1/seg2/..."`).
269#[must_use]
270pub fn workspace_absolute_string_from_segments(segments: &[&PathSegment]) -> String {
271    let mut s = String::new();
272    for seg in segments {
273        s.push('/');
274        s.push_str(seg.as_str());
275    }
276    s
277}