haz_cache_key/pattern_walk.rs
1//! Shared kernel for resolving a task's [`InputSpec`] / [`OutputSpec`]
2//! patterns against the filesystem.
3//!
4//! Two distinct consumers exist:
5//!
6//! - Cache-key input resolution resolves each declared
7//! [`InputSpec`](haz_domain::path::InputSpec), reading the bytes of
8//! every match and folding them into the cache-key contents.
9//! - The executor's output-store path resolves each declared
10//! [`OutputSpec`](haz_domain::path::OutputSpec) after a successful
11//! run, recording the mode and host path of every match for the
12//! cache library to ingest.
13//!
14//! Both consumers project workspace-anchored patterns onto the host
15//! filesystem the same way (`literal_workspace_segments` +
16//! `host_path_from_segments` for literals, `glob_walk_origin` + a
17//! recursive walker for globs); the only difference is what each one
18//! does once a regular-file match has been identified. This module
19//! captures the common projection helpers and the recursive walker,
20//! and parameterises the per-match work behind the [`GlobMatchAction`]
21//! trait.
22//!
23//! [`InputSpec`]: haz_domain::path::InputSpec
24//! [`OutputSpec`]: haz_domain::path::OutputSpec
25
26use std::path::{Path, PathBuf};
27
28use haz_domain::path::{HazPath, PathAnchor, PathSegment, ProjectRoot};
29use haz_vfs::{DirEntry, EntryKind, Filesystem, FsError};
30
31/// Per-consumer hooks for [`GlobWalk::walk`].
32///
33/// A type implementing this trait pairs the consumer's owned output
34/// shape (e.g. `OwnedInputFile`, `OwnedOutputFile`) with its typed
35/// error vocabulary, and supplies the two callback points the walker
36/// invokes:
37///
38/// - [`Self::map_walk_error`] turns an [`FsError`] raised during
39/// directory traversal (a failed `read_dir`, a failed
40/// metadata-follow on a symlink) into the consumer's error type.
41/// - [`Self::on_match`] runs the per-match work for one regular-file
42/// entry whose walk-relative path matched the glob: read bytes and
43/// hash them, read mode bits, etc. The walker hands over the
44/// filesystem handle, the host path, and the synthesized
45/// workspace-absolute path string, and lets the action push the
46/// resulting [`Self::Output`] into the caller-supplied vec.
47pub trait GlobMatchAction<F: Filesystem> {
48 /// One entry the action contributes per matched file.
49 type Output;
50 /// Typed error the action surfaces from its callback points.
51 type Error;
52
53 /// Wrap an [`FsError`] raised while the walker was reading a
54 /// directory or following a symlink. `root` is the host path the
55 /// walker was operating on at the moment of failure.
56 fn map_walk_error(&self, root: PathBuf, source: FsError) -> Self::Error;
57
58 /// Perform the consumer-specific work for one matched regular
59 /// file.
60 ///
61 /// - `host_path` is the host filesystem path of the matched
62 /// entry (the symlink's own path, not its target, for
63 /// symlink-to-file matches).
64 /// - `workspace_absolute_path` is the workspace-anchored path
65 /// string (rooted at `/`) the consumer should record alongside
66 /// the output entry.
67 /// - `out` is the caller's accumulator; the action appends one
68 /// [`Self::Output`] on success.
69 ///
70 /// # Errors
71 ///
72 /// Returns the action's [`Self::Error`] if its per-match work
73 /// fails (for example, reading or hashing the matched file).
74 fn on_match(
75 &self,
76 fs: &F,
77 host_path: &Path,
78 workspace_absolute_path: String,
79 out: &mut Vec<Self::Output>,
80 ) -> Result<(), Self::Error>;
81}
82
83/// Per-glob walk state: holds every immutable input the recursion
84/// needs, so the recursive methods take only the changing parts.
85pub struct GlobWalk<'a, F: Filesystem, A: GlobMatchAction<F>> {
86 /// Filesystem the walker reads directory entries and follows
87 /// symlinks through.
88 pub fs: &'a F,
89 /// Compiled glob matcher.
90 pub matcher: &'a globset::GlobMatcher,
91 /// Either `"/"` (workspace-absolute glob) or `""`
92 /// (project-relative glob). Prepended to the joined walk-relative
93 /// segments to form the candidate string fed to the matcher.
94 pub candidate_prefix: &'static str,
95 /// Either `""` (workspace-absolute glob, or implicit-mode project)
96 /// or `"/seg1/seg2"` (project-relative glob in a nested project).
97 /// Prepended (followed by `"/"`) to the joined walk-relative
98 /// segments to form an entry's `workspace_absolute_path`.
99 pub workspace_prefix: String,
100 /// Per-match action: emits one [`GlobMatchAction::Output`] per matched file and
101 /// owns the consumer's error vocabulary.
102 pub action: &'a A,
103}
104
105impl<F: Filesystem, A: GlobMatchAction<F>> GlobWalk<'_, F, A> {
106 /// Recursively walk `walk_dir`, descending into subdirectories
107 /// and following symlinks; invoke
108 /// [`GlobMatchAction::on_match`] for every regular-file entry
109 /// whose joined walk-relative path matches the configured glob.
110 ///
111 /// `walk_rel` is the path of the walker relative to the original
112 /// walk origin, expressed as a stack of UTF-8 segment strings.
113 /// The walker pushes and pops in lock-step with the recursion so
114 /// the same buffer threads through every level.
115 ///
116 /// # Errors
117 ///
118 /// Returns the action's error type (via
119 /// [`GlobMatchAction::map_walk_error`] or
120 /// [`GlobMatchAction::on_match`]) if a directory read, a symlink
121 /// metadata follow, or the per-match work fails.
122 pub fn walk(
123 &self,
124 walk_dir: &Path,
125 walk_rel: &mut Vec<String>,
126 out: &mut Vec<A::Output>,
127 ) -> Result<(), A::Error> {
128 let entries = self
129 .fs
130 .read_dir(walk_dir)
131 .map_err(|source| self.action.map_walk_error(walk_dir.to_path_buf(), source))?;
132 for entry in entries {
133 let Some(name) = entry
134 .path
135 .file_name()
136 .and_then(|n| n.to_str())
137 .map(str::to_owned)
138 else {
139 // Non-UTF-8 names cannot appear in any haz pattern
140 // (PathSegment forbids them at parse time), so they
141 // cannot match any glob; skipping them loses no
142 // information that the cache key could otherwise
143 // capture.
144 continue;
145 };
146 walk_rel.push(name);
147 let r = self.visit_entry(&entry, walk_rel, out);
148 walk_rel.pop();
149 r?;
150 }
151 Ok(())
152 }
153
154 fn visit_entry(
155 &self,
156 entry: &DirEntry,
157 walk_rel: &mut Vec<String>,
158 out: &mut Vec<A::Output>,
159 ) -> Result<(), A::Error> {
160 match entry.metadata.kind {
161 EntryKind::Dir => self.walk(&entry.path, walk_rel, out),
162 EntryKind::File => self.maybe_match_file(&entry.path, walk_rel, out),
163 EntryKind::Symlink => {
164 let target_meta = self
165 .fs
166 .metadata(&entry.path)
167 .map_err(|source| self.action.map_walk_error(entry.path.clone(), source))?;
168 match target_meta.kind {
169 EntryKind::Dir => self.walk(&entry.path, walk_rel, out),
170 EntryKind::File => self.maybe_match_file(&entry.path, walk_rel, out),
171 EntryKind::Symlink
172 | EntryKind::BlockDevice
173 | EntryKind::CharDevice
174 | EntryKind::Fifo
175 | EntryKind::Socket => Ok(()),
176 }
177 }
178 EntryKind::BlockDevice
179 | EntryKind::CharDevice
180 | EntryKind::Fifo
181 | EntryKind::Socket => Ok(()),
182 }
183 }
184
185 fn maybe_match_file(
186 &self,
187 host_path: &Path,
188 walk_rel: &[String],
189 out: &mut Vec<A::Output>,
190 ) -> Result<(), A::Error> {
191 let candidate = format!("{}{}", self.candidate_prefix, walk_rel.join("/"));
192 if !self.matcher.is_match(&candidate) {
193 return Ok(());
194 }
195 let workspace_absolute_path = format!("{}/{}", self.workspace_prefix, walk_rel.join("/"));
196 self.action
197 .on_match(self.fs, host_path, workspace_absolute_path, out)
198 }
199}
200
201/// Lift a literal [`HazPath`] (under the supplied [`ProjectRoot`]) into
202/// its workspace-absolute segment view.
203///
204/// - [`HazPath::WorkspaceAbsolute`] passes through unchanged.
205/// - [`HazPath::ProjectRelative`] under [`ProjectRoot::Nested`] is
206/// prefixed by the project root's segments.
207/// - [`HazPath::ProjectRelative`] under [`ProjectRoot::WorkspaceRoot`]
208/// (implicit-mode project per `DISC-003`) is workspace-absolute
209/// already.
210#[must_use]
211pub fn literal_workspace_segments<'a>(
212 haz_path: &'a HazPath,
213 project_root: &'a ProjectRoot,
214) -> Vec<&'a PathSegment> {
215 match (haz_path, project_root) {
216 (HazPath::WorkspaceAbsolute(segs), _)
217 | (HazPath::ProjectRelative(segs), ProjectRoot::WorkspaceRoot) => segs.iter().collect(),
218 (HazPath::ProjectRelative(rel), ProjectRoot::Nested(cp)) => {
219 let mut v: Vec<&PathSegment> = cp.segments().iter().collect();
220 v.extend(rel.iter());
221 v
222 }
223 }
224}
225
226/// Compute the glob-walk origin: the host directory at which the walk
227/// starts, the workspace-absolute prefix to prepend to every matched
228/// path (`"/proj_root"` for `ProjectRelative` + [`ProjectRoot::Nested`];
229/// `""` otherwise), and the candidate-string prefix that turns
230/// walk-relative segments into the spelling globset matches against
231/// (`"/"` for [`PathAnchor::WorkspaceAbsolute`]; `""` for
232/// [`PathAnchor::ProjectRelative`], matching each pattern's `Display`
233/// form).
234#[must_use]
235pub fn glob_walk_origin(
236 workspace_host: &Path,
237 project_root: &ProjectRoot,
238 anchor: PathAnchor,
239) -> (PathBuf, String, &'static str) {
240 match (anchor, project_root) {
241 (PathAnchor::WorkspaceAbsolute, _) => (workspace_host.to_path_buf(), String::new(), "/"),
242 (PathAnchor::ProjectRelative, ProjectRoot::WorkspaceRoot) => {
243 (workspace_host.to_path_buf(), String::new(), "")
244 }
245 (PathAnchor::ProjectRelative, ProjectRoot::Nested(cp)) => {
246 let segs: Vec<&PathSegment> = cp.segments().iter().collect();
247 (
248 host_path_from_segments(workspace_host, &segs),
249 workspace_absolute_string_from_segments(&segs),
250 "",
251 )
252 }
253 }
254}
255
256/// Build a host filesystem path by pushing each workspace-segment's
257/// string form onto `workspace_root`.
258#[must_use]
259pub fn host_path_from_segments(workspace_root: &Path, segments: &[&PathSegment]) -> PathBuf {
260 let mut p = workspace_root.to_path_buf();
261 for s in segments {
262 p.push(s.as_str());
263 }
264 p
265}
266
267/// Render a sequence of workspace-segments as a workspace-absolute
268/// path string (`"/seg1/seg2/..."`).
269#[must_use]
270pub fn workspace_absolute_string_from_segments(segments: &[&PathSegment]) -> String {
271 let mut s = String::new();
272 for seg in segments {
273 s.push('/');
274 s.push_str(seg.as_str());
275 }
276 s
277}