objects/worktree/worktree_ignore.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Ignore pattern helpers for worktree operations.
3//!
4//! `.heddleignore` follows the same syntax as `.gitignore`: literal
5//! names, leading `/` for root-anchored rules, trailing `/` for
6//! directory-only matches, `*` and `**` glob wildcards, character
7//! classes (`[abc]`), and `!` negation (whitelist) rules. The matcher
8//! delegates to the `ignore` crate's gitignore implementation so the
9//! semantics are spec-compliant; only the patterns themselves are
10//! sourced from `.heddleignore` instead of `.gitignore`.
11//!
12//! Three "root-admin" pattern names — `.heddle`, `.heddleignore`,
13//! and `.git` — get an implicit leading `/` so they match only at
14//! the repo root. This preserves the long-standing invariant that a
15//! nested `.heddle/` directory (e.g. an `examples/calculator/.heddle`
16//! fixture) is *captured*, not silently dropped. Operators who want
17//! the gitignore-spec "match anywhere" behavior for those names can
18//! write `**/<name>` explicitly.
19
20use std::path::Path;
21
22use ignore::gitignore::{Gitignore, GitignoreBuilder};
23
24/// Whether `path` is covered by any of the `.heddleignore` patterns.
25///
26/// `is_dir = true` is passed to the underlying gitignore matcher so
27/// trailing-slash rules (`target/`, `build/`) match the bare directory
28/// entry itself — not just paths *inside* it. This preserves the
29/// pre-existing in-house matcher's behavior, where `build/` on a bare
30/// `build` path returned `true`. Walker callers depend on this to
31/// prune entire directory subtrees before descending; the alternative
32/// (`is_dir = false`) caused unnecessary traversal of `target/`,
33/// `node_modules/`, and other build trees.
34///
35/// Non-directory rules (`*.log`, `node_modules`, `[Mm]akefile`) are
36/// unaffected — gitignore-spec rules without a trailing slash match
37/// regardless of the `is_dir` flag.
38pub fn should_ignore(path: &Path, patterns: &[String]) -> bool {
39 build_worktree_ignore(patterns).is_ignored(path)
40}
41
42/// A compiled `.heddleignore` matcher. Compiling the glob set is the
43/// expensive part; matching a single path against an already-built
44/// matcher is cheap. Callers that test many paths against the same
45/// patterns (e.g. counting unignored entries across a large diff)
46/// should build the matcher once and reuse it, rather than paying the
47/// per-path compile cost that the [`should_ignore`] convenience wrapper
48/// incurs.
49pub struct WorktreeIgnoreMatcher {
50 gi: Gitignore,
51}
52
53impl WorktreeIgnoreMatcher {
54 /// Whether `path` is covered by any of the compiled patterns. See
55 /// [`should_ignore`] for the matching semantics (`is_dir = true`,
56 /// negation handling).
57 pub fn is_ignored(&self, path: &Path) -> bool {
58 matched(&self.gi, path)
59 }
60}
61
62/// Compile a [`WorktreeIgnoreMatcher`] from the given pattern strings,
63/// once, for reuse across many path checks. This is the compile-once
64/// counterpart to [`should_ignore`], which rebuilds the matcher on
65/// every call.
66pub fn build_worktree_ignore(patterns: &[String]) -> WorktreeIgnoreMatcher {
67 WorktreeIgnoreMatcher {
68 gi: build_matcher(patterns),
69 }
70}
71
72/// Build a `Gitignore` matcher from the given pattern strings,
73/// translating the root-admin special cases (`.heddle`,
74/// `.heddleignore`, `.git`) into root-anchored gitignore syntax.
75fn build_matcher(patterns: &[String]) -> Gitignore {
76 // Root path is symbolic — paths fed to `matched` are interpreted
77 // relative to it. Callers always pass repo-relative paths, so the
78 // root just needs to be a stable, in-memory anchor.
79 let mut builder = GitignoreBuilder::new("");
80 for pattern in patterns {
81 let line = canonical_line(pattern);
82 // `add_line` returns Err only on malformed glob syntax. We
83 // silently skip malformed user patterns — heddle's ingest path
84 // shouldn't error on a typo'd `.heddleignore` line; it should
85 // ignore the bad rule and keep going.
86 let _ = builder.add_line(None, &line);
87 }
88 // `build()` only fails on internal compile errors. The empty
89 // matcher (`Gitignore::empty()`) matches nothing — the right
90 // failure mode if we get here.
91 builder.build().unwrap_or_else(|_| Gitignore::empty())
92}
93
/// Turn a raw `.heddleignore` pattern into the line handed to the
/// gitignore builder.
///
/// The three root-admin names get a leading `/` so they are anchored
/// to the repo root. Only the exact bare names are rewritten: any
/// other spelling (`.heddle/`, `!.git`, `/.heddle`, ...) and every
/// other pattern is returned verbatim, so the usual gitignore
/// semantics (`*`, `**`, `[abc]`, `!negation`, trailing `/`, leading
/// `/`) flow through untouched.
fn canonical_line(pattern: &str) -> String {
    /// Names that are implicitly anchored to the repo root.
    const ROOT_ADMIN_NAMES: [&str; 3] = [".heddle", ".heddleignore", ".git"];

    if ROOT_ADMIN_NAMES.contains(&pattern) {
        format!("/{pattern}")
    } else {
        pattern.to_owned()
    }
}
106
107/// Apply the matcher to a relative path. Whitelist (`!negation`)
108/// rules unset the match; we surface only the `Ignore` outcome.
109///
110/// `is_dir = true`: trailing-slash rules (`build/`) match the bare
111/// directory entry as well as paths inside it. See the docstring on
112/// `should_ignore` for the migration rationale.
113fn matched(gi: &Gitignore, path: &Path) -> bool {
114 matches!(
115 gi.matched_path_or_any_parents(path, /* is_dir */ true),
116 ignore::Match::Ignore(_)
117 )
118}
119
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Convert pattern literals into the owned form the API accepts.
    fn rules(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|line| (*line).to_owned()).collect()
    }

    /// Run `should_ignore` on a path written as a string literal.
    fn ignored(path: &str, patterns: &[String]) -> bool {
        should_ignore(&PathBuf::from(path), patterns)
    }

    /// Assert that every listed path is ignored under `patterns`.
    fn assert_all_ignored(patterns: &[String], paths: &[&str]) {
        for path in paths {
            assert!(ignored(path, patterns), "expected `{path}` to be ignored");
        }
    }

    /// Assert that none of the listed paths is ignored under `patterns`.
    fn assert_none_ignored(patterns: &[String], paths: &[&str]) {
        for path in paths {
            assert!(!ignored(path, patterns), "expected `{path}` to be kept");
        }
    }

    #[test]
    fn test_glob_extension() {
        let patterns = rules(&["*.log"]);
        assert_all_ignored(&patterns, &["test.log", "debug.log"]);
        assert_none_ignored(&patterns, &["test.txt"]);
    }

    #[test]
    fn test_directory_pattern() {
        let patterns = rules(&["build/"]);
        // The bare `build` entry matters most here: walker callers ask
        // `should_ignore` whether to prune `build/` before descending.
        // Because `is_dir = true` is plumbed into the gitignore
        // matcher, the trailing-slash rule fires on the directory
        // entry itself; otherwise walks of large dependency / build
        // trees (`target/`, `node_modules/`) would recurse needlessly.
        assert_all_ignored(&patterns, &["build/output.txt", "build", "build/anything"]);
        assert_none_ignored(&patterns, &["builder.txt"]);
    }

    #[test]
    fn dir_only_rule_covers_symlinked_deps_dir() {
        // heddle#303: a `node_modules` *symlink* (a workaround for the
        // isolated-checkout hydrate gap) has to be covered by a
        // dir-only `node_modules/` rule instead of showing up as an
        // uncaptured path that silently blocks `ready`. The matcher is
        // path-based and always probes with `is_dir = true`, so it
        // cannot -- and must not -- tell a symlink-to-dir from a real
        // directory: the trailing-slash rule fires on the bare
        // `node_modules` entry either way. Walker/scan callers never
        // descend a symlink, so this entry decides whether the link is
        // ignored.
        let patterns = rules(&["node_modules/"]);
        assert_all_ignored(&patterns, &["node_modules", "nested/node_modules"]);
    }

    #[test]
    fn test_simple_pattern() {
        let patterns = rules(&["node_modules"]);
        assert_all_ignored(&patterns, &["node_modules/package.json"]);
        assert_none_ignored(&patterns, &["src/main.rs"]);
    }

    #[test]
    fn test_simple_pattern_does_not_match_prefixes() {
        let patterns = rules(&["target"]);
        assert_all_ignored(&patterns, &["target/output.txt", "build/target/app"]);
        assert_none_ignored(&patterns, &["target.txt", "targeted/output.txt"]);
    }

    #[test]
    fn test_root_admin_patterns_do_not_ignore_nested_paths() {
        let patterns = rules(&[".heddle", ".heddleignore"]);
        assert_all_ignored(
            &patterns,
            &[".heddle/objects", ".heddle/state/index.bin", ".heddleignore"],
        );
        assert_none_ignored(
            &patterns,
            &[
                "examples/calculator/.heddle/objects",
                "examples/calculator/.heddle/state/index.bin",
                "examples/calculator/.heddleignore",
            ],
        );
    }

    // ---- New gitignore-spec coverage ----

    #[test]
    fn test_path_relative_glob_matches_specific_directory_only() {
        // `config/*.toml` is the case the user called out: a glob
        // anchored to one specific subdirectory, where `*` matches a
        // single path segment. A plain `secrets.toml` at the root must
        // NOT be ignored.
        let patterns = rules(&["config/*.toml"]);
        assert_all_ignored(&patterns, &["config/secrets.toml", "config/database.toml"]);
        assert_none_ignored(&patterns, &["secrets.toml", "other/secrets.toml"]);
    }

    #[test]
    fn test_double_star_recursive_glob_descends_directories() {
        // `**/*.pem` matches at any depth -- the canonical "find every
        // PEM key under any directory" pattern.
        let patterns = rules(&["**/*.pem"]);
        assert_all_ignored(&patterns, &["dev.pem", "keys/dev.pem", "nested/deeper/key.pem"]);
        assert_none_ignored(&patterns, &["dev.txt"]);
    }

    #[test]
    fn test_negation_rule_whitelists_a_path() {
        // `*.log` followed by `!keep.log`: the negation rule unsets
        // the earlier match for that one name.
        let patterns = rules(&["*.log", "!keep.log"]);
        assert_all_ignored(&patterns, &["debug.log"]);
        assert_none_ignored(&patterns, &["keep.log"]);
    }

    #[test]
    fn test_leading_slash_anchors_to_root_only() {
        // Root-anchored `/build` ignores the top-level `build/` but
        // not a nested `nested/build/` directory. That differs from
        // the bare `build` pattern, which matches anywhere.
        let patterns = rules(&["/build"]);
        assert_all_ignored(&patterns, &["build/output"]);
        assert_none_ignored(&patterns, &["nested/build/file"]);
    }

    #[test]
    fn test_character_class_matches_set() {
        // `[Mm]akefile` is a standard gitignore character class that
        // accepts the uppercase and lowercase spellings.
        let patterns = rules(&["[Mm]akefile"]);
        assert_all_ignored(&patterns, &["Makefile", "makefile"]);
        assert_none_ignored(&patterns, &["Rakefile"]);
    }

    #[test]
    fn test_comments_and_blank_lines_are_handled_upstream() {
        // The matcher accepts every line it is given verbatim (the
        // gitignore spec treats `#` as a comment marker). Repository
        // strips comments before calling, but check that the matcher
        // tolerates them so a future refactor can stop stripping
        // without a behavior change.
        let patterns = rules(&["# comment", "", "*.log"]);
        assert_all_ignored(&patterns, &["foo.log"]);
        assert_none_ignored(&patterns, &["foo.txt"]);
    }

    #[test]
    fn prebuilt_matcher_matches_same_as_should_ignore() {
        // The compile-once API must agree with the per-call
        // `should_ignore` wrapper on every path: it only hoists WHEN
        // the glob set is compiled, not WHAT it matches.
        let patterns = rules(&["node_modules", "*.log"]);
        let matcher = build_worktree_ignore(&patterns);
        let cases = [
            "node_modules/left-pad/index.js",
            "debug.log",
            "src/main.rs",
            "config/app.toml",
        ];
        for case in cases {
            let candidate = PathBuf::from(case);
            let via_prebuilt = matcher.is_ignored(&candidate);
            let via_wrapper = should_ignore(&candidate, &patterns);
            assert_eq!(
                via_prebuilt, via_wrapper,
                "prebuilt matcher and should_ignore disagree on {case}"
            );
        }
        // Reusing one matcher across many paths is the whole point, so
        // pin a couple of explicit outcomes as well.
        assert!(matcher.is_ignored(&PathBuf::from("node_modules/x")));
        assert!(!matcher.is_ignored(&PathBuf::from("src/lib.rs")));
    }

    #[test]
    fn test_malformed_pattern_does_not_break_matcher() {
        // Unbalanced bracket: the builder's error is swallowed and the
        // pattern is dropped, while the other rules keep applying.
        let patterns = rules(&["[unbalanced", "*.log"]);
        assert_all_ignored(&patterns, &["foo.log"]);
    }
}