Skip to main content

objects/worktree/
worktree_ignore.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Ignore pattern helpers for worktree operations.
3//!
4//! `.heddleignore` follows the same syntax as `.gitignore`: literal
5//! names, leading `/` for root-anchored rules, trailing `/` for
6//! directory-only matches, `*` and `**` glob wildcards, character
7//! classes (`[abc]`), and `!` negation (whitelist) rules. The matcher
8//! delegates to the `ignore` crate's gitignore implementation so the
9//! semantics are spec-compliant; only the patterns themselves are
10//! sourced from `.heddleignore` instead of `.gitignore`.
11//!
12//! Three "root-admin" pattern names — `.heddle`, `.heddleignore`,
13//! and `.git` — get an implicit leading `/` so they match only at
14//! the repo root. This preserves the long-standing invariant that a
15//! nested `.heddle/` directory (e.g. an `examples/calculator/.heddle`
16//! fixture) is *captured*, not silently dropped. Operators who want
17//! the gitignore-spec "match anywhere" behavior for those names can
18//! write `**/<name>` explicitly.
19
20use std::path::Path;
21
22use ignore::gitignore::{Gitignore, GitignoreBuilder};
23
24/// Whether `path` is covered by any of the `.heddleignore` patterns.
25///
26/// `is_dir = true` is passed to the underlying gitignore matcher so
27/// trailing-slash rules (`target/`, `build/`) match the bare directory
28/// entry itself — not just paths *inside* it. This preserves the
29/// pre-existing in-house matcher's behavior, where `build/` on a bare
30/// `build` path returned `true`. Walker callers depend on this to
31/// prune entire directory subtrees before descending; the alternative
32/// (`is_dir = false`) caused unnecessary traversal of `target/`,
33/// `node_modules/`, and other build trees.
34///
35/// Non-directory rules (`*.log`, `node_modules`, `[Mm]akefile`) are
36/// unaffected — gitignore-spec rules without a trailing slash match
37/// regardless of the `is_dir` flag.
38pub fn should_ignore(path: &Path, patterns: &[String]) -> bool {
39    build_worktree_ignore(patterns).is_ignored(path)
40}
41
42/// A compiled `.heddleignore` matcher. Compiling the glob set is the
43/// expensive part; matching a single path against an already-built
44/// matcher is cheap. Callers that test many paths against the same
45/// patterns (e.g. counting unignored entries across a large diff)
46/// should build the matcher once and reuse it, rather than paying the
47/// per-path compile cost that the [`should_ignore`] convenience wrapper
48/// incurs.
49pub struct WorktreeIgnoreMatcher {
50    gi: Gitignore,
51}
52
53impl WorktreeIgnoreMatcher {
54    /// Whether `path` is covered by any of the compiled patterns. See
55    /// [`should_ignore`] for the matching semantics (`is_dir = true`,
56    /// negation handling).
57    pub fn is_ignored(&self, path: &Path) -> bool {
58        matched(&self.gi, path)
59    }
60}
61
62/// Compile a [`WorktreeIgnoreMatcher`] from the given pattern strings,
63/// once, for reuse across many path checks. This is the compile-once
64/// counterpart to [`should_ignore`], which rebuilds the matcher on
65/// every call.
66pub fn build_worktree_ignore(patterns: &[String]) -> WorktreeIgnoreMatcher {
67    WorktreeIgnoreMatcher {
68        gi: build_matcher(patterns),
69    }
70}
71
72/// Build a `Gitignore` matcher from the given pattern strings,
73/// translating the root-admin special cases (`.heddle`,
74/// `.heddleignore`, `.git`) into root-anchored gitignore syntax.
75fn build_matcher(patterns: &[String]) -> Gitignore {
76    // Root path is symbolic — paths fed to `matched` are interpreted
77    // relative to it. Callers always pass repo-relative paths, so the
78    // root just needs to be a stable, in-memory anchor.
79    let mut builder = GitignoreBuilder::new("");
80    for pattern in patterns {
81        let line = canonical_line(pattern);
82        // `add_line` returns Err only on malformed glob syntax. We
83        // silently skip malformed user patterns — heddle's ingest path
84        // shouldn't error on a typo'd `.heddleignore` line; it should
85        // ignore the bad rule and keep going.
86        let _ = builder.add_line(None, &line);
87    }
88    // `build()` only fails on internal compile errors. The empty
89    // matcher (`Gitignore::empty()`) matches nothing — the right
90    // failure mode if we get here.
91    builder.build().unwrap_or_else(|_| Gitignore::empty())
92}
93
/// Turn a raw `.heddleignore` pattern into the line given to the
/// gitignore builder.
///
/// The three root-admin names (`.heddle`, `.heddleignore` and `.git`),
/// when written exactly like that, gain a leading `/` so they are
/// anchored to the repo root. Anything else is returned unchanged, so
/// all other gitignore syntax (`*`, `**`, `[abc]`, `!negation`,
/// trailing `/`, leading `/`) reaches the builder verbatim.
fn canonical_line(pattern: &str) -> String {
    // Only exact, whole-pattern matches are rewritten.
    const ROOT_ADMIN_NAMES: [&str; 3] = [".heddle", ".heddleignore", ".git"];

    if ROOT_ADMIN_NAMES.contains(&pattern) {
        format!("/{pattern}")
    } else {
        pattern.to_owned()
    }
}
106
107/// Apply the matcher to a relative path. Whitelist (`!negation`)
108/// rules unset the match; we surface only the `Ignore` outcome.
109///
110/// `is_dir = true`: trailing-slash rules (`build/`) match the bare
111/// directory entry as well as paths inside it. See the docstring on
112/// `should_ignore` for the migration rationale.
113fn matched(gi: &Gitignore, path: &Path) -> bool {
114    matches!(
115        gi.matched_path_or_any_parents(path, /* is_dir */ true),
116        ignore::Match::Ignore(_)
117    )
118}
119
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Own a list of literal pattern lines, in the shape the matcher
    /// entry points take.
    fn rules(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|line| (*line).to_owned()).collect()
    }

    /// Run `should_ignore` against a path written as a string literal.
    fn ignored(path: &str, patterns: &[String]) -> bool {
        should_ignore(&PathBuf::from(path), patterns)
    }

    #[test]
    fn test_glob_extension() {
        let patterns = rules(&["*.log"]);
        assert!(ignored("test.log", &patterns));
        assert!(ignored("debug.log", &patterns));
        assert!(!ignored("test.txt", &patterns));
    }

    #[test]
    fn test_directory_pattern() {
        let patterns = rules(&["build/"]);
        assert!(ignored("build/output.txt", &patterns));
        // The bare directory entry must match too: walkers call
        // `should_ignore` to decide whether to prune `build/` before
        // descending into it. Because the matcher probes with
        // `is_dir = true`, the trailing-slash rule fires on the entry
        // itself, which keeps walks out of large dependency / build
        // trees (`target/`, `node_modules/`).
        assert!(ignored("build", &patterns));
        assert!(ignored("build/anything", &patterns));
        assert!(!ignored("builder.txt", &patterns));
    }

    #[test]
    fn dir_only_rule_covers_symlinked_deps_dir() {
        // heddle#303: a `node_modules` *symlink* (a workaround for the
        // isolated-checkout hydrate gap) has to be covered by a
        // dir-only `node_modules/` rule instead of showing up as an
        // uncaptured path that silently blocks `ready`. The matcher
        // only sees paths and always probes with `is_dir = true`, so it
        // cannot, and must not, tell a symlink-to-dir from a real
        // directory: the trailing-slash rule fires on the bare
        // `node_modules` entry in both cases. Walker/scan callers never
        // descend a symlink, so this entry alone decides whether the
        // link is ignored.
        let patterns = rules(&["node_modules/"]);
        assert!(ignored("node_modules", &patterns));
        assert!(ignored("nested/node_modules", &patterns));
    }

    #[test]
    fn test_simple_pattern() {
        let patterns = rules(&["node_modules"]);
        assert!(ignored("node_modules/package.json", &patterns));
        assert!(!ignored("src/main.rs", &patterns));
    }

    #[test]
    fn test_simple_pattern_does_not_match_prefixes() {
        let patterns = rules(&["target"]);
        assert!(ignored("target/output.txt", &patterns));
        assert!(ignored("build/target/app", &patterns));
        assert!(!ignored("target.txt", &patterns));
        assert!(!ignored("targeted/output.txt", &patterns));
    }

    #[test]
    fn test_root_admin_patterns_do_not_ignore_nested_paths() {
        let patterns = rules(&[".heddle", ".heddleignore"]);
        // At the repo root the admin names are ignored...
        assert!(ignored(".heddle/objects", &patterns));
        assert!(ignored(".heddle/state/index.bin", &patterns));
        assert!(ignored(".heddleignore", &patterns));
        // ...but nested copies (fixtures) must still be captured.
        assert!(!ignored("examples/calculator/.heddle/objects", &patterns));
        assert!(!ignored(
            "examples/calculator/.heddle/state/index.bin",
            &patterns
        ));
        assert!(!ignored("examples/calculator/.heddleignore", &patterns));
    }

    // ---- New gitignore-spec coverage ----

    #[test]
    fn test_path_relative_glob_matches_specific_directory_only() {
        // `config/*.toml` is the case the user called out: a glob tied
        // to one specific subdirectory, where `*` spans a single path
        // segment. A plain `secrets.toml` at the root must NOT be
        // ignored.
        let patterns = rules(&["config/*.toml"]);
        assert!(ignored("config/secrets.toml", &patterns));
        assert!(ignored("config/database.toml", &patterns));
        assert!(!ignored("secrets.toml", &patterns));
        assert!(!ignored("other/secrets.toml", &patterns));
    }

    #[test]
    fn test_double_star_recursive_glob_descends_directories() {
        // `**/*.pem` matches at every depth: the canonical "find each
        // PEM key under any directory" rule.
        let patterns = rules(&["**/*.pem"]);
        assert!(ignored("dev.pem", &patterns));
        assert!(ignored("keys/dev.pem", &patterns));
        assert!(ignored("nested/deeper/key.pem", &patterns));
        assert!(!ignored("dev.txt", &patterns));
    }

    #[test]
    fn test_negation_rule_whitelists_a_path() {
        // `*.log` followed by `!keep.log`: the negation rule cancels the
        // earlier match for that one name.
        let patterns = rules(&["*.log", "!keep.log"]);
        assert!(ignored("debug.log", &patterns));
        assert!(!ignored("keep.log", &patterns));
    }

    #[test]
    fn test_leading_slash_anchors_to_root_only() {
        // Root-anchored `/build` ignores the top-level `build/` but
        // leaves a nested `nested/build/` alone. That is different from
        // the bare `build` pattern, which matches anywhere.
        let patterns = rules(&["/build"]);
        assert!(ignored("build/output", &patterns));
        assert!(!ignored("nested/build/file", &patterns));
    }

    #[test]
    fn test_character_class_matches_set() {
        // `[Mm]akefile` is a standard gitignore character class that
        // accepts the upper- or lowercase first letter.
        let patterns = rules(&["[Mm]akefile"]);
        assert!(ignored("Makefile", &patterns));
        assert!(ignored("makefile", &patterns));
        assert!(!ignored("Rakefile", &patterns));
    }

    #[test]
    fn test_comments_and_blank_lines_are_handled_upstream() {
        // The matcher takes each line exactly as given (gitignore-spec
        // treats `#` as a comment marker). Repository strips comments
        // before calling in, but check that the matcher tolerates them
        // so a later refactor can drop the stripping without changing
        // behavior.
        let patterns = rules(&["# comment", "", "*.log"]);
        assert!(ignored("foo.log", &patterns));
        assert!(!ignored("foo.txt", &patterns));
    }

    #[test]
    fn prebuilt_matcher_matches_same_as_should_ignore() {
        // The compile-once API has to agree with the per-call
        // `should_ignore` wrapper on every path: it changes WHEN the
        // glob set is compiled, never WHAT it matches.
        let patterns = rules(&["node_modules", "*.log"]);
        let matcher = build_worktree_ignore(&patterns);
        let cases = [
            "node_modules/left-pad/index.js",
            "debug.log",
            "src/main.rs",
            "config/app.toml",
        ];
        for case in cases {
            let probe = PathBuf::from(case);
            let via_prebuilt = matcher.is_ignored(&probe);
            let via_wrapper = should_ignore(&probe, &patterns);
            assert_eq!(
                via_prebuilt, via_wrapper,
                "prebuilt matcher and should_ignore disagree on {case}"
            );
        }
        // Reuse across many paths is the point of the prebuilt matcher,
        // so pin a couple of concrete outcomes as well.
        assert!(matcher.is_ignored(&PathBuf::from("node_modules/x")));
        assert!(!matcher.is_ignored(&PathBuf::from("src/lib.rs")));
    }

    #[test]
    fn test_malformed_pattern_does_not_break_matcher() {
        // An unbalanced bracket makes the builder reject that line; the
        // error is swallowed and the remaining rules still apply.
        let patterns = rules(&["[unbalanced", "*.log"]);
        assert!(ignored("foo.log", &patterns));
    }
}