Skip to main content

objects/worktree/
worktree_ignore.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Ignore pattern helpers for worktree operations.
3//!
4//! `.heddleignore` follows the same syntax as `.gitignore`: literal
5//! names, leading `/` for root-anchored rules, trailing `/` for
6//! directory-only matches, `*` and `**` glob wildcards, character
7//! classes (`[abc]`), and `!` negation (whitelist) rules. The matcher
8//! delegates to the `ignore` crate's gitignore implementation so the
9//! semantics are spec-compliant; only the patterns themselves are
10//! sourced from `.heddleignore` instead of `.gitignore`.
11//!
12//! Three "root-admin" pattern names — `.heddle`, `.heddleignore`,
13//! and `.git` — get an implicit leading `/` so they match only at
14//! the repo root. This preserves the long-standing invariant that a
15//! nested `.heddle/` directory (e.g. an `examples/calculator/.heddle`
16//! fixture) is *captured*, not silently dropped. Operators who want
17//! the gitignore-spec "match anywhere" behavior for those names can
18//! write `**/<name>` explicitly.
19
20use std::path::Path;
21
22use ignore::gitignore::{Gitignore, GitignoreBuilder};
23
24/// Whether `path` is covered by any of the `.heddleignore` patterns.
25///
26/// `is_dir = true` is passed to the underlying gitignore matcher so
27/// trailing-slash rules (`target/`, `build/`) match the bare directory
28/// entry itself — not just paths *inside* it. This preserves the
29/// pre-existing in-house matcher's behavior, where `build/` on a bare
30/// `build` path returned `true`. Walker callers depend on this to
31/// prune entire directory subtrees before descending; the alternative
32/// (`is_dir = false`) caused unnecessary traversal of `target/`,
33/// `node_modules/`, and other build trees.
34///
35/// Non-directory rules (`*.log`, `node_modules`, `[Mm]akefile`) are
36/// unaffected — gitignore-spec rules without a trailing slash match
37/// regardless of the `is_dir` flag.
38pub fn should_ignore(path: &Path, patterns: &[String]) -> bool {
39    matched(&build_matcher(patterns), path)
40}
41
42/// Build a `Gitignore` matcher from the given pattern strings,
43/// translating the root-admin special cases (`.heddle`,
44/// `.heddleignore`, `.git`) into root-anchored gitignore syntax.
45fn build_matcher(patterns: &[String]) -> Gitignore {
46    // Root path is symbolic — paths fed to `matched` are interpreted
47    // relative to it. Callers always pass repo-relative paths, so the
48    // root just needs to be a stable, in-memory anchor.
49    let mut builder = GitignoreBuilder::new("");
50    for pattern in patterns {
51        let line = canonical_line(pattern);
52        // `add_line` returns Err only on malformed glob syntax. We
53        // silently skip malformed user patterns — heddle's ingest path
54        // shouldn't error on a typo'd `.heddleignore` line; it should
55        // ignore the bad rule and keep going.
56        let _ = builder.add_line(None, &line);
57    }
58    // `build()` only fails on internal compile errors. The empty
59    // matcher (`Gitignore::empty()`) matches nothing — the right
60    // failure mode if we get here.
61    builder.build().unwrap_or_else(|_| Gitignore::empty())
62}
63
/// Translate a `.heddleignore` pattern into the line handed to the
/// gitignore builder.
///
/// The three root-admin names — `.heddle`, `.heddleignore`, `.git` —
/// gain a leading `/` so they only match at the repo root. Every other
/// pattern is returned unchanged, which lets the regular gitignore
/// syntax (`*`, `**`, `[abc]`, `!negation`, trailing `/`, leading `/`)
/// pass through verbatim.
///
/// NOTE(review): the comparison is an exact string match, so variants
/// such as `.heddle/` or `!.git` are *not* anchored and keep the
/// gitignore "match anywhere" meaning — confirm that is intended.
fn canonical_line(pattern: &str) -> String {
    const ROOT_ADMIN_NAMES: [&str; 3] = [".heddle", ".heddleignore", ".git"];

    if ROOT_ADMIN_NAMES.contains(&pattern) {
        format!("/{pattern}")
    } else {
        pattern.to_owned()
    }
}
76
77/// Apply the matcher to a relative path. Whitelist (`!negation`)
78/// rules unset the match; we surface only the `Ignore` outcome.
79///
80/// `is_dir = true`: trailing-slash rules (`build/`) match the bare
81/// directory entry as well as paths inside it. See the docstring on
82/// `should_ignore` for the migration rationale.
83fn matched(gi: &Gitignore, path: &Path) -> bool {
84    matches!(
85        gi.matched_path_or_any_parents(path, /* is_dir */ true),
86        ignore::Match::Ignore(_)
87    )
88}
89
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::*;

    /// Build the owned pattern list `should_ignore` takes from string
    /// literals.
    fn patterns_from(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|line| String::from(*line)).collect()
    }

    /// Assert that every path in `paths` is ignored under `patterns`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test
    /// rather than at this helper.
    #[track_caller]
    fn assert_ignored(patterns: &[String], paths: &[&str]) {
        for path in paths {
            assert!(
                should_ignore(Path::new(path), patterns),
                "expected `{}` to be ignored by {:?}",
                path,
                patterns
            );
        }
    }

    /// Assert that no path in `paths` is ignored under `patterns`.
    #[track_caller]
    fn assert_kept(patterns: &[String], paths: &[&str]) {
        for path in paths {
            assert!(
                !should_ignore(Path::new(path), patterns),
                "expected `{}` NOT to be ignored by {:?}",
                path,
                patterns
            );
        }
    }

    #[test]
    fn test_glob_extension() {
        let patterns = patterns_from(&["*.log"]);
        assert_ignored(&patterns, &["test.log", "debug.log"]);
        assert_kept(&patterns, &["test.txt"]);
    }

    #[test]
    fn test_directory_pattern() {
        let patterns = patterns_from(&["build/"]);
        // The bare `build` entry must match: walker callers ask
        // `should_ignore` whether to prune `build/` before descending.
        // With `is_dir = true` plumbed into the gitignore matcher, the
        // trailing-slash rule fires on the directory entry itself.
        // Without this, walks of large dependency / build trees
        // (`target/`, `node_modules/`) recurse unnecessarily.
        assert_ignored(&patterns, &["build/output.txt", "build", "build/anything"]);
        assert_kept(&patterns, &["builder.txt"]);
    }

    #[test]
    fn test_simple_pattern() {
        let patterns = patterns_from(&["node_modules"]);
        assert_ignored(&patterns, &["node_modules/package.json"]);
        assert_kept(&patterns, &["src/main.rs"]);
    }

    #[test]
    fn test_simple_pattern_does_not_match_prefixes() {
        let patterns = patterns_from(&["target"]);
        assert_ignored(&patterns, &["target/output.txt", "build/target/app"]);
        assert_kept(&patterns, &["target.txt", "targeted/output.txt"]);
    }

    #[test]
    fn test_root_admin_patterns_do_not_ignore_nested_paths() {
        let patterns = patterns_from(&[".heddle", ".heddleignore"]);
        assert_ignored(
            &patterns,
            &[".heddle/objects", ".heddle/state/index.bin", ".heddleignore"],
        );
        assert_kept(
            &patterns,
            &[
                "examples/calculator/.heddle/objects",
                "examples/calculator/.heddle/state/index.bin",
                "examples/calculator/.heddleignore",
            ],
        );
    }

    #[test]
    fn test_git_root_admin_pattern_is_root_anchored() {
        // `.git` is the third root-admin name: it is rewritten to
        // `/.git`, so only the repo-root `.git` is ignored and a nested
        // one is still captured.
        let patterns = patterns_from(&[".git"]);
        assert_ignored(&patterns, &[".git", ".git/config"]);
        assert_kept(
            &patterns,
            &["examples/calculator/.git", "examples/calculator/.git/config"],
        );
    }

    #[test]
    fn test_empty_pattern_list_ignores_nothing() {
        // No rules at all: nothing may be reported as ignored.
        assert_kept(&[], &["src/main.rs", "build", ".heddle"]);
    }

    // ---- New gitignore-spec coverage ----

    #[test]
    fn test_path_relative_glob_matches_specific_directory_only() {
        // `config/*.toml` is the case the user called out — a glob
        // anchored to a specific subdirectory, with `*` matching one
        // path segment. Plain `secrets.toml` at the root must NOT be
        // ignored.
        let patterns = patterns_from(&["config/*.toml"]);
        assert_ignored(&patterns, &["config/secrets.toml", "config/database.toml"]);
        assert_kept(&patterns, &["secrets.toml", "other/secrets.toml"]);
    }

    #[test]
    fn test_double_star_recursive_glob_descends_directories() {
        // `**/*.pem` matches at any depth — the canonical "find every
        // PEM key under any directory" pattern.
        let patterns = patterns_from(&["**/*.pem"]);
        assert_ignored(
            &patterns,
            &["dev.pem", "keys/dev.pem", "nested/deeper/key.pem"],
        );
        assert_kept(&patterns, &["dev.txt"]);
    }

    #[test]
    fn test_negation_rule_whitelists_a_path() {
        // `*.log` then `!keep.log` — the negation rule unsets the
        // earlier match for that specific name.
        let patterns = patterns_from(&["*.log", "!keep.log"]);
        assert_ignored(&patterns, &["debug.log"]);
        assert_kept(&patterns, &["keep.log"]);
    }

    #[test]
    fn test_leading_slash_anchors_to_root_only() {
        // `/build` (root-anchored) ignores the top-level `build/` but
        // not a nested `nested/build/` directory. Distinct semantics
        // from the bare `build` pattern, which matches anywhere.
        let patterns = patterns_from(&["/build"]);
        assert_ignored(&patterns, &["build/output"]);
        assert_kept(&patterns, &["nested/build/file"]);
    }

    #[test]
    fn test_character_class_matches_set() {
        // `[Mm]akefile` — matches uppercase or lowercase variants.
        // Standard gitignore character class.
        let patterns = patterns_from(&["[Mm]akefile"]);
        assert_ignored(&patterns, &["Makefile", "makefile"]);
        assert_kept(&patterns, &["Rakefile"]);
    }

    #[test]
    fn test_comments_and_blank_lines_are_handled_upstream() {
        // The matcher itself accepts every line it's given verbatim
        // (gitignore-spec treats `#` as a comment marker). Repository
        // strips comments before calling, but verify the matcher
        // tolerates them so a future refactor can stop stripping
        // without behavior change.
        let patterns = patterns_from(&["# comment", "", "*.log"]);
        assert_ignored(&patterns, &["foo.log"]);
        assert_kept(&patterns, &["foo.txt"]);
    }

    #[test]
    fn test_malformed_pattern_does_not_break_matcher() {
        // Unbalanced bracket: builder errors silently and the
        // pattern is dropped. Other rules continue to apply.
        let patterns = patterns_from(&["[unbalanced", "*.log"]);
        assert_ignored(&patterns, &["foo.log"]);
    }
}