objects/worktree/worktree_ignore.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Ignore pattern helpers for worktree operations.
3//!
4//! `.heddleignore` follows the same syntax as `.gitignore`: literal
5//! names, leading `/` for root-anchored rules, trailing `/` for
6//! directory-only matches, `*` and `**` glob wildcards, character
7//! classes (`[abc]`), and `!` negation (whitelist) rules. The matcher
8//! delegates to the `ignore` crate's gitignore implementation so the
9//! semantics are spec-compliant; only the patterns themselves are
10//! sourced from `.heddleignore` instead of `.gitignore`.
11//!
12//! Three "root-admin" pattern names — `.heddle`, `.heddleignore`,
13//! and `.git` — get an implicit leading `/` so they match only at
14//! the repo root. This preserves the long-standing invariant that a
15//! nested `.heddle/` directory (e.g. an `examples/calculator/.heddle`
16//! fixture) is *captured*, not silently dropped. Operators who want
17//! the gitignore-spec "match anywhere" behavior for those names can
18//! write `**/<name>` explicitly.
19
20use std::path::Path;
21
22use ignore::gitignore::{Gitignore, GitignoreBuilder};
23
24/// Whether `path` is covered by any of the `.heddleignore` patterns.
25///
26/// `is_dir = true` is passed to the underlying gitignore matcher so
27/// trailing-slash rules (`target/`, `build/`) match the bare directory
28/// entry itself — not just paths *inside* it. This preserves the
29/// pre-existing in-house matcher's behavior, where `build/` on a bare
30/// `build` path returned `true`. Walker callers depend on this to
31/// prune entire directory subtrees before descending; the alternative
32/// (`is_dir = false`) caused unnecessary traversal of `target/`,
33/// `node_modules/`, and other build trees.
34///
35/// Non-directory rules (`*.log`, `node_modules`, `[Mm]akefile`) are
36/// unaffected — gitignore-spec rules without a trailing slash match
37/// regardless of the `is_dir` flag.
38pub fn should_ignore(path: &Path, patterns: &[String]) -> bool {
39 matched(&build_matcher(patterns), path)
40}
41
42/// Build a `Gitignore` matcher from the given pattern strings,
43/// translating the root-admin special cases (`.heddle`,
44/// `.heddleignore`, `.git`) into root-anchored gitignore syntax.
45fn build_matcher(patterns: &[String]) -> Gitignore {
46 // Root path is symbolic — paths fed to `matched` are interpreted
47 // relative to it. Callers always pass repo-relative paths, so the
48 // root just needs to be a stable, in-memory anchor.
49 let mut builder = GitignoreBuilder::new("");
50 for pattern in patterns {
51 let line = canonical_line(pattern);
52 // `add_line` returns Err only on malformed glob syntax. We
53 // silently skip malformed user patterns — heddle's ingest path
54 // shouldn't error on a typo'd `.heddleignore` line; it should
55 // ignore the bad rule and keep going.
56 let _ = builder.add_line(None, &line);
57 }
58 // `build()` only fails on internal compile errors. The empty
59 // matcher (`Gitignore::empty()`) matches nothing — the right
60 // failure mode if we get here.
61 builder.build().unwrap_or_else(|_| Gitignore::empty())
62}
63
/// Translate one `.heddleignore` line into the line handed to the
/// gitignore builder.
///
/// A line that is *exactly* `.heddle`, `.heddleignore`, or `.git` gains
/// a leading `/`, anchoring it to the repo root. Any other line —
/// including variants such as `.heddle/` or `!.git` — is returned
/// unchanged, so the usual gitignore syntax (`*`, `**`, `[abc]`,
/// `!negation`, trailing `/`, leading `/`) reaches the builder verbatim.
fn canonical_line(pattern: &str) -> String {
    const ROOT_ADMIN_NAMES: [&str; 3] = [".heddle", ".heddleignore", ".git"];

    if !ROOT_ADMIN_NAMES.contains(&pattern) {
        return pattern.to_owned();
    }

    let mut anchored = String::with_capacity(pattern.len() + 1);
    anchored.push('/');
    anchored.push_str(pattern);
    anchored
}
76
77/// Apply the matcher to a relative path. Whitelist (`!negation`)
78/// rules unset the match; we surface only the `Ignore` outcome.
79///
80/// `is_dir = true`: trailing-slash rules (`build/`) match the bare
81/// directory entry as well as paths inside it. See the docstring on
82/// `should_ignore` for the migration rationale.
83fn matched(gi: &Gitignore, path: &Path) -> bool {
84 matches!(
85 gi.matched_path_or_any_parents(path, /* is_dir */ true),
86 ignore::Match::Ignore(_)
87 )
88}
89
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Call `should_ignore` with string literals, building the owned
    /// pattern list and path that the public API expects.
    fn ignored(path: &str, rules: &[&str]) -> bool {
        let patterns: Vec<String> = rules.iter().map(|&rule| rule.to_string()).collect();
        should_ignore(&PathBuf::from(path), &patterns)
    }

    #[test]
    fn test_glob_extension() {
        let rules = ["*.log"];
        assert!(ignored("test.log", &rules));
        assert!(ignored("debug.log", &rules));
        assert!(!ignored("test.txt", &rules));
    }

    #[test]
    fn test_directory_pattern() {
        let rules = ["build/"];
        assert!(ignored("build/output.txt", &rules));
        // The bare directory entry must match too: the matcher is
        // queried with `is_dir = true`, so the trailing-slash rule fires
        // on `build` itself. That is what lets a walker prune large
        // dependency / build trees (`target/`, `node_modules/`) instead
        // of recursing into them.
        assert!(ignored("build", &rules));
        assert!(ignored("build/anything", &rules));
        assert!(!ignored("builder.txt", &rules));
    }

    #[test]
    fn test_simple_pattern() {
        let rules = ["node_modules"];
        assert!(ignored("node_modules/package.json", &rules));
        assert!(!ignored("src/main.rs", &rules));
    }

    #[test]
    fn test_simple_pattern_does_not_match_prefixes() {
        let rules = ["target"];
        assert!(ignored("target/output.txt", &rules));
        assert!(ignored("build/target/app", &rules));
        assert!(!ignored("target.txt", &rules));
        assert!(!ignored("targeted/output.txt", &rules));
    }

    #[test]
    fn test_root_admin_patterns_do_not_ignore_nested_paths() {
        let rules = [".heddle", ".heddleignore"];
        // At the repo root the admin names are ignored...
        assert!(ignored(".heddle/objects", &rules));
        assert!(ignored(".heddle/state/index.bin", &rules));
        assert!(ignored(".heddleignore", &rules));
        // ...but the same names nested deeper are kept.
        assert!(!ignored("examples/calculator/.heddle/objects", &rules));
        assert!(!ignored(
            "examples/calculator/.heddle/state/index.bin",
            &rules
        ));
        assert!(!ignored("examples/calculator/.heddleignore", &rules));
    }

    // ---- New gitignore-spec coverage ----

    #[test]
    fn test_path_relative_glob_matches_specific_directory_only() {
        // `config/*.toml`: a glob tied to one specific subdirectory,
        // where `*` covers a single path segment. A plain
        // `secrets.toml` at the root must NOT be ignored.
        let rules = ["config/*.toml"];
        assert!(ignored("config/secrets.toml", &rules));
        assert!(ignored("config/database.toml", &rules));
        assert!(!ignored("secrets.toml", &rules));
        assert!(!ignored("other/secrets.toml", &rules));
    }

    #[test]
    fn test_double_star_recursive_glob_descends_directories() {
        // `**/*.pem` matches at any depth: every PEM key under any
        // directory, including the root.
        let rules = ["**/*.pem"];
        assert!(ignored("dev.pem", &rules));
        assert!(ignored("keys/dev.pem", &rules));
        assert!(ignored("nested/deeper/key.pem", &rules));
        assert!(!ignored("dev.txt", &rules));
    }

    #[test]
    fn test_negation_rule_whitelists_a_path() {
        // `*.log` followed by `!keep.log`: the negation undoes the
        // earlier match for that one name.
        let rules = ["*.log", "!keep.log"];
        assert!(ignored("debug.log", &rules));
        assert!(!ignored("keep.log", &rules));
    }

    #[test]
    fn test_leading_slash_anchors_to_root_only() {
        // `/build` is root-anchored: it ignores the top-level `build/`
        // but leaves `nested/build/` alone. The bare `build` pattern
        // differs in that it matches anywhere.
        let rules = ["/build"];
        assert!(ignored("build/output", &rules));
        assert!(!ignored("nested/build/file", &rules));
    }

    #[test]
    fn test_character_class_matches_set() {
        // `[Mm]akefile` is a standard gitignore character class that
        // accepts either capitalization of the first letter.
        let rules = ["[Mm]akefile"];
        assert!(ignored("Makefile", &rules));
        assert!(ignored("makefile", &rules));
        assert!(!ignored("Rakefile", &rules));
    }

    #[test]
    fn test_comments_and_blank_lines_are_handled_upstream() {
        // Lines are handed to the matcher verbatim, and gitignore
        // syntax treats `#` as a comment marker. Repository strips
        // comments before calling, but the matcher must tolerate them
        // as well, so a later refactor can drop the stripping without
        // changing behavior.
        let rules = ["# comment", "", "*.log"];
        assert!(ignored("foo.log", &rules));
        assert!(!ignored("foo.txt", &rules));
    }

    #[test]
    fn test_malformed_pattern_does_not_break_matcher() {
        // An unbalanced bracket makes the builder reject that line; it
        // is dropped silently and the remaining rules still apply.
        let rules = ["[unbalanced", "*.log"];
        assert!(ignored("foo.log", &rules));
    }
}