1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
use std::{
collections::{HashMap, hash_map::Entry},
ffi::OsStr,
path::{Path, PathBuf},
sync::{Arc, Mutex},
};
use crate::{
constant,
evaluator::{self, File, git_config, git_root, types::Result, utils},
};
/// An evaluator for `.gitignore` files in a given directory and its parent directories.
///
/// The evaluator maintains an internal cache of parsed `.gitignore` files to optimize performance when evaluating
/// multiple paths within the same directory structure.
///
/// The full specification of the `.gitignore` format, along with the behaviour and hierarchy of `.gitignore` files,
/// can be found in the [git documentation](https://git-scm.com/docs/gitignore#_description).
///
/// # Examples
///
/// ```rust
/// use ignored::evaluator::Evaluator;
///
/// # std::fs::create_dir("tests/fixtures/mock-project/.git");
/// let evaluator = Evaluator::default();
/// let ignored = evaluator.is_ignored("tests/fixtures/mock-project/file.tmp");
///
/// assert!(ignored);
/// ```
#[derive(Debug, Default)]
pub struct Evaluator {
/// A map of previously parsed ignore files, keyed by the path of the file that was parsed.
///
/// This is an optimisation which allows the evaluator to avoid re-parsing frequently accessed
/// `.gitignore` files. A cached entry is only reused while its stored checksum still matches the
/// checksum of the file on disk; otherwise the file is parsed again.
///
/// The map sits behind a [`Mutex`] so it can be updated through `&self`.
files: Mutex<HashMap<PathBuf, Arc<File>>>,
/// Handler used to look up the path of the user's global git exclude file.
config: git_config::ConfigHandler,
/// Handler used to look up and record git roots encountered while walking a path, and to
/// locate the exclude file belonging to a git root.
root: git_root::RootHandler,
}
impl Evaluator {
/// Decide whether an arbitrary path is ignored by git.
///
/// `is_ignored` consults the three sources of ignore patterns in the precedence order described in the
/// [git documentation](https://git-scm.com/docs/gitignore#_description). The first source that
/// matches `path` decides the outcome:
///
/// 1. `.gitignore` files in the same directory as the path, or in any parent directory (up to
///    the top-level of the working tree).
/// 2. `$GIT_DIR/info/exclude`, consulted only when a git root was found for the path.
/// 3. The file specified by the configuration variable `core.excludesFile`.
///
/// Returns `true` if the path is ignored, and `false` otherwise (including when none of the
/// sources match the path).
///
/// # Examples
///
/// ```rust
/// use ignored::evaluator::Evaluator;
///
/// # std::fs::create_dir("tests/fixtures/mock-project/.git");
/// let evaluator = Evaluator::default();
/// let ignored = evaluator.is_ignored("tests/fixtures/mock-project/file.tmp");
///
/// assert!(ignored);
/// ```
#[must_use]
pub fn is_ignored(&self, path: impl AsRef<Path>) -> bool {
    let target = path.as_ref();

    // Source 1: the `.gitignore` hierarchy. This pass also reports the closest git root, which
    // the remaining two sources need.
    let (git_root, gitignore_verdict) = self.evaluate_gitignore_files(target);
    if let Some(is_ignored) = gitignore_verdict {
        log::debug!(
            "{} is ignored by .gitignore: {is_ignored}",
            target.display()
        );
        return is_ignored;
    }

    // Sources 2 and 3 are chained lazily: the global exclude file is only consulted when the
    // repository-local one is unavailable or does not mention the path.
    git_root
        .as_ref()
        .and_then(|root| self.evaluate_local_git_exclude_file(root, target))
        .or_else(|| self.evaluate_global_git_exclude_file(git_root.as_ref(), target))
        .unwrap_or(false)
}
/// Evaluate the repository's `.gitignore` files to determine if a given file or path is
/// ignored.
///
/// This is the first of three methods of ignoring files in git.
///
/// This follows the precedence rules defined in the [git documentation](https://git-scm.com/docs/gitignore#_description).
///
/// During traversal it also records the closest relative git root (directory containing a
/// `.git`), which is beneficial for the second evaluation method - which is an ignore file
/// listed in the git root (`.git/info/exclude`).
///
/// # Returns
///
/// A tuple of `(closest_git_root, is_ignored)`:
///
/// - `closest_git_root` is the git root known for the path once traversal has finished (or at
///   the point of an early return), if any.
/// - `is_ignored` is true or false, which denotes whether the file is ignored or not, only if the
///   path was matched in at least one `.gitignore` file. If not, [`Option::None`] is returned,
///   denoting that no `.gitignore` file matched the path in either direction.
fn evaluate_gitignore_files(&self, path: impl AsRef<Path>) -> (Option<PathBuf>, Option<bool>) {
// Start from whatever git root the root handler already reports for this path, if any.
let mut closest_git_root = self.root.get_closest(&path);
let path_parts = path.as_ref().iter().collect::<Vec<&OsStr>>();
// Ancestors above an already known git root are skipped: traversal starts at the root's
// component count, or at 1 (the first component of the path) when no root is known yet.
let closest_git_root_offset = closest_git_root
.as_ref()
.map_or(1, |root| root.components().count());
let mut is_in_git_root = closest_git_root.is_some();
let mut is_ignored = None;
// Walk from the outermost considered ancestor towards the path. `base_path` is made of the
// first `i` components, so the final component of `path` is never used as a base directory.
for i in closest_git_root_offset..path_parts.len() {
let base_path: PathBuf = path_parts[0..i].iter().collect();
// NOTE(review): `record` appears to return true when `base_path` is a git root - confirm
// against `RootHandler`. It is invoked for every visited ancestor, before the comparison
// with the currently known root.
if self.root.record(&base_path)
&& closest_git_root
.as_ref()
.is_none_or(|closest| closest != &base_path)
{
// We've encountered this git root for the first time, we need to update our list of
// encountered git roots. We also might already be in a git root (i.e. `.git` in a
// subdirectory of another git root), in which case we need to reset our current
// ignored decision.
if is_in_git_root {
// We've reached _another_ git root, even though we're already in a git root (i.e.
// a repo inside a repo). We should reset our current ignored decision.
is_ignored = None;
log::debug!(
"Encountered recursive git root at: {}",
base_path.as_path().display()
);
} else {
is_in_git_root = true;
log::debug!("Encountered git root at: {}", base_path.as_path().display());
}
// Update the closest git root as we've now encountered one we previously didn't
// know about.
closest_git_root = Some(base_path.clone());
} else if !is_in_git_root {
// We've still not reached a git root (i.e. a `.git` folder). Conforming to git's
// semantics this means any `.gitignore` files don't apply.
continue;
}
let potential_gitignore = base_path.join(constant::GITIGNORE_FILE);
// No base path override is passed here, so the patterns stay relative to the `.gitignore`
// file itself. A missing or unreadable file simply means this directory contributes
// nothing to the decision.
let gitignore_file = match self
.get_or_parse_gitignore(Option::<&PathBuf>::None, potential_gitignore.as_path())
{
Ok(Some(gitignore_file)) => gitignore_file,
Ok(None) => continue,
Err(e) => {
log::error!(
"Failed to read .gitignore file at {}: {:?}",
potential_gitignore.display(),
e
);
continue;
}
};
// NB: `[0..=i]` is inclusive, so it is effectively the same as `[0..i+1]`: it selects
// `base_path` plus the next component of `path`, i.e. the entry directly inside
// `base_path` that leads to `path`. Joining with "" appends a trailing separator.
//
// NOTE(review): on the final iteration (`i == path_parts.len() - 1`) this is `path` itself
// with a trailing separator - confirm that this is the intended input when `path` is a
// regular file rather than a directory.
let parent_path = path_parts[0..=i].iter().collect::<PathBuf>().join("");
if gitignore_file
.is_ignored(parent_path.as_path())
.is_some_and(|ignored| ignored)
{
// Git doesn't list excluded directories for performance reasons, so any patterns on
// contained files have no effect, no matter where they are defined.
//
// In other words, despite keep.me being explicitly not ignored in the example below, the
// vendor directory is still ignored, which causes keep.me to be ignored as well:
//
// ```
// vendor/
// !vendor/keep.me
// ```
log::debug!(
"{} is ignored so {} is ignored by association.",
parent_path.as_path().display(),
path.as_ref().display()
);
return (closest_git_root, Some(true));
}
if let Some(result) = gitignore_file.is_ignored(path.as_ref()) {
// Patterns in the higher level files are overridden by those in
// lower level files down to the directory containing the file.
//
// We _have to_ check patterns in the higher levels _first_ because
// they might ignore whole directories which will prevent evaluations
// in the lower levels from having any effect.
is_ignored = Some(result);
}
}
(closest_git_root, is_ignored)
}
/// Evaluate the repository's `.git/info/exclude` located at the root of the working tree.
///
/// This is the second of three methods of ignoring files in git.
///
/// This follows the precedence rules defined in the [git documentation](https://git-scm.com/docs/gitignore#_description).
///
/// `git_root` is the git root whose exclude file should be consulted; it is also used as the
/// base path for the patterns inside that file. `path` is the path being evaluated.
///
/// This method returns true or false, which denotes whether the file is ignored or not, only if the path was
/// matched in `.git/info/exclude`. [`Option::None`] is returned when the root handler yields no
/// exclude path, when the file does not exist or could not be read (the failure is logged), or
/// when the path is not listed in it.
fn evaluate_local_git_exclude_file(
    &self,
    git_root: &impl AsRef<Path>,
    path: impl AsRef<Path>,
) -> Option<bool> {
    let exclude_file = self.root.get_exclude_path(git_root)?;

    // A read/parse failure is deliberately best-effort: log it and behave as if the exclude
    // file did not list the path.
    let gitignore_file = match self.get_or_parse_gitignore(Some(git_root), &exclude_file) {
        Ok(file) => file,
        Err(e) => {
            log::error!(
                "Failed to read .gitignore file at {}: {:?}",
                exclude_file.display(),
                e
            );
            None
        }
    }?;

    let is_ignored = gitignore_file.is_ignored(&path)?;
    // The evaluated path comes first and the exclude file second, so the message reads
    // "<path> is ignored by <exclude file>" just like the global exclude file's log line.
    log::debug!(
        "{} is ignored by {}: {is_ignored}",
        path.as_ref().display(),
        exclude_file.as_path().display()
    );
    Some(is_ignored)
}
/// Evaluate the user's global `.gitignore` file (located by default at `$XDG_CONFIG_HOME/git/ignore`, or
/// if `$XDG_CONFIG_HOME` is either not set or empty, `$HOME/.config/git/ignore`, and customised using
/// `core.excludesfile` in global git configuration).
///
/// This is the third of three methods of ignoring files in git.
///
/// This follows the precedence rules defined in the [git documentation](https://git-scm.com/docs/gitignore#_description),
/// and the git config rules defined in the [git documentation](https://git-scm.com/docs/git-config#FILES).
///
/// `git_root`, when present, is handed to the parser as the base path for the patterns in the
/// global file. `path` is the path being evaluated.
///
/// This method returns true or false, which denotes whether the file is ignored or not, only if the path was
/// matched in the global git ignore file. [`Option::None`] is returned when the location of the
/// global file cannot be determined, when the file does not exist or could not be read (the
/// failure is logged), or when the path is not listed in it.
fn evaluate_global_git_exclude_file(
    &self,
    git_root: Option<&impl AsRef<Path>>,
    path: impl AsRef<Path>,
) -> Option<bool> {
    // A failed configuration lookup is treated the same as "no global exclude file".
    let exclude_file = self.config.get_global_git_exclude_file_path().ok()?;

    let parsed = self
        .get_or_parse_gitignore(git_root, exclude_file.as_ref()?)
        .unwrap_or_else(|e| {
            log::error!("Failed to read global .gitignore file at {exclude_file:?}: {e:?}");
            None
        });

    let is_ignored = parsed?.is_ignored(&path)?;
    log::debug!(
        "{} is ignored by {:?}: {is_ignored}",
        path.as_ref().display(),
        exclude_file
    );
    Some(is_ignored)
}
/// Parse a `.gitignore` file at the given path, or return a cached version if it has already been parsed
/// and hasn't changed since.
///
/// Optionally, provide a base path to override the path to which all glob patterns defined inside the file
/// should be relative to.
///
/// When no base path is provided, the base path is assumed to be relative to the file being read. This is fine for
/// regular `.gitignore` files, however, when dealing with both global exclude files, and git root exclude files, the
/// base path provided will be the closest git root, not the file itself.
fn get_or_parse_gitignore(
&self,
base_path: Option<&impl AsRef<Path>>,
potential_gitignore: impl AsRef<Path>,
) -> Result<Option<Arc<File>>> {
if !potential_gitignore.as_ref().exists() {
return Ok(None);
}
let mut guard = self.files.lock().map_err(|_| {
evaluator::Error::CachePoisoned(potential_gitignore.as_ref().to_path_buf())
})?;
let gitignore_file = match guard.entry(potential_gitignore.as_ref().to_path_buf()) {
Entry::Occupied(mut e) => {
let (checksum, file) = {
let existing_file = e.get_mut();
let (target_checksum, file_handle) = crate::utils::compute_checksum(
potential_gitignore.as_ref(),
)
.map_err(|e| evaluator::Error::FileError {
file: potential_gitignore.as_ref().to_path_buf(),
source: e,
})?;
if existing_file.checksum == target_checksum {
return Ok(Some(Arc::clone(existing_file)));
}
(target_checksum, file_handle)
};
// We've parsed this file before but the content has changed. We need to re-parse
// it from scratch
Arc::clone(&e.insert(Arc::new(utils::read_gitignore(
base_path.as_ref(),
potential_gitignore.as_ref(),
file,
&checksum,
)?)))
}
Entry::Vacant(e) => {
let (target_checksum, file_handle) =
crate::utils::compute_checksum(potential_gitignore.as_ref()).map_err(|e| {
evaluator::Error::FileError {
file: potential_gitignore.as_ref().to_path_buf(),
source: e,
}
})?;
let gitignore_file = Arc::new(utils::read_gitignore(
base_path.as_ref(),
potential_gitignore.as_ref(),
file_handle,
&target_checksum,
)?);
// We've never encountered this file before, we need to parse it
Arc::clone(e.insert(gitignore_file))
}
};
drop(guard);
Ok(Some(gitignore_file))
}
}