Skip to main content

jj_lib/
gitignore.rs

1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::fs;
18use std::io;
19use std::iter;
20use std::path::Path;
21use std::path::PathBuf;
22use std::sync::Arc;
23
24use thiserror::Error;
25
26use crate::repo_path::RepoPath;
27use crate::repo_path::RepoPathBuf;
28
29#[derive(Debug, Error)]
30pub enum GitIgnoreError {
31    #[error("Failed to read ignore patterns from file {path}")]
32    ReadFile { path: PathBuf, source: io::Error },
33}
34
35/// Models the effective contents of multiple .gitignore files.
36#[derive(Debug)]
37pub struct GitIgnoreFile {
38    parent: Option<Arc<Self>>,
39    matcher: gix_ignore::Search,
40    prefix: RepoPathBuf,
41}
42
43impl GitIgnoreFile {
44    pub fn empty() -> Arc<Self> {
45        Arc::new(Self {
46            parent: None,
47            matcher: gix_ignore::Search::default(),
48            prefix: RepoPathBuf::root(),
49        })
50    }
51
52    /// Concatenates new `.gitignore` content at the `prefix` directory.
53    pub fn chain(
54        self: &Arc<Self>,
55        prefix: &RepoPath,
56        ignore_path: &Path,
57        input: &[u8],
58    ) -> Result<Arc<Self>, GitIgnoreError> {
59        // Construct the gix search object.
60        let mut matcher = gix_ignore::Search::default();
61        // Since we strip the path prefix manually in matches(), the root path
62        // shouldn't be set. add_patterns_buffer() expects filesystem path pairs
63        // e.g. ignore_path = "/repo/bar/.gitignore" and root = "/repo".
64        let root = None;
65        matcher.add_patterns_buffer(
66            input,
67            ignore_path,
68            root,
69            gix_ignore::search::Ignore {
70                support_precious: false,
71            },
72        );
73
74        let parent = if self.matcher.patterns.is_empty() {
75            self.parent.clone() // omit the empty root
76        } else {
77            Some(self.clone())
78        };
79        Ok(Arc::new(Self {
80            parent,
81            matcher,
82            prefix: prefix.to_owned(),
83        }))
84    }
85
86    /// Concatenates new `.gitignore` file at the `prefix` directory.
87    pub fn chain_with_file(
88        self: &Arc<Self>,
89        prefix: &RepoPath,
90        file: PathBuf,
91    ) -> Result<Arc<Self>, GitIgnoreError> {
92        if file.is_file() {
93            let buf = fs::read(&file).map_err(|err| GitIgnoreError::ReadFile {
94                path: file.clone(),
95                source: err,
96            })?;
97            self.chain(prefix, &file, &buf)
98        } else {
99            Ok(self.clone())
100        }
101    }
102
103    /// Returns whether the specified file path should be ignored.
104    ///
105    /// This method does not directly define which files should not be tracked
106    /// in the repository. Instead, it performs a simple matching against the
107    /// last applicable .gitignore line.
108    ///
109    /// This only performs exact matching; callers handle recursion of parent
110    /// directories. Callers shouldn't recursively match inside ignored
111    /// directories, because all (untracked) child files should also be ignored;
112    /// the exact matching logic won't give correct results in that case.
113    pub fn matches_file(&self, path: &RepoPath) -> bool {
114        self.matches(path, false)
115    }
116
117    /// Returns whether the specified directory path should be ignored.
118    ///
119    /// See [`GitIgnoreFile::matches_file()`] for details.
120    pub fn matches_dir(&self, path: &RepoPath) -> bool {
121        self.matches(path, true)
122    }
123
124    fn matches(&self, path: &RepoPath, is_dir: bool) -> bool {
125        for file in iter::successors(Some(self), |file| file.parent.as_deref()) {
126            if let Some(relative_path) = path.strip_prefix(&file.prefix)
127                && !relative_path.is_root()
128            {
129                let m = file.matcher.pattern_matching_relative_path(
130                    relative_path.as_internal_file_string().as_ref(),
131                    Some(is_dir),
132                    gix_ignore::glob::pattern::Case::Sensitive,
133                );
134                if let Some(m) = m {
135                    return !m.pattern.is_negative();
136                }
137            }
138        }
139
140        false
141    }
142}
143
#[cfg(test)]
mod tests {

    use super::*;

    // Source path handed to chain() for every pattern buffer below. Would
    // ideally be a constant, but we can't create a Path at compile time.
    fn source_path() -> &'static Path {
        Path::new(".gitignore")
    }

    // Parses `value` as a repo path, unwrapping the result.
    fn rp(value: &str) -> &RepoPath {
        RepoPath::from_internal_string(value).unwrap()
    }

    // Chains `content` onto an empty file at the repository root.
    fn at_root(content: &[u8]) -> Arc<GitIgnoreFile> {
        GitIgnoreFile::empty()
            .chain(RepoPath::root(), source_path(), content)
            .unwrap()
    }

    // Chains `content` onto an empty file at directory `dir`.
    fn in_dir(dir: &str, content: &[u8]) -> Arc<GitIgnoreFile> {
        GitIgnoreFile::empty()
            .chain(rp(dir), source_path(), content)
            .unwrap()
    }

    // Reports whether root-level `content` ignores `path`. A trailing '/' on
    // `path` selects directory matching, otherwise file matching is used.
    fn ignores(content: &[u8], path: &str) -> bool {
        let file = at_root(content);
        if let Some(dir) = path.strip_suffix('/') {
            file.matches_dir(rp(dir))
        } else {
            file.matches_file(rp(path))
        }
    }

    // Shared expectations of the deep-dir tests: only dir1/dir2/dir3 itself
    // is reported as ignored.
    fn assert_only_dir3_ignored(file: &GitIgnoreFile) {
        assert!(!file.matches_file(rp("foo")));
        assert!(!file.matches_dir(rp("dir1")));
        assert!(!file.matches_dir(rp("dir1/dir2")));
        assert!(file.matches_dir(rp("dir1/dir2/dir3")));
        assert!(!file.matches_dir(rp("dir1/dir2/dir3/dir4")));
    }

    #[test]
    fn test_gitignore_empty_file() {
        assert!(!GitIgnoreFile::empty().matches_file(rp("foo")));
    }

    #[test]
    fn test_gitignore_empty_file_with_prefix() {
        let file = in_dir("dir", b"");
        assert!(!file.matches_file(rp("dir/foo")));
    }

    #[test]
    fn test_gitignore_literal() {
        let file = at_root(b"foo\n");
        for ignored in ["foo", "dir/foo", "dir/subdir/foo"] {
            assert!(file.matches_file(rp(ignored)), "{ignored}");
        }
        for kept in ["food", "dir/food"] {
            assert!(!file.matches_file(rp(kept)), "{kept}");
        }
    }

    #[test]
    fn test_gitignore_literal_with_prefix() {
        let file = in_dir("dir", b"foo\n");
        for ignored in ["dir/foo", "dir/subdir/foo"] {
            assert!(file.matches_file(rp(ignored)), "{ignored}");
        }
    }

    #[test]
    fn test_gitignore_pattern_same_as_prefix() {
        let file = in_dir("dir", b"dir\n");
        assert!(file.matches_file(rp("dir/dir")));
        // The "dir" pattern must not apply to the parent directory itself
        assert!(!file.matches_file(rp("dir/foo")));
    }

    #[test]
    fn test_gitignore_rooted_literal() {
        let file = at_root(b"/foo\n");
        assert!(file.matches_file(rp("foo")));
        assert!(!file.matches_file(rp("dir/foo")));
    }

    #[test]
    fn test_gitignore_rooted_literal_with_prefix() {
        let file = in_dir("dir", b"/foo\n");
        assert!(file.matches_file(rp("dir/foo")));
        assert!(!file.matches_file(rp("dir/subdir/foo")));
    }

    #[test]
    fn test_gitignore_deep_dir() {
        let file = at_root(b"/dir1/dir2/dir3\n");
        assert_only_dir3_ignored(&file);
    }

    #[test]
    fn test_gitignore_deep_dir_chained() {
        // Prefix is relative to root, not to parent file
        let root = at_root(b"/dummy\n");
        let dir1 = root.chain(rp("dir1"), source_path(), b"/dummy\n").unwrap();
        let dir2 = dir1
            .chain(rp("dir1/dir2"), source_path(), b"/dir3\n")
            .unwrap();
        assert_only_dir3_ignored(&dir2);
    }

    #[test]
    fn test_gitignore_match_only_dir() {
        let file = at_root(b"/dir/\n");
        assert!(!file.matches_file(rp("dir")));
        assert!(file.matches_dir(rp("dir")));
        assert!(!file.matches_file(rp("dir/subdir")));
    }

    #[test]
    fn test_gitignore_unusual_symbols() {
        assert!(ignores(b"\\*\n", "*"));
        assert!(!ignores(b"\\*\n", "foo"));
        assert!(ignores(b"\\!\n", "!"));
        assert!(ignores(b"\\?\n", "?"));
        assert!(!ignores(b"\\?\n", "x"));
        assert!(ignores(b"\\w\n", "w"));
        assert!(ignores(b"\\\\\n", "\\"));
        assert!(!ignores(b"\\\n", "\\\n"));
        assert!(!ignores(b"\\\n", "\n"));
    }

    #[test]
    fn test_gitignore_backslash_path() {
        assert!(!ignores(b"/foo/bar", "foo\\bar"));
        assert!(!ignores(b"/foo/bar", "foo/bar\\"));

        assert!(!ignores(b"/foo/bar/", "foo\\bar/"));
        assert!(!ignores(b"/foo/bar/", "foo\\bar\\/"));
    }

    #[test]
    fn test_gitignore_whitespace() {
        assert!(!ignores(b" \n", " "));
        assert!(ignores(b"\\ \n", " "));
        assert!(!ignores(b"\\\\ \n", " "));
        assert!(ignores(b" a\n", " a"));
        assert!(ignores(b"a b\n", "a b"));
        assert!(ignores(b"a b \n", "a b"));
        assert!(!ignores(b"a b \n", "a b "));
        assert!(ignores(b"a b\\ \\ \n", "a b  "));
        assert!(ignores(b"a b\\ \\  \n", "a b  "));
        // Exactly one trailing CR before the newline is dropped
        assert!(ignores(b"a\r\n", "a"));
        assert!(!ignores(b"a\r\n", "a\r"));
        assert!(ignores(b"a\r\r\n", "a\r"));
        assert!(!ignores(b"a\r\r\n", "a"));
        assert!(!ignores(b"a\r\r\n", "a\r\r"));
        assert!(!ignores(b"a\r\r\n", "a"));
        assert!(ignores(b"\ra\n", "\ra"));
        assert!(!ignores(b"\ra\n", "a"));
    }

    #[test]
    fn test_gitignore_glob() {
        assert!(!ignores(b"*.o\n", "foo"));
        assert!(ignores(b"*.o\n", "foo.o"));
        assert!(!ignores(b"foo.?\n", "foo"));
        assert!(!ignores(b"foo.?\n", "foo."));
        assert!(ignores(b"foo.?\n", "foo.o"));
    }

    #[test]
    fn test_gitignore_range() {
        let explicit_set = [
            ("foo", false),
            ("foo.a", true),
            ("foo.g", false),
            ("foo.z", true),
        ];
        for (path, expected) in explicit_set {
            assert_eq!(ignores(b"foo.[az]\n", path), expected, "{path}");
        }

        let single_range = [
            ("foo", false),
            ("foo.a", true),
            ("foo.g", true),
            ("foo.z", true),
        ];
        for (path, expected) in single_range {
            assert_eq!(ignores(b"foo.[a-z]\n", path), expected, "{path}");
        }

        let hex_digits = [
            ("foo.5", true),
            ("foo.c", true),
            ("foo.E", true),
            ("foo._", false),
        ];
        for (path, expected) in hex_digits {
            assert_eq!(ignores(b"foo.[0-9a-fA-F]\n", path), expected, "{path}");
        }
    }

    #[test]
    fn test_gitignore_leading_dir_glob() {
        let first = at_root(b"**/foo\n");
        assert!(first.matches_file(rp("foo")));
        assert!(first.matches_file(rp("dir1/dir2/foo")));
        assert!(!first.matches_file(rp("foo/file")));

        let second = first
            .chain(RepoPath::root(), source_path(), b"**/foo\n")
            .unwrap();
        assert!(second.matches_file(rp("dir/foo")));
        assert!(second.matches_file(rp("dir1/dir2/dir/foo")));
    }

    #[test]
    fn test_gitignore_leading_dir_glob_with_prefix() {
        let file = in_dir("dir1/dir2", b"**/foo\n");
        assert!(file.matches_file(rp("dir1/dir2/foo")));
        assert!(!file.matches_file(rp("dir1/dir2/bar")));
        assert!(file.matches_file(rp("dir1/dir2/sub1/sub2/foo")));
        assert!(!file.matches_file(rp("dir1/dir2/sub1/sub2/bar")));
    }

    #[test]
    fn test_gitignore_trailing_dir_glob() {
        assert!(!ignores(b"abc/**\n", "abc"));
        assert!(ignores(b"abc/**\n", "abc/file"));
        assert!(ignores(b"abc/**\n", "abc/dir/file"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob() {
        assert!(ignores(b"a/**/b\n", "a/b"));
        assert!(ignores(b"a/**/b\n", "a/x/b"));
        assert!(ignores(b"a/**/b\n", "a/x/y/b"));
        assert!(!ignores(b"a/**/b\n", "ax/y/b"));
        assert!(!ignores(b"a/**/b\n", "a/x/yb"));
        assert!(!ignores(b"a/**/b\n", "ab"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob_not_really() {
        assert!(!ignores(b"a/x**y/b\n", "a/b"));
        assert!(ignores(b"a/x**y/b\n", "a/xy/b"));
        assert!(ignores(b"a/x**y/b\n", "a/xzzzy/b"));
    }

    #[test]
    fn test_gitignore_glob_all_root() {
        let file = at_root(b"*\n");
        assert!(!file.matches_dir(RepoPath::root()));
        assert!(file.matches_file(rp("foo")));
        assert!(file.matches_dir(rp("foo")));
        assert!(file.matches_file(rp("foo/bar")));
        assert!(file.matches_dir(rp("foo/bar")));
    }

    #[test]
    fn test_gitignore_glob_all_subdir() {
        let file = in_dir("foo", b"*\n");
        assert!(!file.matches_dir(RepoPath::root()));
        assert!(!file.matches_file(rp("foo")));
        assert!(!file.matches_dir(rp("foo")));
        assert!(file.matches_file(rp("foo/bar")));
        assert!(file.matches_dir(rp("foo/bar")));
        assert!(!file.matches_file(rp("bar/baz")));
        assert!(!file.matches_dir(rp("bar/baz")));
    }

    #[test]
    fn test_gitignore_with_utf8_bom() {
        assert!(ignores(b"\xef\xbb\xbffoo\n", "foo"));
        assert!(!ignores(b"\n\xef\xbb\xbffoo\n", "foo"));
    }

    #[test]
    fn test_gitignore_line_ordering() {
        let negated_tail = at_root(b"foo*\n!foobar*\n");
        assert!(negated_tail.matches_file(rp("foo")));
        assert!(!negated_tail.matches_file(rp("foobar")));
        assert!(!negated_tail.matches_file(rp("foobarbaz")));

        let re_ignored = at_root(b"foo*\n!foobar*\nfoobarbaz");
        assert!(re_ignored.matches_file(rp("foo")));
        assert!(!re_ignored.matches_file(rp("foobar")));
        assert!(re_ignored.matches_file(rp("foobarbaz")));
        assert!(!re_ignored.matches_file(rp("foobarquux")));

        let nested = at_root(b"foo/*\n!foo/bar");
        assert!(nested.matches_file(rp("foo/baz")));
        assert!(!nested.matches_file(rp("foo/bar")));
    }

    #[test]
    fn test_gitignore_file_ordering() {
        let file1 = at_root(b"/foo\n");
        assert!(file1.matches_file(rp("foo")));
        assert!(!file1.matches_file(rp("foo/bar")));
        assert!(!file1.matches_file(rp("foo/bar/baz")));

        let file2 = file1.chain(rp("foo"), source_path(), b"!/bar").unwrap();
        assert!(file1.matches_dir(rp("foo")));
        assert!(!file2.matches_file(rp("foo/bar")));
        assert!(!file2.matches_file(rp("foo/bar/baz")));
        assert!(!file2.matches_file(rp("foo/baz")));

        let file3 = file2.chain(rp("foo/bar"), source_path(), b"/baz").unwrap();
        assert!(!file2.matches_dir(rp("foo/bar")));
        assert!(file3.matches_file(rp("foo/bar/baz")));
        assert!(!file3.matches_file(rp("foo/bar/qux")));
    }

    #[test]
    fn test_gitignore_slash_after_glob() {
        let file = at_root(b"/*/\n");
        assert!(!file.matches_file(rp("foo")));
        assert!(file.matches_dir(rp("foo")));
        assert!(!file.matches_file(rp("foo/bar")));
        assert!(!file.matches_file(rp("foo/bar/baz")));
    }

    #[test]
    fn test_gitignore_negative_parent_directory() {
        // The following script shows that Git ignores the file:
        //
        // ```bash
        // $ rm -rf test-repo && \
        //   git init test-repo &>/dev/null && \
        //   cd test-repo && \
        //   printf 'A/B.*\n!/A/\n' >.gitignore && \
        //   mkdir A && \
        //   touch A/B.ext && \
        //   git check-ignore A/B.ext
        // A/B.ext
        // ```
        assert!(ignores(b"foo/bar.*\n!/foo/\n", "foo/bar.ext"));

        // Same two lines in the opposite order
        assert!(ignores(b"!/foo/\nfoo/bar.*\n", "foo/bar.ext"));
    }

    #[test]
    fn test_gitignore_invalid_utf8() {
        // A pattern buffer that is not valid UTF-8 should be accepted
        // without an error.
        let parsed = GitIgnoreFile::empty().chain(RepoPath::root(), source_path(), &[224]);
        assert!(parsed.is_ok());
    }
}