Skip to main content

jj_lib/
gitignore.rs

1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::fs;
18use std::io;
19use std::iter;
20use std::path::Path;
21use std::path::PathBuf;
22use std::sync::Arc;
23
24use ignore::gitignore;
25use thiserror::Error;
26
27#[derive(Debug, Error)]
28pub enum GitIgnoreError {
29    #[error("Failed to read ignore patterns from file {path}")]
30    ReadFile { path: PathBuf, source: io::Error },
31    #[error("Invalid UTF-8 for ignore pattern in {path} on line #{line_num_for_display}: {line}")]
32    InvalidUtf8 {
33        path: PathBuf,
34        line_num_for_display: usize,
35        line: String,
36        source: std::str::Utf8Error,
37    },
38    #[error("Failed to parse ignore patterns from file {path}")]
39    Underlying {
40        path: PathBuf,
41        source: ignore::Error,
42    },
43}
44
45/// Models the effective contents of multiple .gitignore files.
46#[derive(Debug)]
47pub struct GitIgnoreFile {
48    parent: Option<Arc<Self>>,
49    matcher: gitignore::Gitignore,
50}
51
52impl GitIgnoreFile {
53    pub fn empty() -> Arc<Self> {
54        Arc::new(Self {
55            parent: None,
56            matcher: gitignore::Gitignore::empty(),
57        })
58    }
59
60    /// Concatenates new `.gitignore` content at the `prefix` directory.
61    ///
62    /// The `prefix` should be a slash-separated path relative to the workspace
63    /// root.
64    pub fn chain(
65        self: &Arc<Self>,
66        prefix: &str,
67        ignore_path: &Path,
68        input: &[u8],
69    ) -> Result<Arc<Self>, GitIgnoreError> {
70        let mut builder = gitignore::GitignoreBuilder::new(prefix);
71        for (i, input_line) in strip_bom(input).split(|b| *b == b'\n').enumerate() {
72            if input_line.starts_with(b"#") {
73                continue;
74            }
75
76            let line = str::from_utf8(input_line).map_err(|err| GitIgnoreError::InvalidUtf8 {
77                path: ignore_path.to_path_buf(),
78                line_num_for_display: i + 1,
79                line: String::from_utf8_lossy(input_line).to_string(),
80                source: err,
81            })?;
82            // The `from` argument doesn't provide any diagnostics or correctness, so it is
83            // not required. It only allows retrieving the path from the `Glob` later, which
84            // we never do.
85            builder
86                .add_line(None, line)
87                .map_err(|err| GitIgnoreError::Underlying {
88                    path: ignore_path.to_path_buf(),
89                    source: err,
90                })?;
91        }
92        let matcher = builder.build().map_err(|err| GitIgnoreError::Underlying {
93            path: ignore_path.to_path_buf(),
94            source: err,
95        })?;
96        let parent = if self.matcher.is_empty() {
97            self.parent.clone() // omit the empty root
98        } else {
99            Some(self.clone())
100        };
101        Ok(Arc::new(Self { parent, matcher }))
102    }
103
104    /// Concatenates new `.gitignore` file at the `prefix` directory.
105    ///
106    /// The `prefix` should be a slash-separated path relative to the workspace
107    /// root.
108    pub fn chain_with_file(
109        self: &Arc<Self>,
110        prefix: &str,
111        file: PathBuf,
112    ) -> Result<Arc<Self>, GitIgnoreError> {
113        if file.is_file() {
114            let buf = fs::read(&file).map_err(|err| GitIgnoreError::ReadFile {
115                path: file.clone(),
116                source: err,
117            })?;
118            self.chain(prefix, &file, &buf)
119        } else {
120            Ok(self.clone())
121        }
122    }
123
124    fn matches_helper(&self, path: &str, is_dir: bool) -> bool {
125        iter::successors(Some(self), |file| file.parent.as_deref())
126            .find_map(|file| {
127                // TODO: the documentation warns that
128                // `matched_path_or_any_parents` is slower than `matched`;
129                // ideally, we would switch to that.
130                match file.matcher.matched_path_or_any_parents(path, is_dir) {
131                    ignore::Match::None => None,
132                    ignore::Match::Ignore(_) => Some(true),
133                    ignore::Match::Whitelist(_) => Some(false),
134                }
135            })
136            .unwrap_or_default()
137    }
138
139    /// Returns whether specified path (not just file!) should be ignored. This
140    /// method does not directly define which files should not be tracked in
141    /// the repository. Instead, it performs a simple matching against the
142    /// last applicable .gitignore line. The effective set of paths
143    /// ignored in the repository should take into account that all (untracked)
144    /// files within a ignored directory should be ignored unconditionally.
145    /// The code in this file does not take that into account.
146    pub fn matches(&self, path: &str) -> bool {
147        //If path ends with slash, consider it as a directory.
148        let (path, is_dir) = match path.strip_suffix('/') {
149            Some(path) => (path, true),
150            None => (path, false),
151        };
152        self.matches_helper(path, is_dir)
153    }
154}
155
/// Returns `text` without a single leading UTF-8 byte order mark, or `text`
/// unchanged if it does not start with one.
fn strip_bom(text: &[u8]) -> &[u8] {
    // U+FEFF encoded as UTF-8.
    const UTF8_BOM: &[u8] = b"\xef\xbb\xbf";
    match text.strip_prefix(UTF8_BOM) {
        Some(rest) => rest,
        None => text,
    }
}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as ignore content located at `prefix`, chained onto an
    /// empty root. Panics if the content is rejected.
    fn ignore_file(prefix: &str, input: &[u8]) -> Arc<GitIgnoreFile> {
        GitIgnoreFile::empty()
            .chain(prefix, Path::new(""), input)
            .unwrap()
    }

    /// Whether `path` is ignored by root-level ignore content `input`.
    fn matches(input: &[u8], path: &str) -> bool {
        ignore_file("", input).matches(path)
    }

    /// Whether root-level ignore content `input` is rejected.
    fn fails_to_parse(input: &[u8]) -> bool {
        GitIgnoreFile::empty()
            .chain("", Path::new(""), input)
            .is_err()
    }

    #[test]
    fn test_gitignore_empty_file() {
        let ignore = GitIgnoreFile::empty();
        assert!(!ignore.matches("foo"));
    }

    #[test]
    fn test_gitignore_empty_file_with_prefix() {
        let ignore = ignore_file("dir/", b"");
        assert!(!ignore.matches("dir/foo"));
    }

    #[test]
    fn test_gitignore_literal() {
        let ignore = ignore_file("", b"foo\n");
        for path in ["foo", "dir/foo", "dir/subdir/foo"] {
            assert!(ignore.matches(path));
        }
        for path in ["food", "dir/food"] {
            assert!(!ignore.matches(path));
        }
    }

    #[test]
    fn test_gitignore_literal_with_prefix() {
        let ignore = ignore_file("./dir/", b"foo\n");
        assert!(ignore.matches("dir/foo"));
        assert!(ignore.matches("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_pattern_same_as_prefix() {
        let ignore = ignore_file("dir/", b"dir\n");
        assert!(ignore.matches("dir/dir"));
        // We don't want the "dir" pattern to apply to the parent directory
        assert!(!ignore.matches("dir/foo"));
    }

    #[test]
    fn test_gitignore_rooted_literal() {
        let ignore = ignore_file("", b"/foo\n");
        assert!(ignore.matches("foo"));
        assert!(!ignore.matches("dir/foo"));
    }

    #[test]
    fn test_gitignore_rooted_literal_with_prefix() {
        let ignore = ignore_file("dir/", b"/foo\n");
        assert!(ignore.matches("dir/foo"));
        assert!(!ignore.matches("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_deep_dir() {
        let ignore = ignore_file("", b"/dir1/dir2/dir3\n");
        for path in ["foo", "dir1/foo", "dir1/dir2/foo"] {
            assert!(!ignore.matches(path));
        }
        for path in ["dir1/dir2/dir3/foo", "dir1/dir2/dir3/dir4/foo"] {
            assert!(ignore.matches(path));
        }
    }

    #[test]
    fn test_gitignore_deep_dir_chained() {
        // Prefix is relative to root, not to parent file
        let ignore = ignore_file("", b"/dummy\n")
            .chain("dir1/", Path::new(""), b"/dummy\n")
            .unwrap()
            .chain("dir1/dir2/", Path::new(""), b"/dir3\n")
            .unwrap();
        for path in ["foo", "dir1/foo", "dir1/dir2/foo"] {
            assert!(!ignore.matches(path));
        }
        for path in ["dir1/dir2/dir3/foo", "dir1/dir2/dir3/dir4/foo"] {
            assert!(ignore.matches(path));
        }
    }

    #[test]
    fn test_gitignore_match_only_dir() {
        let ignore = ignore_file("", b"/dir/\n");
        assert!(!ignore.matches("dir"));
        assert!(ignore.matches("dir/foo"));
        assert!(ignore.matches("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_unusual_symbols() {
        assert!(matches(b"\\*\n", "*"));
        assert!(!matches(b"\\*\n", "foo"));
        assert!(matches(b"\\!\n", "!"));
        assert!(matches(b"\\?\n", "?"));
        assert!(!matches(b"\\?\n", "x"));
        assert!(matches(b"\\w\n", "w"));
        assert!(fails_to_parse(b"\\\n"));
    }

    #[test]
    #[cfg(not(target_os = "windows"))]
    fn test_gitignore_backslash_path() {
        assert!(!matches(b"/foo/bar", "/foo\\bar"));
        assert!(!matches(b"/foo/bar", "/foo/bar\\"));

        assert!(!matches(b"/foo/bar/", "/foo\\bar/"));
        assert!(!matches(b"/foo/bar/", "/foo\\bar\\/"));

        // Invalid escapes are treated like literal backslashes
        assert!(!matches(b"\\w\n", "\\w"));
        assert!(matches(b"\\\\ \n", "\\ "));
        assert!(matches(b"\\\\\\ \n", "\\ "));
    }

    /// The `ignore` crate considers backslashes to be directory separators
    /// only on Windows.
    #[test]
    #[cfg(target_os = "windows")]
    fn test_gitignore_backslash_path() {
        assert!(matches(b"/foo/bar", "/foo\\bar"));
        assert!(matches(b"/foo/bar", "/foo/bar\\"));

        assert!(matches(b"/foo/bar/", "/foo\\bar/"));
        assert!(matches(b"/foo/bar/", "/foo\\bar\\/"));

        assert!(matches(b"\\w\n", "\\w"));
        assert!(!matches(b"\\\\ \n", "\\ "));
        assert!(!matches(b"\\\\\\ \n", "\\ "));
    }

    #[test]
    fn test_gitignore_whitespace() {
        assert!(!matches(b" \n", " "));
        assert!(matches(b"\\ \n", " "));
        assert!(!matches(b"\\\\ \n", " "));
        assert!(matches(b" a\n", " a"));
        assert!(matches(b"a b\n", "a b"));
        assert!(matches(b"a b \n", "a b"));
        assert!(!matches(b"a b \n", "a b "));
        assert!(matches(b"a b\\ \\ \n", "a b  "));
        // Trailing CRs at EOL are ignored
        assert!(matches(b"a\r\n", "a"));
        assert!(!matches(b"a\r\n", "a\r"));
        assert!(!matches(b"a\r\r\n", "a\r"));
        assert!(matches(b"a\r\r\n", "a"));
        assert!(!matches(b"a\r\r\n", "a\r\r"));
        assert!(matches(b"a\r\r\n", "a"));
        assert!(matches(b"\ra\n", "\ra"));
        assert!(!matches(b"\ra\n", "a"));
        assert!(fails_to_parse(b"a b \\  \n"));
    }

    #[test]
    fn test_gitignore_glob() {
        assert!(!matches(b"*.o\n", "foo"));
        assert!(matches(b"*.o\n", "foo.o"));
        assert!(!matches(b"foo.?\n", "foo"));
        assert!(!matches(b"foo.?\n", "foo."));
        assert!(matches(b"foo.?\n", "foo.o"));
    }

    #[test]
    fn test_gitignore_range() {
        assert!(!matches(b"foo.[az]\n", "foo"));
        assert!(matches(b"foo.[az]\n", "foo.a"));
        assert!(!matches(b"foo.[az]\n", "foo.g"));
        assert!(matches(b"foo.[az]\n", "foo.z"));
        assert!(!matches(b"foo.[a-z]\n", "foo"));
        assert!(matches(b"foo.[a-z]\n", "foo.a"));
        assert!(matches(b"foo.[a-z]\n", "foo.g"));
        assert!(matches(b"foo.[a-z]\n", "foo.z"));
        assert!(matches(b"foo.[0-9a-fA-F]\n", "foo.5"));
        assert!(matches(b"foo.[0-9a-fA-F]\n", "foo.c"));
        assert!(matches(b"foo.[0-9a-fA-F]\n", "foo.E"));
        assert!(!matches(b"foo.[0-9a-fA-F]\n", "foo._"));
    }

    #[test]
    fn test_gitignore_leading_dir_glob() {
        assert!(matches(b"**/foo\n", "foo"));
        assert!(matches(b"**/foo\n", "dir1/dir2/foo"));
        assert!(matches(b"**/foo\n", "foo/file"));
        assert!(matches(b"**/dir/foo\n", "dir/foo"));
        assert!(matches(b"**/dir/foo\n", "dir1/dir2/dir/foo"));
    }

    #[test]
    fn test_gitignore_leading_dir_glob_with_prefix() {
        let ignore = ignore_file("dir1/dir2/", b"**/foo\n");
        assert!(ignore.matches("dir1/dir2/foo"));
        assert!(!ignore.matches("dir1/dir2/bar"));
        assert!(ignore.matches("dir1/dir2/sub1/sub2/foo"));
        assert!(!ignore.matches("dir1/dir2/sub1/sub2/bar"));
    }

    #[test]
    fn test_gitignore_trailing_dir_glob() {
        assert!(!matches(b"abc/**\n", "abc"));
        assert!(matches(b"abc/**\n", "abc/file"));
        assert!(matches(b"abc/**\n", "abc/dir/file"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob() {
        assert!(matches(b"a/**/b\n", "a/b"));
        assert!(matches(b"a/**/b\n", "a/x/b"));
        assert!(matches(b"a/**/b\n", "a/x/y/b"));
        assert!(!matches(b"a/**/b\n", "ax/y/b"));
        assert!(!matches(b"a/**/b\n", "a/x/yb"));
        assert!(!matches(b"a/**/b\n", "ab"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob_not_really() {
        assert!(!matches(b"a/x**y/b\n", "a/b"));
        assert!(matches(b"a/x**y/b\n", "a/xy/b"));
        assert!(matches(b"a/x**y/b\n", "a/xzzzy/b"));
    }

    #[test]
    fn test_gitignore_with_utf8_bom() {
        // Only a BOM at the very start of the content is stripped.
        assert!(matches(b"\xef\xbb\xbffoo\n", "foo"));
        assert!(!matches(b"\n\xef\xbb\xbffoo\n", "foo"));
    }

    #[test]
    fn test_gitignore_line_ordering() {
        assert!(matches(b"foo\n!foo/bar\n", "foo"));
        assert!(!matches(b"foo\n!foo/bar\n", "foo/bar"));
        assert!(matches(b"foo\n!foo/bar\n", "foo/baz"));
        assert!(matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo"));
        assert!(!matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar"));
        assert!(matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/baz"));
        assert!(!matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/quux"));
        assert!(!matches(b"foo/*\n!foo/bar", "foo/bar"));
    }

    #[test]
    fn test_gitignore_file_ordering() {
        let root = ignore_file("", b"/foo\n");
        let nested = root.chain("foo/", Path::new(""), b"!/bar").unwrap();
        let deepest = nested.chain("foo/bar/", Path::new(""), b"/baz").unwrap();
        assert!(root.matches("foo"));
        assert!(root.matches("foo/bar"));
        assert!(!nested.matches("foo/bar"));
        assert!(!nested.matches("foo/bar/baz"));
        assert!(nested.matches("foo/baz"));
        assert!(deepest.matches("foo/bar/baz"));
        assert!(!deepest.matches("foo/bar/qux"));
    }

    #[test]
    fn test_gitignore_negative_parent_directory() {
        // The following script shows that Git ignores the file:
        //
        // ```bash
        // $ rm -rf test-repo && \
        //   git init test-repo &>/dev/null && \
        //   cd test-repo && \
        //   printf 'A/B.*\n!/A/\n' >.gitignore && \
        //   mkdir A && \
        //   touch A/B.ext && \
        //   git check-ignore A/B.ext
        // A/B.ext
        // ```
        let negation_last = ignore_file("", b"foo/bar.*\n!/foo/\n");
        assert!(negation_last.matches("foo/bar.ext"));

        let negation_first = ignore_file("", b"!/foo/\nfoo/bar.*\n");
        assert!(negation_first.matches("foo/bar.ext"));
    }

    #[test]
    fn test_gitignore_invalid_utf8() {
        // This tests that comments are not parsed.
        // The bytes below are `#` (35) followed by 224, which is not valid
        // UTF-8 on its own.
        let non_ascii_bytes: &[u8] = &[35, 224];

        // As a comment line, the invalid byte is never decoded.
        assert!(!fails_to_parse(non_ascii_bytes));

        // Without the leading `#` the line must be decoded and is rejected.
        assert!(fails_to_parse(&non_ascii_bytes[1..]));
    }
}