jj_lib/
gitignore.rs

1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::fs;
18use std::io;
19use std::iter;
20use std::path::Path;
21use std::path::PathBuf;
22use std::sync::Arc;
23
24use ignore::gitignore;
25use thiserror::Error;
26
27#[derive(Debug, Error)]
28pub enum GitIgnoreError {
29    #[error("Failed to read ignore patterns from file {path}")]
30    ReadFile { path: PathBuf, source: io::Error },
31    #[error("Invalid UTF-8 for ignore pattern in {path} on line #{line_num_for_display}: {line}")]
32    InvalidUtf8 {
33        path: PathBuf,
34        line_num_for_display: usize,
35        line: String,
36        source: std::str::Utf8Error,
37    },
38    #[error("Failed to parse ignore patterns from file {path}")]
39    Underlying {
40        path: PathBuf,
41        source: ignore::Error,
42    },
43}
44
45/// Models the effective contents of multiple .gitignore files.
46#[derive(Debug)]
47pub struct GitIgnoreFile {
48    parent: Option<Arc<GitIgnoreFile>>,
49    matcher: gitignore::Gitignore,
50}
51
52impl GitIgnoreFile {
53    pub fn empty() -> Arc<GitIgnoreFile> {
54        Arc::new(GitIgnoreFile {
55            parent: None,
56            matcher: gitignore::Gitignore::empty(),
57        })
58    }
59
60    /// Concatenates new `.gitignore` content at the `prefix` directory.
61    ///
62    /// The `prefix` should be a slash-separated path relative to the workspace
63    /// root.
64    pub fn chain(
65        self: &Arc<GitIgnoreFile>,
66        prefix: &str,
67        ignore_path: &Path,
68        input: &[u8],
69    ) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
70        let mut builder = gitignore::GitignoreBuilder::new(prefix);
71        for (i, input_line) in input.split(|b| *b == b'\n').enumerate() {
72            let line =
73                std::str::from_utf8(input_line).map_err(|err| GitIgnoreError::InvalidUtf8 {
74                    path: ignore_path.to_path_buf(),
75                    line_num_for_display: i + 1,
76                    line: String::from_utf8_lossy(input_line).to_string(),
77                    source: err,
78                })?;
79            // The `from` argument doesn't provide any diagnostics or correctness, so it is
80            // not required. It only allows retrieving the path from the `Glob` later, which
81            // we never do.
82            builder
83                .add_line(None, line)
84                .map_err(|err| GitIgnoreError::Underlying {
85                    path: ignore_path.to_path_buf(),
86                    source: err,
87                })?;
88        }
89        let matcher = builder.build().map_err(|err| GitIgnoreError::Underlying {
90            path: ignore_path.to_path_buf(),
91            source: err,
92        })?;
93        let parent = if self.matcher.is_empty() {
94            self.parent.clone() // omit the empty root
95        } else {
96            Some(self.clone())
97        };
98        Ok(Arc::new(GitIgnoreFile { parent, matcher }))
99    }
100
101    /// Concatenates new `.gitignore` file at the `prefix` directory.
102    ///
103    /// The `prefix` should be a slash-separated path relative to the workspace
104    /// root.
105    pub fn chain_with_file(
106        self: &Arc<GitIgnoreFile>,
107        prefix: &str,
108        file: PathBuf,
109    ) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
110        if file.is_file() {
111            let buf = fs::read(&file).map_err(|err| GitIgnoreError::ReadFile {
112                path: file.clone(),
113                source: err,
114            })?;
115            self.chain(prefix, &file, &buf)
116        } else {
117            Ok(self.clone())
118        }
119    }
120
121    fn matches_helper(&self, path: &str, is_dir: bool) -> bool {
122        iter::successors(Some(self), |file| file.parent.as_deref())
123            .find_map(|file| {
124                // TODO: the documentation warns that
125                // `matched_path_or_any_parents` is slower than `matched`;
126                // ideally, we would switch to that.
127                match file.matcher.matched_path_or_any_parents(path, is_dir) {
128                    ignore::Match::None => None,
129                    ignore::Match::Ignore(_) => Some(true),
130                    ignore::Match::Whitelist(_) => Some(false),
131                }
132            })
133            .unwrap_or_default()
134    }
135
136    /// Returns whether specified path (not just file!) should be ignored. This
137    /// method does not directly define which files should not be tracked in
138    /// the repository. Instead, it performs a simple matching against the
139    /// last applicable .gitignore line. The effective set of paths
140    /// ignored in the repository should take into account that all (untracked)
141    /// files within a ignored directory should be ignored unconditionally.
142    /// The code in this file does not take that into account.
143    pub fn matches(&self, path: &str) -> bool {
144        //If path ends with slash, consider it as a directory.
145        let (path, is_dir) = match path.strip_suffix('/') {
146            Some(path) => (path, true),
147            None => (path, false),
148        };
149        self.matches_helper(path, is_dir)
150    }
151}
152
#[cfg(test)]
mod tests {

    use super::*;

    /// Chains `input` onto an empty ignore file at `prefix`, panicking if the
    /// content fails to parse.
    fn chained(prefix: &str, input: &[u8]) -> Arc<GitIgnoreFile> {
        GitIgnoreFile::empty()
            .chain(prefix, Path::new(""), input)
            .unwrap()
    }

    /// Returns whether `path` is ignored by a root-level file holding `input`.
    fn matches(input: &[u8], path: &str) -> bool {
        chained("", input).matches(path)
    }

    /// Returns whether a root-level file holding `input` is rejected.
    fn is_parse_error(input: &[u8]) -> bool {
        GitIgnoreFile::empty()
            .chain("", Path::new(""), input)
            .is_err()
    }

    /// Checks every `(path, expected)` pair against a root-level file holding
    /// `input`, in the given order.
    fn assert_matches(input: &[u8], cases: &[(&str, bool)]) {
        for &(path, expected) in cases {
            assert_eq!(matches(input, path), expected, "path: {path:?}");
        }
    }

    /// Expectations shared by the tests that ignore only `dir1/dir2/dir3`.
    fn assert_only_dir3_ignored(ignore: &GitIgnoreFile) {
        for kept in ["foo", "dir1/foo", "dir1/dir2/foo"] {
            assert!(!ignore.matches(kept), "path: {kept:?}");
        }
        for ignored in ["dir1/dir2/dir3/foo", "dir1/dir2/dir3/dir4/foo"] {
            assert!(ignore.matches(ignored), "path: {ignored:?}");
        }
    }

    #[test]
    fn test_gitignore_empty_file() {
        assert!(!GitIgnoreFile::empty().matches("foo"));
    }

    #[test]
    fn test_gitignore_empty_file_with_prefix() {
        let ignore = chained("dir/", b"");
        assert!(!ignore.matches("dir/foo"));
    }

    #[test]
    fn test_gitignore_literal() {
        let ignore = chained("", b"foo\n");
        for ignored in ["foo", "dir/foo", "dir/subdir/foo"] {
            assert!(ignore.matches(ignored), "path: {ignored:?}");
        }
        for kept in ["food", "dir/food"] {
            assert!(!ignore.matches(kept), "path: {kept:?}");
        }
    }

    #[test]
    fn test_gitignore_literal_with_prefix() {
        let ignore = chained("./dir/", b"foo\n");
        assert!(ignore.matches("dir/foo"));
        assert!(ignore.matches("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_pattern_same_as_prefix() {
        let ignore = chained("dir/", b"dir\n");
        assert!(ignore.matches("dir/dir"));
        // We don't want the "dir" pattern to apply to the parent directory
        assert!(!ignore.matches("dir/foo"));
    }

    #[test]
    fn test_gitignore_rooted_literal() {
        let ignore = chained("", b"/foo\n");
        assert!(ignore.matches("foo"));
        assert!(!ignore.matches("dir/foo"));
    }

    #[test]
    fn test_gitignore_rooted_literal_with_prefix() {
        let ignore = chained("dir/", b"/foo\n");
        assert!(ignore.matches("dir/foo"));
        assert!(!ignore.matches("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_deep_dir() {
        let ignore = chained("", b"/dir1/dir2/dir3\n");
        assert_only_dir3_ignored(&ignore);
    }

    #[test]
    fn test_gitignore_deep_dir_chained() {
        // Prefix is relative to root, not to parent file
        let root = chained("", b"/dummy\n");
        let level1 = root.chain("dir1/", Path::new(""), b"/dummy\n").unwrap();
        let level2 = level1
            .chain("dir1/dir2/", Path::new(""), b"/dir3\n")
            .unwrap();
        assert_only_dir3_ignored(&level2);
    }

    #[test]
    fn test_gitignore_match_only_dir() {
        let ignore = chained("", b"/dir/\n");
        assert!(!ignore.matches("dir"));
        assert!(ignore.matches("dir/foo"));
        assert!(ignore.matches("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_unusual_symbols() {
        assert_matches(b"\\*\n", &[("*", true), ("foo", false)]);
        assert!(matches(b"\\!\n", "!"));
        assert_matches(b"\\?\n", &[("?", true), ("x", false)]);
        assert!(matches(b"\\w\n", "w"));
        assert!(is_parse_error(b"\\\n"));
    }

    #[test]
    #[cfg(not(target_os = "windows"))]
    fn test_gitignore_backslash_path() {
        assert_matches(
            b"/foo/bar",
            &[("/foo\\bar", false), ("/foo/bar\\", false)],
        );

        assert_matches(
            b"/foo/bar/",
            &[("/foo\\bar/", false), ("/foo\\bar\\/", false)],
        );

        // Invalid escapes are treated like literal backslashes
        assert!(!matches(b"\\w\n", "\\w"));
        assert!(matches(b"\\\\ \n", "\\ "));
        assert!(matches(b"\\\\\\ \n", "\\ "));
    }

    #[test]
    #[cfg(target_os = "windows")]
    /// ignore crate consider backslashes as a directory divider only on
    /// Windows.
    fn test_gitignore_backslash_path() {
        assert_matches(b"/foo/bar", &[("/foo\\bar", true), ("/foo/bar\\", true)]);

        assert_matches(
            b"/foo/bar/",
            &[("/foo\\bar/", true), ("/foo\\bar\\/", true)],
        );

        assert!(matches(b"\\w\n", "\\w"));
        assert!(!matches(b"\\\\ \n", "\\ "));
        assert!(!matches(b"\\\\\\ \n", "\\ "));
    }

    #[test]
    fn test_gitignore_whitespace() {
        assert!(!matches(b" \n", " "));
        assert!(matches(b"\\ \n", " "));
        assert!(!matches(b"\\\\ \n", " "));
        assert!(matches(b" a\n", " a"));
        assert!(matches(b"a b\n", "a b"));
        assert_matches(b"a b \n", &[("a b", true), ("a b ", false)]);
        assert!(matches(b"a b\\ \\ \n", "a b  "));
        // Trail CRs at EOL is ignored
        assert_matches(b"a\r\n", &[("a", true), ("a\r", false)]);
        assert_matches(
            b"a\r\r\n",
            &[("a\r", false), ("a", true), ("a\r\r", false), ("a", true)],
        );
        assert_matches(b"\ra\n", &[("\ra", true), ("a", false)]);
        assert!(is_parse_error(b"a b \\  \n"));
    }

    #[test]
    fn test_gitignore_glob() {
        assert_matches(b"*.o\n", &[("foo", false), ("foo.o", true)]);
        assert_matches(
            b"foo.?\n",
            &[("foo", false), ("foo.", false), ("foo.o", true)],
        );
    }

    #[test]
    fn test_gitignore_range() {
        assert_matches(
            b"foo.[az]\n",
            &[
                ("foo", false),
                ("foo.a", true),
                ("foo.g", false),
                ("foo.z", true),
            ],
        );
        assert_matches(
            b"foo.[a-z]\n",
            &[
                ("foo", false),
                ("foo.a", true),
                ("foo.g", true),
                ("foo.z", true),
            ],
        );
        assert_matches(
            b"foo.[0-9a-fA-F]\n",
            &[
                ("foo.5", true),
                ("foo.c", true),
                ("foo.E", true),
                ("foo._", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_leading_dir_glob() {
        assert_matches(
            b"**/foo\n",
            &[("foo", true), ("dir1/dir2/foo", true), ("foo/file", true)],
        );
        assert_matches(
            b"**/dir/foo\n",
            &[("dir/foo", true), ("dir1/dir2/dir/foo", true)],
        );
    }

    #[test]
    fn test_gitignore_leading_dir_glob_with_prefix() {
        let ignore = chained("dir1/dir2/", b"**/foo\n");
        assert!(ignore.matches("dir1/dir2/foo"));
        assert!(!ignore.matches("dir1/dir2/bar"));
        assert!(ignore.matches("dir1/dir2/sub1/sub2/foo"));
        assert!(!ignore.matches("dir1/dir2/sub1/sub2/bar"));
    }

    #[test]
    fn test_gitignore_trailing_dir_glob() {
        assert_matches(
            b"abc/**\n",
            &[("abc", false), ("abc/file", true), ("abc/dir/file", true)],
        );
    }

    #[test]
    fn test_gitignore_internal_dir_glob() {
        assert_matches(
            b"a/**/b\n",
            &[
                ("a/b", true),
                ("a/x/b", true),
                ("a/x/y/b", true),
                ("ax/y/b", false),
                ("a/x/yb", false),
                ("ab", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_internal_dir_glob_not_really() {
        assert_matches(
            b"a/x**y/b\n",
            &[("a/b", false), ("a/xy/b", true), ("a/xzzzy/b", true)],
        );
    }

    #[test]
    fn test_gitignore_line_ordering() {
        assert_matches(
            b"foo\n!foo/bar\n",
            &[("foo", true), ("foo/bar", false), ("foo/baz", true)],
        );
        assert_matches(
            b"foo\n!foo/bar\nfoo/bar/baz",
            &[
                ("foo", true),
                ("foo/bar", false),
                ("foo/bar/baz", true),
                ("foo/bar/quux", false),
            ],
        );
        assert!(!matches(b"foo/*\n!foo/bar", "foo/bar"));
    }

    #[test]
    fn test_gitignore_file_ordering() {
        let file1 = chained("", b"/foo\n");
        let file2 = file1.chain("foo/", Path::new(""), b"!/bar").unwrap();
        let file3 = file2.chain("foo/bar/", Path::new(""), b"/baz").unwrap();

        assert!(file1.matches("foo"));
        assert!(file1.matches("foo/bar"));

        assert!(!file2.matches("foo/bar"));
        assert!(!file2.matches("foo/bar/baz"));
        assert!(file2.matches("foo/baz"));

        assert!(file3.matches("foo/bar/baz"));
        assert!(!file3.matches("foo/bar/qux"));
    }

    #[test]
    fn test_gitignore_negative_parent_directory() {
        // The following script shows that Git ignores the file:
        //
        // ```bash
        // $ rm -rf test-repo && \
        //   git init test-repo &>/dev/null && \
        //   cd test-repo && \
        //   printf 'A/B.*\n!/A/\n' >.gitignore && \
        //   mkdir A && \
        //   touch A/B.ext && \
        //   git check-ignore A/B.ext
        // A/B.ext
        // ```
        let negation_last = chained("", b"foo/bar.*\n!/foo/\n");
        assert!(negation_last.matches("foo/bar.ext"));

        let negation_first = chained("", b"!/foo/\nfoo/bar.*\n");
        assert!(negation_first.matches("foo/bar.ext"));
    }
}