jj_lib/
gitignore.rs

1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::fs;
18use std::io;
19use std::iter;
20use std::path::Path;
21use std::path::PathBuf;
22use std::sync::Arc;
23
24use ignore::gitignore;
25use thiserror::Error;
26
27#[derive(Debug, Error)]
28pub enum GitIgnoreError {
29    #[error("Failed to read ignore patterns from file {path}")]
30    ReadFile { path: PathBuf, source: io::Error },
31    #[error("Invalid UTF-8 for ignore pattern in {path} on line #{line_num_for_display}: {line}")]
32    InvalidUtf8 {
33        path: PathBuf,
34        line_num_for_display: usize,
35        line: String,
36        source: std::str::Utf8Error,
37    },
38    #[error("Failed to parse ignore patterns from file {path}")]
39    Underlying {
40        path: PathBuf,
41        source: ignore::Error,
42    },
43}
44
45/// Models the effective contents of multiple .gitignore files.
46#[derive(Debug)]
47pub struct GitIgnoreFile {
48    parent: Option<Arc<GitIgnoreFile>>,
49    matcher: gitignore::Gitignore,
50}
51
52impl GitIgnoreFile {
53    pub fn empty() -> Arc<Self> {
54        Arc::new(Self {
55            parent: None,
56            matcher: gitignore::Gitignore::empty(),
57        })
58    }
59
60    /// Concatenates new `.gitignore` content at the `prefix` directory.
61    ///
62    /// The `prefix` should be a slash-separated path relative to the workspace
63    /// root.
64    pub fn chain(
65        self: &Arc<Self>,
66        prefix: &str,
67        ignore_path: &Path,
68        input: &[u8],
69    ) -> Result<Arc<Self>, GitIgnoreError> {
70        let mut builder = gitignore::GitignoreBuilder::new(prefix);
71        for (i, input_line) in input.split(|b| *b == b'\n').enumerate() {
72            if input_line.starts_with(b"#") {
73                continue;
74            }
75
76            let line = str::from_utf8(input_line).map_err(|err| GitIgnoreError::InvalidUtf8 {
77                path: ignore_path.to_path_buf(),
78                line_num_for_display: i + 1,
79                line: String::from_utf8_lossy(input_line).to_string(),
80                source: err,
81            })?;
82            // The `from` argument doesn't provide any diagnostics or correctness, so it is
83            // not required. It only allows retrieving the path from the `Glob` later, which
84            // we never do.
85            builder
86                .add_line(None, line)
87                .map_err(|err| GitIgnoreError::Underlying {
88                    path: ignore_path.to_path_buf(),
89                    source: err,
90                })?;
91        }
92        let matcher = builder.build().map_err(|err| GitIgnoreError::Underlying {
93            path: ignore_path.to_path_buf(),
94            source: err,
95        })?;
96        let parent = if self.matcher.is_empty() {
97            self.parent.clone() // omit the empty root
98        } else {
99            Some(self.clone())
100        };
101        Ok(Arc::new(Self { parent, matcher }))
102    }
103
104    /// Concatenates new `.gitignore` file at the `prefix` directory.
105    ///
106    /// The `prefix` should be a slash-separated path relative to the workspace
107    /// root.
108    pub fn chain_with_file(
109        self: &Arc<Self>,
110        prefix: &str,
111        file: PathBuf,
112    ) -> Result<Arc<Self>, GitIgnoreError> {
113        if file.is_file() {
114            let buf = fs::read(&file).map_err(|err| GitIgnoreError::ReadFile {
115                path: file.clone(),
116                source: err,
117            })?;
118            self.chain(prefix, &file, &buf)
119        } else {
120            Ok(self.clone())
121        }
122    }
123
124    fn matches_helper(&self, path: &str, is_dir: bool) -> bool {
125        iter::successors(Some(self), |file| file.parent.as_deref())
126            .find_map(|file| {
127                // TODO: the documentation warns that
128                // `matched_path_or_any_parents` is slower than `matched`;
129                // ideally, we would switch to that.
130                match file.matcher.matched_path_or_any_parents(path, is_dir) {
131                    ignore::Match::None => None,
132                    ignore::Match::Ignore(_) => Some(true),
133                    ignore::Match::Whitelist(_) => Some(false),
134                }
135            })
136            .unwrap_or_default()
137    }
138
139    /// Returns whether specified path (not just file!) should be ignored. This
140    /// method does not directly define which files should not be tracked in
141    /// the repository. Instead, it performs a simple matching against the
142    /// last applicable .gitignore line. The effective set of paths
143    /// ignored in the repository should take into account that all (untracked)
144    /// files within a ignored directory should be ignored unconditionally.
145    /// The code in this file does not take that into account.
146    pub fn matches(&self, path: &str) -> bool {
147        //If path ends with slash, consider it as a directory.
148        let (path, is_dir) = match path.strip_suffix('/') {
149            Some(path) => (path, true),
150            None => (path, false),
151        };
152        self.matches_helper(path, is_dir)
153    }
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Chains `input` onto an empty ignore file at `prefix`, panicking if the
    /// patterns fail to parse.
    fn ignore_at(prefix: &str, input: &[u8]) -> Arc<GitIgnoreFile> {
        GitIgnoreFile::empty()
            .chain(prefix, Path::new(""), input)
            .unwrap()
    }

    /// Reports whether a root-level ignore file containing `input` ignores
    /// `path`.
    fn matches(input: &[u8], path: &str) -> bool {
        ignore_at("", input).matches(path)
    }

    /// Reports whether `input` is rejected as a root-level ignore file.
    fn is_rejected(input: &[u8]) -> bool {
        GitIgnoreFile::empty()
            .chain("", Path::new(""), input)
            .is_err()
    }

    /// Asserts that every path in `paths` is ignored by `file`.
    #[track_caller]
    fn assert_ignored(file: &GitIgnoreFile, paths: &[&str]) {
        for path in paths {
            assert!(file.matches(path), "expected {path:?} to be ignored");
        }
    }

    /// Asserts that no path in `paths` is ignored by `file`.
    #[track_caller]
    fn assert_not_ignored(file: &GitIgnoreFile, paths: &[&str]) {
        for path in paths {
            assert!(!file.matches(path), "expected {path:?} not to be ignored");
        }
    }

    #[test]
    fn test_gitignore_empty_file() {
        assert_not_ignored(&GitIgnoreFile::empty(), &["foo"]);
    }

    #[test]
    fn test_gitignore_empty_file_with_prefix() {
        let ignore = ignore_at("dir/", b"");
        assert_not_ignored(&ignore, &["dir/foo"]);
    }

    #[test]
    fn test_gitignore_literal() {
        let ignore = ignore_at("", b"foo\n");
        assert_ignored(&ignore, &["foo", "dir/foo", "dir/subdir/foo"]);
        assert_not_ignored(&ignore, &["food", "dir/food"]);
    }

    #[test]
    fn test_gitignore_literal_with_prefix() {
        let ignore = ignore_at("./dir/", b"foo\n");
        assert_ignored(&ignore, &["dir/foo", "dir/subdir/foo"]);
    }

    #[test]
    fn test_gitignore_pattern_same_as_prefix() {
        let ignore = ignore_at("dir/", b"dir\n");
        assert_ignored(&ignore, &["dir/dir"]);
        // We don't want the "dir" pattern to apply to the parent directory
        assert_not_ignored(&ignore, &["dir/foo"]);
    }

    #[test]
    fn test_gitignore_rooted_literal() {
        let ignore = ignore_at("", b"/foo\n");
        assert_ignored(&ignore, &["foo"]);
        assert_not_ignored(&ignore, &["dir/foo"]);
    }

    #[test]
    fn test_gitignore_rooted_literal_with_prefix() {
        let ignore = ignore_at("dir/", b"/foo\n");
        assert_ignored(&ignore, &["dir/foo"]);
        assert_not_ignored(&ignore, &["dir/subdir/foo"]);
    }

    #[test]
    fn test_gitignore_deep_dir() {
        let ignore = ignore_at("", b"/dir1/dir2/dir3\n");
        assert_not_ignored(&ignore, &["foo", "dir1/foo", "dir1/dir2/foo"]);
        assert_ignored(
            &ignore,
            &["dir1/dir2/dir3/foo", "dir1/dir2/dir3/dir4/foo"],
        );
    }

    #[test]
    fn test_gitignore_deep_dir_chained() {
        // Prefix is relative to root, not to parent file
        let root = ignore_at("", b"/dummy\n");
        let dir1 = root.chain("dir1/", Path::new(""), b"/dummy\n").unwrap();
        let dir2 = dir1
            .chain("dir1/dir2/", Path::new(""), b"/dir3\n")
            .unwrap();
        assert_not_ignored(&dir2, &["foo", "dir1/foo", "dir1/dir2/foo"]);
        assert_ignored(
            &dir2,
            &["dir1/dir2/dir3/foo", "dir1/dir2/dir3/dir4/foo"],
        );
    }

    #[test]
    fn test_gitignore_match_only_dir() {
        let ignore = ignore_at("", b"/dir/\n");
        assert_not_ignored(&ignore, &["dir"]);
        assert_ignored(&ignore, &["dir/foo", "dir/subdir/foo"]);
    }

    #[test]
    fn test_gitignore_unusual_symbols() {
        let star = b"\\*\n";
        assert!(matches(star, "*"));
        assert!(!matches(star, "foo"));

        assert!(matches(b"\\!\n", "!"));

        let question_mark = b"\\?\n";
        assert!(matches(question_mark, "?"));
        assert!(!matches(question_mark, "x"));

        assert!(matches(b"\\w\n", "w"));

        // A lone backslash is not a valid pattern
        assert!(is_rejected(b"\\\n"));
    }

    #[test]
    #[cfg(not(target_os = "windows"))]
    fn test_gitignore_backslash_path() {
        let file_pattern = b"/foo/bar";
        assert!(!matches(file_pattern, "/foo\\bar"));
        assert!(!matches(file_pattern, "/foo/bar\\"));

        let dir_pattern = b"/foo/bar/";
        assert!(!matches(dir_pattern, "/foo\\bar/"));
        assert!(!matches(dir_pattern, "/foo\\bar\\/"));

        // Invalid escapes are treated like literal backslashes
        assert!(!matches(b"\\w\n", "\\w"));
        assert!(matches(b"\\\\ \n", "\\ "));
        assert!(matches(b"\\\\\\ \n", "\\ "));
    }

    #[test]
    #[cfg(target_os = "windows")]
    /// ignore crate consider backslashes as a directory divider only on
    /// Windows.
    fn test_gitignore_backslash_path() {
        let file_pattern = b"/foo/bar";
        assert!(matches(file_pattern, "/foo\\bar"));
        assert!(matches(file_pattern, "/foo/bar\\"));

        let dir_pattern = b"/foo/bar/";
        assert!(matches(dir_pattern, "/foo\\bar/"));
        assert!(matches(dir_pattern, "/foo\\bar\\/"));

        assert!(matches(b"\\w\n", "\\w"));
        assert!(!matches(b"\\\\ \n", "\\ "));
        assert!(!matches(b"\\\\\\ \n", "\\ "));
    }

    #[test]
    fn test_gitignore_whitespace() {
        // Blank and escaped-space patterns
        assert!(!matches(b" \n", " "));
        assert!(matches(b"\\ \n", " "));
        assert!(!matches(b"\\\\ \n", " "));

        // Leading, inner, and trailing spaces
        assert!(matches(b" a\n", " a"));
        assert!(matches(b"a b\n", "a b"));
        let trailing_space = b"a b \n";
        assert!(matches(trailing_space, "a b"));
        assert!(!matches(trailing_space, "a b "));
        assert!(matches(b"a b\\ \\ \n", "a b  "));

        // Trailing CRs at EOL are ignored
        let single_cr = b"a\r\n";
        assert!(matches(single_cr, "a"));
        assert!(!matches(single_cr, "a\r"));
        let double_cr = b"a\r\r\n";
        assert!(!matches(double_cr, "a\r"));
        assert!(matches(double_cr, "a"));
        assert!(!matches(double_cr, "a\r\r"));
        assert!(matches(double_cr, "a"));

        // A leading CR is part of the pattern
        let leading_cr = b"\ra\n";
        assert!(matches(leading_cr, "\ra"));
        assert!(!matches(leading_cr, "a"));

        assert!(is_rejected(b"a b \\  \n"));
    }

    #[test]
    fn test_gitignore_glob() {
        let star = b"*.o\n";
        assert!(!matches(star, "foo"));
        assert!(matches(star, "foo.o"));

        let question_mark = b"foo.?\n";
        assert!(!matches(question_mark, "foo"));
        assert!(!matches(question_mark, "foo."));
        assert!(matches(question_mark, "foo.o"));
    }

    #[test]
    fn test_gitignore_range() {
        let set = b"foo.[az]\n";
        assert!(!matches(set, "foo"));
        assert!(matches(set, "foo.a"));
        assert!(!matches(set, "foo.g"));
        assert!(matches(set, "foo.z"));

        let range = b"foo.[a-z]\n";
        assert!(!matches(range, "foo"));
        assert!(matches(range, "foo.a"));
        assert!(matches(range, "foo.g"));
        assert!(matches(range, "foo.z"));

        let hex = b"foo.[0-9a-fA-F]\n";
        assert!(matches(hex, "foo.5"));
        assert!(matches(hex, "foo.c"));
        assert!(matches(hex, "foo.E"));
        assert!(!matches(hex, "foo._"));
    }

    #[test]
    fn test_gitignore_leading_dir_glob() {
        let any_foo = b"**/foo\n";
        assert!(matches(any_foo, "foo"));
        assert!(matches(any_foo, "dir1/dir2/foo"));
        assert!(matches(any_foo, "foo/file"));

        let any_dir_foo = b"**/dir/foo\n";
        assert!(matches(any_dir_foo, "dir/foo"));
        assert!(matches(any_dir_foo, "dir1/dir2/dir/foo"));
    }

    #[test]
    fn test_gitignore_leading_dir_glob_with_prefix() {
        let ignore = ignore_at("dir1/dir2/", b"**/foo\n");
        assert_ignored(&ignore, &["dir1/dir2/foo", "dir1/dir2/sub1/sub2/foo"]);
        assert_not_ignored(&ignore, &["dir1/dir2/bar", "dir1/dir2/sub1/sub2/bar"]);
    }

    #[test]
    fn test_gitignore_trailing_dir_glob() {
        let pattern = b"abc/**\n";
        assert!(!matches(pattern, "abc"));
        assert!(matches(pattern, "abc/file"));
        assert!(matches(pattern, "abc/dir/file"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob() {
        let pattern = b"a/**/b\n";
        assert!(matches(pattern, "a/b"));
        assert!(matches(pattern, "a/x/b"));
        assert!(matches(pattern, "a/x/y/b"));
        assert!(!matches(pattern, "ax/y/b"));
        assert!(!matches(pattern, "a/x/yb"));
        assert!(!matches(pattern, "ab"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob_not_really() {
        let pattern = b"a/x**y/b\n";
        assert!(!matches(pattern, "a/b"));
        assert!(matches(pattern, "a/xy/b"));
        assert!(matches(pattern, "a/xzzzy/b"));
    }

    #[test]
    fn test_gitignore_line_ordering() {
        let two_lines = b"foo\n!foo/bar\n";
        assert!(matches(two_lines, "foo"));
        assert!(!matches(two_lines, "foo/bar"));
        assert!(matches(two_lines, "foo/baz"));

        let three_lines = b"foo\n!foo/bar\nfoo/bar/baz";
        assert!(matches(three_lines, "foo"));
        assert!(!matches(three_lines, "foo/bar"));
        assert!(matches(three_lines, "foo/bar/baz"));
        assert!(!matches(three_lines, "foo/bar/quux"));

        assert!(!matches(b"foo/*\n!foo/bar", "foo/bar"));
    }

    #[test]
    fn test_gitignore_file_ordering() {
        let file1 = ignore_at("", b"/foo\n");
        let file2 = file1.chain("foo/", Path::new(""), b"!/bar").unwrap();
        let file3 = file2.chain("foo/bar/", Path::new(""), b"/baz").unwrap();

        assert_ignored(&file1, &["foo", "foo/bar"]);

        assert_not_ignored(&file2, &["foo/bar", "foo/bar/baz"]);
        assert_ignored(&file2, &["foo/baz"]);

        assert_ignored(&file3, &["foo/bar/baz"]);
        assert_not_ignored(&file3, &["foo/bar/qux"]);
    }

    #[test]
    fn test_gitignore_negative_parent_directory() {
        // The following script shows that Git ignores the file:
        //
        // ```bash
        // $ rm -rf test-repo && \
        //   git init test-repo &>/dev/null && \
        //   cd test-repo && \
        //   printf 'A/B.*\n!/A/\n' >.gitignore && \
        //   mkdir A && \
        //   touch A/B.ext && \
        //   git check-ignore A/B.ext
        // A/B.ext
        // ```
        let negation_last = ignore_at("", b"foo/bar.*\n!/foo/\n");
        assert_ignored(&negation_last, &["foo/bar.ext"]);

        let negation_first = ignore_at("", b"!/foo/\nfoo/bar.*\n");
        assert_ignored(&negation_first, &["foo/bar.ext"]);
    }

    #[test]
    fn test_gitignore_invalid_utf8() {
        // This tests that comments are not parsed. The bytes are `#` followed
        // by a lone 0xE0 (`à` in Latin-1), which is not valid UTF-8.
        let non_ascii_bytes = [35, 224];

        let with_comment_marker = GitIgnoreFile::empty().chain("", Path::new(""), &non_ascii_bytes);
        assert!(with_comment_marker.is_ok());

        // Test without the leading #
        let without_comment_marker =
            GitIgnoreFile::empty().chain("", Path::new(""), &non_ascii_bytes[1..]);
        assert!(without_comment_marker.is_err());
    }
}