jujutsu_lib/
gitignore.rs

1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fs::File;
16use std::io::Read;
17use std::path::PathBuf;
18use std::sync::Arc;
19
20use itertools::Itertools;
21use regex::{escape as regex_escape, Regex};
22
/// One parsed pattern line of a gitignore file.
#[derive(Debug)]
struct GitIgnoreLine {
    /// True when the pattern was written with a leading `!`.
    is_negative: bool,
    /// Anchored regular expression compiled from the pattern; paths are tested
    /// against it to decide whether the line applies.
    regex: Regex,
}
28
29impl GitIgnoreLine {
30    // Remove trailing spaces (unless backslash-escaped). Any character
31    // can be backslash-escaped as well.
32    fn remove_trailing_space(input: &str) -> &str {
33        let input = input.strip_suffix('\r').unwrap_or(input);
34        let mut it = input.char_indices().rev().peekable();
35        while let Some((i, c)) = it.next() {
36            if c != ' ' {
37                return &input[..i + c.len_utf8()];
38            }
39            if matches!(it.peek(), Some((_, '\\'))) {
40                if it.skip(1).take_while(|(_, b)| *b == '\\').count() % 2 == 1 {
41                    return &input[..i];
42                }
43                return &input[..i + 1];
44            }
45        }
46        ""
47    }
48
49    fn parse(prefix: &str, input: &str) -> Option<GitIgnoreLine> {
50        assert!(prefix.is_empty() || prefix.ends_with('/'));
51        if input.starts_with('#') {
52            return None;
53        }
54
55        let input = GitIgnoreLine::remove_trailing_space(input);
56        // Remove leading "!" before checking for empty to match git's implementation
57        // (i.e. just "!" matching nothing, not everything).
58        let (is_negative, input) = match input.strip_prefix('!') {
59            None => (false, input),
60            Some(rest) => (true, rest),
61        };
62        if input.is_empty() {
63            return None;
64        }
65
66        let (matches_only_directory, input) = match input.strip_suffix('/') {
67            None => (false, input),
68            Some(rest) => (true, rest),
69        };
70        let (mut is_rooted, input) = match input.strip_prefix('/') {
71            None => (false, input),
72            Some(rest) => (true, rest),
73        };
74        is_rooted |= input.contains('/');
75
76        let mut regex = String::new();
77        regex.push('^');
78        regex.push_str(prefix);
79        if !is_rooted {
80            regex.push_str("(.*/)?");
81        }
82
83        let components = input.split('/').collect_vec();
84        for (i, component) in components.iter().enumerate() {
85            if *component == "**" {
86                if i == components.len() - 1 {
87                    regex.push_str(".*");
88                } else {
89                    regex.push_str("(.*/)?");
90                }
91            } else {
92                let mut in_escape = false;
93                let mut character_class: Option<String> = None;
94                for c in component.chars() {
95                    if in_escape {
96                        in_escape = false;
97                        if !matches!(c, ' ' | '#' | '!' | '?' | '\\' | '*') {
98                            regex.push_str(&regex_escape("\\"));
99                        }
100                        regex.push_str(&regex_escape(&c.to_string()));
101                    } else if c == '\\' {
102                        in_escape = true;
103                    } else if let Some(characters) = &mut character_class {
104                        if c == ']' {
105                            regex.push('[');
106                            regex.push_str(characters);
107                            regex.push(']');
108                            character_class = None;
109                        } else {
110                            characters.push(c);
111                        }
112                    } else {
113                        in_escape = false;
114                        if c == '?' {
115                            regex.push_str("[^/]");
116                        } else if c == '*' {
117                            regex.push_str("[^/]*");
118                        } else if c == '[' {
119                            character_class = Some(String::new());
120                        } else {
121                            regex.push_str(&regex_escape(&c.to_string()));
122                        }
123                    }
124                }
125                if in_escape {
126                    regex.push_str(&regex_escape("\\"));
127                }
128                if i < components.len() - 1 {
129                    regex.push('/');
130                }
131            }
132        }
133        if matches_only_directory {
134            regex.push_str("/.*");
135        } else {
136            regex.push_str("(/.*|$)");
137        }
138        let regex = Regex::new(&regex).unwrap();
139
140        Some(GitIgnoreLine { is_negative, regex })
141    }
142
143    fn matches(&self, path: &str) -> bool {
144        self.regex.is_match(path)
145    }
146}
147
/// The patterns of one ignore file, linked to the ignore file it was chained
/// onto so that lookups can fall back to the earlier patterns.
#[derive(Debug)]
pub struct GitIgnoreFile {
    /// The file this one was chained onto; `None` for the empty root.
    parent: Option<Arc<GitIgnoreFile>>,
    /// Parsed pattern lines, in the order they appeared in the input.
    lines: Vec<GitIgnoreLine>,
}
153
154impl GitIgnoreFile {
155    pub fn empty() -> Arc<GitIgnoreFile> {
156        Arc::new(GitIgnoreFile {
157            parent: None,
158            lines: vec![],
159        })
160    }
161
162    pub fn chain(self: &Arc<GitIgnoreFile>, prefix: &str, input: &[u8]) -> Arc<GitIgnoreFile> {
163        let mut lines = vec![];
164        for input_line in input.split(|b| *b == b'\n') {
165            // Skip non-utf8 lines
166            if let Ok(line_string) = String::from_utf8(input_line.to_vec()) {
167                if let Some(line) = GitIgnoreLine::parse(prefix, &line_string) {
168                    lines.push(line);
169                }
170            }
171        }
172
173        Arc::new(GitIgnoreFile {
174            parent: Some(self.clone()),
175            lines,
176        })
177    }
178
179    pub fn chain_with_file(
180        self: &Arc<GitIgnoreFile>,
181        prefix: &str,
182        file: PathBuf,
183    ) -> Arc<GitIgnoreFile> {
184        if file.is_file() {
185            let mut file = File::open(file).unwrap();
186            let mut buf = Vec::new();
187            file.read_to_end(&mut buf).unwrap();
188            self.chain(prefix, &buf)
189        } else {
190            self.clone()
191        }
192    }
193
194    fn all_lines_reversed<'a>(&'a self) -> Box<dyn Iterator<Item = &GitIgnoreLine> + 'a> {
195        if let Some(parent) = &self.parent {
196            Box::new(self.lines.iter().rev().chain(parent.all_lines_reversed()))
197        } else {
198            Box::new(self.lines.iter().rev())
199        }
200    }
201
202    pub fn matches_file(&self, path: &str) -> bool {
203        // Later lines take precedence, so check them in reverse
204        for line in self.all_lines_reversed() {
205            if line.matches(path) {
206                return !line.is_negative;
207            }
208        }
209        false
210    }
211
212    pub fn matches_all_files_in(&self, dir: &str) -> bool {
213        // Later lines take precedence, so check them in reverse
214        assert!(dir.is_empty() || dir.ends_with('/'));
215        for line in self.all_lines_reversed() {
216            // Let's say there's a "/target/" pattern and then a "!interesting" pattern
217            // after it, then we can't say for sure that all files in target/ match.
218            // TODO: This can be smarter. For example, if there's a pattern "/foo/" followed
219            // by "!/bar/", then we can answer "true" for "foo/". A more complex
220            // case is if a pattern "/foo/" is followed "!/foo/bar/", then we
221            // can say "false" for "foo/" and "true" for "foo/baz/".
222            if line.is_negative {
223                return false;
224            }
225            if line.matches(dir) {
226                return true;
227            }
228        }
229        false
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Whether a root-level ignore file with contents `input` ignores `path`.
    fn matches_file(input: &[u8], path: &str) -> bool {
        GitIgnoreFile::empty().chain("", input).matches_file(path)
    }

    /// Whether a root-level ignore file with contents `input` ignores all of `path`.
    fn matches_all_files_in(input: &[u8], path: &str) -> bool {
        GitIgnoreFile::empty()
            .chain("", input)
            .matches_all_files_in(path)
    }

    /// Checks each `(path, ignored)` expectation against an existing ignore file.
    fn expect_files(file: &GitIgnoreFile, expectations: &[(&str, bool)]) {
        for &(path, ignored) in expectations {
            assert_eq!(
                file.matches_file(path),
                ignored,
                "unexpected result for path {:?}",
                path
            );
        }
    }

    /// Checks each `(path, ignored)` expectation against a root-level ignore
    /// file with contents `input`.
    fn expect_patterns(input: &[u8], expectations: &[(&str, bool)]) {
        for &(path, ignored) in expectations {
            assert_eq!(
                matches_file(input, path),
                ignored,
                "unexpected result for patterns {:?} and path {:?}",
                String::from_utf8_lossy(input),
                path
            );
        }
    }

    #[test]
    fn test_gitignore_empty_file() {
        expect_files(&GitIgnoreFile::empty(), &[("foo", false)]);
    }

    #[test]
    fn test_gitignore_empty_file_with_prefix() {
        let ignore = GitIgnoreFile::empty().chain("dir/", b"");
        expect_files(&ignore, &[("dir/foo", false)]);
    }

    #[test]
    fn test_gitignore_literal() {
        let ignore = GitIgnoreFile::empty().chain("", b"foo\n");
        expect_files(
            &ignore,
            &[
                ("foo", true),
                ("dir/foo", true),
                ("dir/subdir/foo", true),
                ("food", false),
                ("dir/food", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_literal_with_prefix() {
        let ignore = GitIgnoreFile::empty().chain("dir/", b"foo\n");
        expect_files(
            &ignore,
            &[
                // I consider it undefined whether a file in a parent directory
                // matches, but let's test it anyway
                ("foo", false),
                ("dir/foo", true),
                ("dir/subdir/foo", true),
            ],
        );
    }

    #[test]
    fn test_gitignore_pattern_same_as_prefix() {
        let ignore = GitIgnoreFile::empty().chain("dir/", b"dir\n");
        expect_files(
            &ignore,
            &[
                ("dir/dir", true),
                // We don't want the "dir" pattern to apply to the parent directory
                ("dir/foo", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_rooted_literal() {
        let ignore = GitIgnoreFile::empty().chain("", b"/foo\n");
        expect_files(&ignore, &[("foo", true), ("dir/foo", false)]);
    }

    #[test]
    fn test_gitignore_rooted_literal_with_prefix() {
        let ignore = GitIgnoreFile::empty().chain("dir/", b"/foo\n");
        expect_files(
            &ignore,
            &[
                // I consider it undefined whether a file in a parent directory
                // matches, but let's test it anyway
                ("foo", false),
                ("dir/foo", true),
                ("dir/subdir/foo", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_deep_dir() {
        let ignore = GitIgnoreFile::empty().chain("", b"/dir1/dir2/dir3\n");
        expect_files(
            &ignore,
            &[
                ("foo", false),
                ("dir1/foo", false),
                ("dir1/dir2/foo", false),
                ("dir1/dir2/dir3/foo", true),
                ("dir1/dir2/dir3/dir4/foo", true),
            ],
        );
    }

    #[test]
    fn test_gitignore_match_only_dir() {
        let ignore = GitIgnoreFile::empty().chain("", b"/dir/\n");
        expect_files(
            &ignore,
            &[("dir", false), ("dir/foo", true), ("dir/subdir/foo", true)],
        );
    }

    #[test]
    fn test_gitignore_unusual_symbols() {
        expect_patterns(b"\\*\n", &[("*", true), ("foo", false)]);
        expect_patterns(b"\\\n", &[("\\", true)]);
        expect_patterns(b"\\!\n", &[("!", true)]);
        expect_patterns(b"\\?\n", &[("?", true), ("x", false)]);
        // Invalid escapes are treated like literal backslashes
        expect_patterns(b"\\w\n", &[("\\w", true), ("w", false)]);
    }

    #[test]
    fn test_gitignore_whitespace() {
        expect_patterns(b" \n", &[(" ", false)]);
        expect_patterns(b"\\ \n", &[(" ", true)]);
        expect_patterns(b"\\\\ \n", &[("\\", true), (" ", false)]);
        expect_patterns(b"\\\\\\ \n", &[("\\ ", true)]);
        expect_patterns(b" a\n", &[(" a", true)]);
        expect_patterns(b"a b\n", &[("a b", true)]);
        expect_patterns(b"a b \n", &[("a b", true), ("a b ", false)]);
        expect_patterns(b"a b\\ \\ \n", &[("a b  ", true)]);
        // It's unclear how this should be interpreted, but we count spaces before
        // escaped spaces
        expect_patterns(b"a b \\  \n", &[("a b  ", true)]);
        // A single CR at EOL is ignored
        expect_patterns(b"a\r\n", &[("a", true), ("a\r", false)]);
        expect_patterns(b"a\r\r\n", &[("a\r", true), ("a\r\r", false)]);
        expect_patterns(b"\ra\n", &[("\ra", true), ("a", false)]);
    }

    #[test]
    fn test_gitignore_glob() {
        expect_patterns(b"*.o\n", &[("foo", false), ("foo.o", true)]);
        expect_patterns(
            b"foo.?\n",
            &[("foo", false), ("foo.", false), ("foo.o", true)],
        );
    }

    #[test]
    fn test_gitignore_range() {
        expect_patterns(
            b"foo.[az]\n",
            &[
                ("foo", false),
                ("foo.a", true),
                ("foo.g", false),
                ("foo.z", true),
            ],
        );
        expect_patterns(
            b"foo.[a-z]\n",
            &[
                ("foo", false),
                ("foo.a", true),
                ("foo.g", true),
                ("foo.z", true),
            ],
        );
        expect_patterns(
            b"foo.[0-9a-fA-F]\n",
            &[
                ("foo.5", true),
                ("foo.c", true),
                ("foo.E", true),
                ("foo._", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_leading_dir_glob() {
        expect_patterns(
            b"**/foo\n",
            &[("foo", true), ("dir1/dir2/foo", true), ("foo/file", true)],
        );
        expect_patterns(
            b"**/dir/foo\n",
            &[("dir/foo", true), ("dir1/dir2/dir/foo", true)],
        );
    }

    #[test]
    fn test_gitignore_leading_dir_glob_with_prefix() {
        let ignore = GitIgnoreFile::empty().chain("dir1/dir2/", b"**/foo\n");
        expect_files(
            &ignore,
            &[
                // I consider it undefined whether a file in a parent directory
                // matches, but let's test it anyway
                ("foo", false),
                ("dir1/dir2/foo", true),
                ("dir1/dir2/bar", false),
                ("dir1/dir2/sub1/sub2/foo", true),
                ("dir1/dir2/sub1/sub2/bar", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_trailing_dir_glob() {
        expect_patterns(
            b"abc/**\n",
            &[("abc", false), ("abc/file", true), ("abc/dir/file", true)],
        );
    }

    #[test]
    fn test_gitignore_internal_dir_glob() {
        expect_patterns(
            b"a/**/b\n",
            &[
                ("a/b", true),
                ("a/x/b", true),
                ("a/x/y/b", true),
                ("ax/y/b", false),
                ("a/x/yb", false),
                ("ab", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_internal_dir_glob_not_really() {
        expect_patterns(
            b"a/x**y/b\n",
            &[("a/b", false), ("a/xy/b", true), ("a/xzzzy/b", true)],
        );
    }

    #[test]
    fn test_gitignore_line_ordering() {
        expect_patterns(
            b"foo\n!foo/bar\n",
            &[("foo", true), ("foo/bar", false), ("foo/baz", true)],
        );
        expect_patterns(
            b"foo\n!foo/bar\nfoo/bar/baz",
            &[
                ("foo", true),
                ("foo/bar", false),
                ("foo/bar/baz", true),
                ("foo/bar/quux", false),
            ],
        );
    }

    #[test]
    fn test_gitignore_file_ordering() {
        let root = GitIgnoreFile::empty().chain("", b"foo\n");
        let nested = root.chain("foo/", b"!bar");
        let deepest = nested.chain("foo/bar/", b"baz");
        expect_files(&root, &[("foo", true), ("foo/bar", true)]);
        expect_files(&nested, &[("foo/bar", false), ("foo/baz", true)]);
        expect_files(&deepest, &[("foo/bar/baz", true), ("foo/bar/qux", false)]);
    }

    #[test]
    fn test_gitignore_match_dir() {
        let fully_ignored: [(&[u8], &str); 3] = [
            (b"foo\n", "foo/"),
            (b"foo\nbar\n", "foo/"),
            (b"!foo\nbar\n", "bar/"),
        ];
        for (input, dir) in fully_ignored {
            assert!(matches_all_files_in(input, dir));
        }
        assert!(!matches_all_files_in(b"foo\n!bar\n", "foo/"));
        // This one could return true, but it doesn't currently
        assert!(!matches_all_files_in(b"foo\n!/bar\n", "foo/"));
    }
}