1use std::fs::File;
16use std::io::Read;
17use std::path::PathBuf;
18use std::sync::Arc;
19
20use itertools::Itertools;
21use regex::{escape as regex_escape, Regex};
22
/// One parsed pattern line of a gitignore file.
#[derive(Debug)]
struct GitIgnoreLine {
    /// True when the line began with `!`. `GitIgnoreFile::matches_file`
    /// reports a path as *not* ignored when such a line is the deciding match.
    is_negative: bool,
    /// Regular expression compiled by `parse`; `matches` applies it to a path.
    regex: Regex,
}
28
29impl GitIgnoreLine {
30 fn remove_trailing_space(input: &str) -> &str {
33 let input = input.strip_suffix('\r').unwrap_or(input);
34 let mut it = input.char_indices().rev().peekable();
35 while let Some((i, c)) = it.next() {
36 if c != ' ' {
37 return &input[..i + c.len_utf8()];
38 }
39 if matches!(it.peek(), Some((_, '\\'))) {
40 if it.skip(1).take_while(|(_, b)| *b == '\\').count() % 2 == 1 {
41 return &input[..i];
42 }
43 return &input[..i + 1];
44 }
45 }
46 ""
47 }
48
49 fn parse(prefix: &str, input: &str) -> Option<GitIgnoreLine> {
50 assert!(prefix.is_empty() || prefix.ends_with('/'));
51 if input.starts_with('#') {
52 return None;
53 }
54
55 let input = GitIgnoreLine::remove_trailing_space(input);
56 let (is_negative, input) = match input.strip_prefix('!') {
59 None => (false, input),
60 Some(rest) => (true, rest),
61 };
62 if input.is_empty() {
63 return None;
64 }
65
66 let (matches_only_directory, input) = match input.strip_suffix('/') {
67 None => (false, input),
68 Some(rest) => (true, rest),
69 };
70 let (mut is_rooted, input) = match input.strip_prefix('/') {
71 None => (false, input),
72 Some(rest) => (true, rest),
73 };
74 is_rooted |= input.contains('/');
75
76 let mut regex = String::new();
77 regex.push('^');
78 regex.push_str(prefix);
79 if !is_rooted {
80 regex.push_str("(.*/)?");
81 }
82
83 let components = input.split('/').collect_vec();
84 for (i, component) in components.iter().enumerate() {
85 if *component == "**" {
86 if i == components.len() - 1 {
87 regex.push_str(".*");
88 } else {
89 regex.push_str("(.*/)?");
90 }
91 } else {
92 let mut in_escape = false;
93 let mut character_class: Option<String> = None;
94 for c in component.chars() {
95 if in_escape {
96 in_escape = false;
97 if !matches!(c, ' ' | '#' | '!' | '?' | '\\' | '*') {
98 regex.push_str(®ex_escape("\\"));
99 }
100 regex.push_str(®ex_escape(&c.to_string()));
101 } else if c == '\\' {
102 in_escape = true;
103 } else if let Some(characters) = &mut character_class {
104 if c == ']' {
105 regex.push('[');
106 regex.push_str(characters);
107 regex.push(']');
108 character_class = None;
109 } else {
110 characters.push(c);
111 }
112 } else {
113 in_escape = false;
114 if c == '?' {
115 regex.push_str("[^/]");
116 } else if c == '*' {
117 regex.push_str("[^/]*");
118 } else if c == '[' {
119 character_class = Some(String::new());
120 } else {
121 regex.push_str(®ex_escape(&c.to_string()));
122 }
123 }
124 }
125 if in_escape {
126 regex.push_str(®ex_escape("\\"));
127 }
128 if i < components.len() - 1 {
129 regex.push('/');
130 }
131 }
132 }
133 if matches_only_directory {
134 regex.push_str("/.*");
135 } else {
136 regex.push_str("(/.*|$)");
137 }
138 let regex = Regex::new(®ex).unwrap();
139
140 Some(GitIgnoreLine { is_negative, regex })
141 }
142
143 fn matches(&self, path: &str) -> bool {
144 self.regex.is_match(path)
145 }
146}
147
/// The parsed pattern lines of one gitignore file, linked to the file it was
/// chained onto.
#[derive(Debug)]
pub struct GitIgnoreFile {
    /// The file this one was chained onto, if any. Its lines are consulted
    /// only after this file's own lines.
    parent: Option<Arc<GitIgnoreFile>>,
    /// Parsed pattern lines, in the order they appeared in the input.
    lines: Vec<GitIgnoreLine>,
}
153
154impl GitIgnoreFile {
155 pub fn empty() -> Arc<GitIgnoreFile> {
156 Arc::new(GitIgnoreFile {
157 parent: None,
158 lines: vec![],
159 })
160 }
161
162 pub fn chain(self: &Arc<GitIgnoreFile>, prefix: &str, input: &[u8]) -> Arc<GitIgnoreFile> {
163 let mut lines = vec![];
164 for input_line in input.split(|b| *b == b'\n') {
165 if let Ok(line_string) = String::from_utf8(input_line.to_vec()) {
167 if let Some(line) = GitIgnoreLine::parse(prefix, &line_string) {
168 lines.push(line);
169 }
170 }
171 }
172
173 Arc::new(GitIgnoreFile {
174 parent: Some(self.clone()),
175 lines,
176 })
177 }
178
179 pub fn chain_with_file(
180 self: &Arc<GitIgnoreFile>,
181 prefix: &str,
182 file: PathBuf,
183 ) -> Arc<GitIgnoreFile> {
184 if file.is_file() {
185 let mut file = File::open(file).unwrap();
186 let mut buf = Vec::new();
187 file.read_to_end(&mut buf).unwrap();
188 self.chain(prefix, &buf)
189 } else {
190 self.clone()
191 }
192 }
193
194 fn all_lines_reversed<'a>(&'a self) -> Box<dyn Iterator<Item = &GitIgnoreLine> + 'a> {
195 if let Some(parent) = &self.parent {
196 Box::new(self.lines.iter().rev().chain(parent.all_lines_reversed()))
197 } else {
198 Box::new(self.lines.iter().rev())
199 }
200 }
201
202 pub fn matches_file(&self, path: &str) -> bool {
203 for line in self.all_lines_reversed() {
205 if line.matches(path) {
206 return !line.is_negative;
207 }
208 }
209 false
210 }
211
212 pub fn matches_all_files_in(&self, dir: &str) -> bool {
213 assert!(dir.is_empty() || dir.ends_with('/'));
215 for line in self.all_lines_reversed() {
216 if line.is_negative {
223 return false;
224 }
225 if line.matches(dir) {
226 return true;
227 }
228 }
229 false
230 }
231}
232
#[cfg(test)]
mod tests {

    use super::*;

    /// Parses `input` as a root-level ignore file (empty prefix) and reports
    /// whether `path` is ignored by it.
    fn matches_file(input: &[u8], path: &str) -> bool {
        let file = GitIgnoreFile::empty().chain("", input);
        file.matches_file(path)
    }

    /// Parses `input` as a root-level ignore file (empty prefix) and reports
    /// whether everything under the directory `path` is ignored by it.
    fn matches_all_files_in(input: &[u8], path: &str) -> bool {
        let file = GitIgnoreFile::empty().chain("", input);
        file.matches_all_files_in(path)
    }

    #[test]
    fn test_gitignore_empty_file() {
        let file = GitIgnoreFile::empty();
        assert!(!file.matches_file("foo"));
    }

    #[test]
    fn test_gitignore_empty_file_with_prefix() {
        let file = GitIgnoreFile::empty().chain("dir/", b"");
        assert!(!file.matches_file("dir/foo"));
    }

    #[test]
    fn test_gitignore_literal() {
        // An unrooted literal matches at any depth, but only as a whole name.
        let file = GitIgnoreFile::empty().chain("", b"foo\n");
        assert!(file.matches_file("foo"));
        assert!(file.matches_file("dir/foo"));
        assert!(file.matches_file("dir/subdir/foo"));
        assert!(!file.matches_file("food"));
        assert!(!file.matches_file("dir/food"));
    }

    #[test]
    fn test_gitignore_literal_with_prefix() {
        let file = GitIgnoreFile::empty().chain("dir/", b"foo\n");
        // A path outside the prefix directory is not matched.
        assert!(!file.matches_file("foo"));
        assert!(file.matches_file("dir/foo"));
        assert!(file.matches_file("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_pattern_same_as_prefix() {
        // The pattern applies below the prefix; it does not match the prefix
        // directory itself.
        let file = GitIgnoreFile::empty().chain("dir/", b"dir\n");
        assert!(file.matches_file("dir/dir"));
        assert!(!file.matches_file("dir/foo"));
    }

    #[test]
    fn test_gitignore_rooted_literal() {
        let file = GitIgnoreFile::empty().chain("", b"/foo\n");
        assert!(file.matches_file("foo"));
        assert!(!file.matches_file("dir/foo"));
    }

    #[test]
    fn test_gitignore_rooted_literal_with_prefix() {
        let file = GitIgnoreFile::empty().chain("dir/", b"/foo\n");
        // A path outside the prefix directory is not matched.
        assert!(!file.matches_file("foo"));
        assert!(file.matches_file("dir/foo"));
        assert!(!file.matches_file("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_deep_dir() {
        let file = GitIgnoreFile::empty().chain("", b"/dir1/dir2/dir3\n");
        assert!(!file.matches_file("foo"));
        assert!(!file.matches_file("dir1/foo"));
        assert!(!file.matches_file("dir1/dir2/foo"));
        assert!(file.matches_file("dir1/dir2/dir3/foo"));
        assert!(file.matches_file("dir1/dir2/dir3/dir4/foo"));
    }

    #[test]
    fn test_gitignore_match_only_dir() {
        // A trailing slash matches only what is below the directory, never an
        // entry named exactly "dir".
        let file = GitIgnoreFile::empty().chain("", b"/dir/\n");
        assert!(!file.matches_file("dir"));
        assert!(file.matches_file("dir/foo"));
        assert!(file.matches_file("dir/subdir/foo"));
    }

    #[test]
    fn test_gitignore_unusual_symbols() {
        assert!(matches_file(b"\\*\n", "*"));
        assert!(!matches_file(b"\\*\n", "foo"));
        assert!(matches_file(b"\\\n", "\\"));
        assert!(matches_file(b"\\!\n", "!"));
        assert!(matches_file(b"\\?\n", "?"));
        assert!(!matches_file(b"\\?\n", "x"));
        // A backslash before an ordinary character stays a literal backslash.
        assert!(matches_file(b"\\w\n", "\\w"));
        assert!(!matches_file(b"\\w\n", "w"));
    }

    #[test]
    fn test_gitignore_whitespace() {
        assert!(!matches_file(b" \n", " "));
        assert!(matches_file(b"\\ \n", " "));
        assert!(matches_file(b"\\\\ \n", "\\"));
        assert!(!matches_file(b"\\\\ \n", " "));
        assert!(matches_file(b"\\\\\\ \n", "\\ "));
        assert!(matches_file(b" a\n", " a"));
        assert!(matches_file(b"a b\n", "a b"));
        assert!(matches_file(b"a b \n", "a b"));
        assert!(!matches_file(b"a b \n", "a b "));
        // Two escaped spaces are two literal spaces in the matched name.
        assert!(matches_file(b"a b\\ \\ \n", "a b  "));
        // A plain space followed by an escaped space: both are kept, so the
        // matched name again ends in two spaces.
        assert!(matches_file(b"a b \\ \n", "a b  "));
        // A single trailing CR is treated as part of the line ending.
        assert!(matches_file(b"a\r\n", "a"));
        assert!(!matches_file(b"a\r\n", "a\r"));
        // Only one CR is stripped; any other CR is part of the pattern.
        assert!(matches_file(b"a\r\r\n", "a\r"));
        assert!(!matches_file(b"a\r\r\n", "a\r\r"));
        assert!(matches_file(b"\ra\n", "\ra"));
        assert!(!matches_file(b"\ra\n", "a"));
    }

    #[test]
    fn test_gitignore_glob() {
        assert!(!matches_file(b"*.o\n", "foo"));
        assert!(matches_file(b"*.o\n", "foo.o"));
        assert!(!matches_file(b"foo.?\n", "foo"));
        assert!(!matches_file(b"foo.?\n", "foo."));
        assert!(matches_file(b"foo.?\n", "foo.o"));
    }

    #[test]
    fn test_gitignore_range() {
        assert!(!matches_file(b"foo.[az]\n", "foo"));
        assert!(matches_file(b"foo.[az]\n", "foo.a"));
        assert!(!matches_file(b"foo.[az]\n", "foo.g"));
        assert!(matches_file(b"foo.[az]\n", "foo.z"));
        assert!(!matches_file(b"foo.[a-z]\n", "foo"));
        assert!(matches_file(b"foo.[a-z]\n", "foo.a"));
        assert!(matches_file(b"foo.[a-z]\n", "foo.g"));
        assert!(matches_file(b"foo.[a-z]\n", "foo.z"));
        assert!(matches_file(b"foo.[0-9a-fA-F]\n", "foo.5"));
        assert!(matches_file(b"foo.[0-9a-fA-F]\n", "foo.c"));
        assert!(matches_file(b"foo.[0-9a-fA-F]\n", "foo.E"));
        assert!(!matches_file(b"foo.[0-9a-fA-F]\n", "foo._"));
    }

    #[test]
    fn test_gitignore_leading_dir_glob() {
        assert!(matches_file(b"**/foo\n", "foo"));
        assert!(matches_file(b"**/foo\n", "dir1/dir2/foo"));
        assert!(matches_file(b"**/foo\n", "foo/file"));
        assert!(matches_file(b"**/dir/foo\n", "dir/foo"));
        assert!(matches_file(b"**/dir/foo\n", "dir1/dir2/dir/foo"));
    }

    #[test]
    fn test_gitignore_leading_dir_glob_with_prefix() {
        let file = GitIgnoreFile::empty().chain("dir1/dir2/", b"**/foo\n");
        // A path outside the prefix directory is not matched.
        assert!(!file.matches_file("foo"));
        assert!(file.matches_file("dir1/dir2/foo"));
        assert!(!file.matches_file("dir1/dir2/bar"));
        assert!(file.matches_file("dir1/dir2/sub1/sub2/foo"));
        assert!(!file.matches_file("dir1/dir2/sub1/sub2/bar"));
    }

    #[test]
    fn test_gitignore_trailing_dir_glob() {
        assert!(!matches_file(b"abc/**\n", "abc"));
        assert!(matches_file(b"abc/**\n", "abc/file"));
        assert!(matches_file(b"abc/**\n", "abc/dir/file"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob() {
        assert!(matches_file(b"a/**/b\n", "a/b"));
        assert!(matches_file(b"a/**/b\n", "a/x/b"));
        assert!(matches_file(b"a/**/b\n", "a/x/y/b"));
        assert!(!matches_file(b"a/**/b\n", "ax/y/b"));
        assert!(!matches_file(b"a/**/b\n", "a/x/yb"));
        assert!(!matches_file(b"a/**/b\n", "ab"));
    }

    #[test]
    fn test_gitignore_internal_dir_glob_not_really() {
        // "**" that is not a whole path component behaves like ordinary "*"s.
        assert!(!matches_file(b"a/x**y/b\n", "a/b"));
        assert!(matches_file(b"a/x**y/b\n", "a/xy/b"));
        assert!(matches_file(b"a/x**y/b\n", "a/xzzzy/b"));
    }

    #[test]
    fn test_gitignore_line_ordering() {
        // Later lines take precedence over earlier ones.
        assert!(matches_file(b"foo\n!foo/bar\n", "foo"));
        assert!(!matches_file(b"foo\n!foo/bar\n", "foo/bar"));
        assert!(matches_file(b"foo\n!foo/bar\n", "foo/baz"));
        assert!(matches_file(b"foo\n!foo/bar\nfoo/bar/baz", "foo"));
        assert!(!matches_file(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar"));
        assert!(matches_file(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/baz"));
        assert!(!matches_file(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/quux"));
    }

    #[test]
    fn test_gitignore_file_ordering() {
        // Lines of a chained file take precedence over those of its parent.
        let file1 = GitIgnoreFile::empty().chain("", b"foo\n");
        let file2 = file1.chain("foo/", b"!bar");
        let file3 = file2.chain("foo/bar/", b"baz");
        assert!(file1.matches_file("foo"));
        assert!(file1.matches_file("foo/bar"));
        assert!(!file2.matches_file("foo/bar"));
        assert!(file2.matches_file("foo/baz"));
        assert!(file3.matches_file("foo/bar/baz"));
        assert!(!file3.matches_file("foo/bar/qux"));
    }

    #[test]
    fn test_gitignore_match_dir() {
        assert!(matches_all_files_in(b"foo\n", "foo/"));
        assert!(matches_all_files_in(b"foo\nbar\n", "foo/"));
        assert!(matches_all_files_in(b"!foo\nbar\n", "bar/"));
        // Any negative line reached before a match makes the answer `false`,
        // even when that line does not concern the directory.
        assert!(!matches_all_files_in(b"foo\n!bar\n", "foo/"));
        assert!(!matches_all_files_in(b"foo\n!/bar\n", "foo/"));
    }
}