/// One parsed pathspec argument: the pattern bytes plus the magic flags
/// that were attached to it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathspecElement {
    /// Bytes of the argument that remain once the magic prefix is removed.
    pattern: Vec<u8>,
    /// Set by the `exclude` magic word or the `!` / `^` short sigils.
    exclude: bool,
    /// Case-insensitive matching; seeded from the defaults or set by `icase`.
    icase: bool,
    /// Treat the pattern as plain bytes; seeded from the defaults or set by `literal`.
    literal: bool,
    /// Pathname-aware globbing; seeded from the defaults or set by `glob`.
    glob: bool,
    /// Set by the `top` magic word or the `/` short sigil.
    top: bool,
    /// Payload of every `attr:` magic word, in the order they appeared.
    attrs: Vec<Vec<u8>>,
}
49
50impl PathspecElement {
51 pub fn parse(arg: &[u8], defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError> {
59 let mut exclude = false;
60 let mut icase = defaults.icase;
61 let mut literal = defaults.literal;
62 let mut glob = defaults.glob;
63 let mut top = false;
64 let mut attrs: Vec<Vec<u8>> = Vec::new();
65
66 let rest = if let Some(after) = arg.strip_prefix(b":(") {
67 let close = after
69 .iter()
70 .position(|&c| c == b')')
71 .ok_or(PathspecParseError::UnterminatedMagic)?;
72 let magic = &after[..close];
73 for word in split_magic(magic) {
74 match word.as_slice() {
75 b"exclude" => exclude = true,
76 b"icase" => icase = true,
77 b"literal" => literal = true,
78 b"glob" => glob = true,
79 b"top" => top = true,
80 other => {
81 if let Some(attr) = other.strip_prefix(b"attr:") {
82 attrs.push(attr.to_vec());
83 } else if other.is_empty() {
84 } else {
87 return Err(PathspecParseError::UnknownMagic(other.to_vec()));
88 }
89 }
90 }
91 }
92 &after[close + 1..]
93 } else if let Some(after) = arg.strip_prefix(b":") {
94 let mut idx = 0;
96 while idx < after.len() {
97 match after[idx] {
98 b'!' | b'^' => exclude = true,
99 b'/' => top = true,
100 _ => break,
101 }
102 idx += 1;
103 }
104 &after[idx..]
105 } else {
106 arg
107 };
108
109 if glob && literal {
111 return Err(PathspecParseError::GlobLiteralConflict);
112 }
113
114 Ok(PathspecElement {
115 pattern: rest.to_vec(),
116 exclude,
117 icase,
118 literal,
119 glob,
120 top,
121 attrs,
122 })
123 }
124
125 pub fn is_exclude(&self) -> bool {
127 self.exclude
128 }
129
130 pub fn is_top(&self) -> bool {
132 self.top
133 }
134
135 pub fn attrs(&self) -> &[Vec<u8>] {
137 &self.attrs
138 }
139
140 pub fn is_icase(&self) -> bool {
142 self.icase
143 }
144
145 pub fn is_glob(&self) -> bool {
147 self.glob
148 }
149
150 pub fn pattern(&self) -> &[u8] {
152 &self.pattern
153 }
154
155 fn magic(&self) -> PathspecMatchMagic {
157 PathspecMatchMagic {
158 literal: self.literal,
159 glob: self.glob,
160 icase: self.icase,
161 }
162 }
163
164 pub fn matches_path(&self, name: &[u8]) -> bool {
168 pathspec_item_matches(&self.pattern, name, self.magic())
169 }
170}
171
/// An ordered list of parsed pathspec elements.
///
/// The derived `Default` is the empty pathspec.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Pathspec {
    /// Elements in the order they were supplied to `parse`.
    elements: Vec<PathspecElement>,
}
183
184impl Pathspec {
185 pub fn parse<I, S>(args: I, defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError>
188 where
189 I: IntoIterator<Item = S>,
190 S: AsRef<[u8]>,
191 {
192 let mut elements = Vec::new();
193 for arg in args {
194 elements.push(PathspecElement::parse(arg.as_ref(), defaults)?);
195 }
196 Ok(Pathspec { elements })
197 }
198
199 pub fn is_empty(&self) -> bool {
201 self.elements.is_empty()
202 }
203
204 pub fn elements(&self) -> &[PathspecElement] {
206 &self.elements
207 }
208
209 pub fn matches(&self, path: &[u8]) -> bool {
214 if self.elements.is_empty() {
215 return true;
216 }
217 let mut have_include = false;
218 let mut included = false;
219 for element in &self.elements {
220 if element.exclude {
221 if element.matches_path(path) {
222 return false;
223 }
224 } else {
225 have_include = true;
226 if element.matches_path(path) {
227 included = true;
228 }
229 }
230 }
231 if have_include { included } else { true }
234 }
235}
236
/// Splits the body of a `:(...)` magic prefix on commas, returning an owned
/// copy of every word (empty words included).
fn split_magic(body: &[u8]) -> Vec<Vec<u8>> {
    let mut words = Vec::new();
    for word in body.split(|byte| *byte == b',') {
        words.push(word.to_vec());
    }
    words
}
241
/// Reasons a pathspec argument can be rejected by the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathspecParseError {
    /// A `:(` prefix was opened but no closing `)` was found.
    UnterminatedMagic,
    /// A long-form magic word was not recognised; carries the offending word.
    UnknownMagic(Vec<u8>),
    /// Both `glob` and `literal` were in effect for the same element.
    GlobLiteralConflict,
}
252
253impl core::fmt::Display for PathspecParseError {
254 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
255 match self {
256 PathspecParseError::UnterminatedMagic => {
257 write!(f, "Missing ')' at end of pathspec magic")
258 }
259 PathspecParseError::UnknownMagic(word) => {
260 write!(
261 f,
262 "Invalid pathspec magic '{}'",
263 String::from_utf8_lossy(word)
264 )
265 }
266 PathspecParseError::GlobLiteralConflict => {
267 write!(f, "'literal' and 'glob' are incompatible")
268 }
269 }
270 }
271}
272
// Relies entirely on the trait's default methods; no underlying source error is exposed.
impl std::error::Error for PathspecParseError {}
274
/// The subset of pathspec magic that affects how a pattern is compared with a
/// path. The derived `Default` has every flag off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PathspecMatchMagic {
    /// Treat the whole pattern as plain bytes (no wildcard interpretation).
    pub literal: bool,
    /// Match wildcards in pathname-aware mode (`WM_PATHNAME`).
    pub glob: bool,
    /// Compare ASCII letters case-insensitively.
    pub icase: bool,
}
284
/// Returns `true` for the bytes that carry wildcard meaning in a pattern:
/// `*`, `?`, `[` and the escape character `\`.
fn is_glob_special(c: u8) -> bool {
    c == b'*' || c == b'?' || c == b'[' || c == b'\\'
}
289
290fn simple_length(s: &[u8]) -> usize {
293 for (i, &c) in s.iter().enumerate() {
294 if is_glob_special(c) {
295 return i;
296 }
297 }
298 s.len()
299}
300
/// Reports whether `a` and `b` agree on their first `n` bytes, in the sense
/// of C's `strncmp(a, b, n) == 0`.
///
/// Each input is truncated to at most `n` bytes and the truncated slices must
/// then be equal: same length and same bytes. An input that ends before `n`
/// bytes therefore only equals another input that ends at the same position
/// (previously a shorter input was accepted as long as it was a prefix of the
/// longer one).
///
/// When `icase` is set, ASCII letters are compared case-insensitively.
fn ps_strncmp(icase: bool, a: &[u8], b: &[u8], n: usize) -> bool {
    let a = &a[..a.len().min(n)];
    let b = &b[..b.len().min(n)];
    if icase {
        a.eq_ignore_ascii_case(b)
    } else {
        a == b
    }
}
322
/// Returns `true` when `path` contains at least one of the wildcard bytes
/// `*`, `?` or `[`. A backslash alone does not count.
pub fn pathspec_is_glob(path: &[u8]) -> bool {
    for &byte in path {
        if byte == b'*' || byte == b'?' || byte == b'[' {
            return true;
        }
    }
    false
}
327
328pub fn pathspec_item_matches(match_: &[u8], name: &[u8], magic: PathspecMatchMagic) -> bool {
334 let icase = magic.icase;
335 let matchlen = match_.len();
336 let namelen = name.len();
337
338 let nowildcard_len = if magic.literal {
340 matchlen
341 } else {
342 simple_length(match_)
343 };
344
345 if matchlen == 0 {
347 return true;
348 }
349
350 if matchlen <= namelen && ps_strncmp(icase, match_, name, matchlen) {
352 if matchlen == namelen {
353 return true; }
355 if match_[matchlen - 1] == b'/' || name[matchlen] == b'/' {
356 return true; }
358 } else if match_[matchlen - 1] == b'/'
359 && namelen == matchlen - 1
360 && ps_strncmp(icase, match_, name, namelen)
361 {
362 return true;
364 }
365
366 if nowildcard_len < matchlen {
368 if nowildcard_len > 0 && !ps_strncmp(icase, match_, name, nowildcard_len) {
371 return false;
372 }
373 let pat = &match_[nowildcard_len..];
374 if name.len() < nowildcard_len {
375 return false;
376 }
377 let str_ = &name[nowildcard_len..];
378
379 let flags = if magic.glob && !magic.literal {
380 WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 }
381 } else {
382 if icase { WM_CASEFOLD } else { 0 }
384 };
385 if wildmatch(pat, str_, flags) {
386 return true;
387 }
388 }
389
390 false
391}
392
/// `wildmatch` flag: compare ASCII letters case-insensitively.
pub const WM_CASEFOLD: u32 = 1;
/// `wildmatch` flag: `?`, bracket expressions and a single `*` refuse to
/// match `/`; only `**` in a whole path component may cross directories.
pub const WM_PATHNAME: u32 = 2;

// Internal result codes returned by `dowild`.
// The pattern matched the whole text.
const WM_MATCH: i32 = 0;
// The pattern did not match at this position; an enclosing `*` may retry.
const WM_NOMATCH: i32 = 1;
// Matching cannot succeed (e.g. text exhausted or malformed bracket expression).
const WM_ABORT_ALL: i32 = -1;
// A slash-refusing `*` gave up; only an enclosing slash-crossing `*` keeps trying.
const WM_ABORT_TO_STARSTAR: i32 = -2;
403
/// Returns `true` for bytes in the 7-bit ASCII range (`0x00..=0x7f`).
#[inline]
fn wm_isascii(c: u8) -> bool {
    c.is_ascii()
}
408#[inline]
409fn wm_isupper(c: u8) -> bool {
410 wm_isascii(c) && c.is_ascii_uppercase()
411}
412#[inline]
413fn wm_islower(c: u8) -> bool {
414 wm_isascii(c) && c.is_ascii_lowercase()
415}
/// Maps ASCII `A..=Z` to `a..=z`; every other byte is returned unchanged.
#[inline]
fn wm_tolower(c: u8) -> u8 {
    match c {
        b'A'..=b'Z' => c + (b'a' - b'A'),
        _ => c,
    }
}
/// Maps ASCII `a..=z` to `A..=Z`; every other byte is returned unchanged.
#[inline]
fn wm_toupper(c: u8) -> u8 {
    match c {
        b'a'..=b'z' => c - (b'a' - b'A'),
        _ => c,
    }
}
/// Returns `true` for the bytes the wildcard matcher treats specially:
/// `*`, `?`, `[` and `\`.
#[inline]
fn wm_is_glob_special(c: u8) -> bool {
    b"*?[\\".contains(&c)
}
428
/// Byte-for-byte comparison of a character-class name with a known literal.
fn wm_cc_eq(class: &[u8], lit: &[u8]) -> bool {
    class.iter().eq(lit.iter())
}
432
433fn wm_class_matches(class: &[u8], t_ch: u8, flags: u32) -> Option<bool> {
434 let m = if wm_cc_eq(class, b"alnum") {
437 wm_isascii(t_ch) && t_ch.is_ascii_alphanumeric()
438 } else if wm_cc_eq(class, b"alpha") {
439 wm_isascii(t_ch) && t_ch.is_ascii_alphabetic()
440 } else if wm_cc_eq(class, b"blank") {
441 wm_isascii(t_ch) && (t_ch == b' ' || t_ch == b'\t')
442 } else if wm_cc_eq(class, b"cntrl") {
443 wm_isascii(t_ch) && t_ch.is_ascii_control()
444 } else if wm_cc_eq(class, b"digit") {
445 wm_isascii(t_ch) && t_ch.is_ascii_digit()
446 } else if wm_cc_eq(class, b"graph") {
447 wm_isascii(t_ch) && t_ch.is_ascii_graphic()
448 } else if wm_cc_eq(class, b"lower") {
449 wm_islower(t_ch)
450 } else if wm_cc_eq(class, b"print") {
451 wm_isascii(t_ch) && (0x20..=0x7e).contains(&t_ch)
453 } else if wm_cc_eq(class, b"punct") {
454 wm_isascii(t_ch) && t_ch.is_ascii_punctuation()
455 } else if wm_cc_eq(class, b"space") {
456 wm_isascii(t_ch) && t_ch.is_ascii_whitespace()
457 } else if wm_cc_eq(class, b"upper") {
458 wm_isupper(t_ch) || ((flags & WM_CASEFOLD) != 0 && wm_islower(t_ch))
459 } else if wm_cc_eq(class, b"xdigit") {
460 wm_isascii(t_ch) && t_ch.is_ascii_hexdigit()
461 } else {
462 return None;
463 };
464 Some(m)
465}
466
/// Recursive core of the wildcard matcher.
///
/// Walks `pattern` and `text` in lock-step and returns one of the `WM_*`
/// result codes: `WM_MATCH` when the whole text is consumed by the whole
/// pattern, `WM_NOMATCH` for an ordinary mismatch, and the two abort codes
/// when continuing to backtrack is pointless.
///
/// `flags` is a bit set of `WM_CASEFOLD` and `WM_PATHNAME`. A zero byte in
/// `text` is treated the same as the end of `text`.
fn dowild(pattern: &[u8], text: &[u8], flags: u32) -> i32 {
    let p = pattern;
    // Cursors into the pattern and the text.
    let mut pi = 0usize;
    let mut ti = 0usize;

    while pi < p.len() {
        let mut p_ch = p[pi];
        // 0 stands in for "no more text".
        let t_ch_raw = if ti < text.len() { text[ti] } else { 0 };
        let mut t_ch = t_ch_raw;

        // Only `*` can match an exhausted text.
        if t_ch == 0 && p_ch != b'*' {
            return WM_ABORT_ALL;
        }
        // Case folding lowers both the text byte and the pattern byte.
        if (flags & WM_CASEFOLD) != 0 && wm_isupper(t_ch) {
            t_ch = wm_tolower(t_ch);
        }
        if (flags & WM_CASEFOLD) != 0 && wm_isupper(p_ch) {
            p_ch = wm_tolower(p_ch);
        }

        match p_ch {
            // `?` consumes exactly one byte, but not `/` in pathname mode.
            b'?' => {
                if (flags & WM_PATHNAME) != 0 && t_ch == b'/' {
                    return WM_NOMATCH;
                }
                pi += 1;
                ti += 1;
                continue;
            }
            b'*' => {
                pi += 1;
                // Whether this star is allowed to swallow `/`.
                let match_slash: bool;
                if pi < p.len() && p[pi] == b'*' {
                    // Two or more stars: remember the second star, skip the rest.
                    let prev_p = pi;
                    while pi < p.len() && p[pi] == b'*' {
                        pi += 1;
                    }
                    if (flags & WM_PATHNAME) == 0 {
                        // Without pathname mode `**` behaves like `*`.
                        match_slash = true;
                    } else if (prev_p < 2 || p[prev_p - 2] == b'/')
                        && (pi == p.len()
                            || p[pi] == b'/'
                            || (p[pi] == b'\\' && pi + 1 < p.len() && p[pi + 1] == b'/'))
                    {
                        // The stars form a whole path component (start or `/`
                        // before, end or `/` after). First try letting `**/`
                        // match nothing at all.
                        if pi < p.len()
                            && p[pi] == b'/'
                            && dowild(&p[pi + 1..], &text[ti..], flags) == WM_MATCH
                        {
                            return WM_MATCH;
                        }
                        match_slash = true;
                    } else {
                        // `**` glued to other characters acts like a single `*`.
                        match_slash = false;
                    }
                } else {
                    // A single star crosses `/` only when pathname mode is off.
                    match_slash = (flags & WM_PATHNAME) == 0;
                }

                if pi == p.len() {
                    // Trailing star(s): matches the rest, unless a slash remains
                    // that this star is not allowed to cross.
                    if !match_slash && text[ti..].contains(&b'/') {
                        return WM_ABORT_TO_STARSTAR;
                    }
                    return WM_MATCH;
                } else if !match_slash && p[pi] == b'/' {
                    // `*/` in pathname mode: jump straight to the next slash.
                    match text[ti..].iter().position(|&c| c == b'/') {
                        None => return WM_ABORT_ALL,
                        Some(off) => {
                            ti += off;
                        }
                    }
                    // Consume the slash in both pattern and text.
                    pi += 1;
                    ti += 1;
                    continue;
                }

                // General case: try the rest of the pattern at every candidate
                // position of the text.
                let mut cur_t = ti;
                loop {
                    let mut tc = if cur_t < text.len() { text[cur_t] } else { 0 };
                    if tc == 0 {
                        break;
                    }
                    if !wm_is_glob_special(p[pi]) {
                        // The star is followed by a literal byte: skip ahead to
                        // its next occurrence instead of recursing at every
                        // position. Stops at `/` when slashes may not be crossed.
                        let mut pc = p[pi];
                        if (flags & WM_CASEFOLD) != 0 && wm_isupper(pc) {
                            pc = wm_tolower(pc);
                        }
                        loop {
                            tc = if cur_t < text.len() { text[cur_t] } else { 0 };
                            if tc == 0 {
                                break;
                            }
                            if !(match_slash || tc != b'/') {
                                break;
                            }
                            let mut tcf = tc;
                            if (flags & WM_CASEFOLD) != 0 && wm_isupper(tcf) {
                                tcf = wm_tolower(tcf);
                            }
                            if tcf == pc {
                                break;
                            }
                            cur_t += 1;
                        }
                        // Re-read (and fold) the byte the scan stopped on.
                        let tc_cmp = {
                            let raw = if cur_t < text.len() { text[cur_t] } else { 0 };
                            if (flags & WM_CASEFOLD) != 0 && wm_isupper(raw) {
                                wm_tolower(raw)
                            } else {
                                raw
                            }
                        };
                        // The literal never occurs in the reachable text.
                        if tc_cmp != pc {
                            if match_slash {
                                return WM_ABORT_ALL;
                            } else {
                                return WM_ABORT_TO_STARSTAR;
                            }
                        }
                    }
                    let matched = dowild(&p[pi..], &text[cur_t..], flags);
                    if matched != WM_NOMATCH {
                        // Propagate success and aborts, except that a
                        // slash-crossing star absorbs ABORT_TO_STARSTAR and
                        // keeps trying further positions.
                        if !match_slash || matched != WM_ABORT_TO_STARSTAR {
                            return matched;
                        }
                    } else {
                        // A slash-refusing star cannot advance past `/`.
                        let cur_raw = if cur_t < text.len() { text[cur_t] } else { 0 };
                        if !match_slash && cur_raw == b'/' {
                            return WM_ABORT_TO_STARSTAR;
                        }
                    }
                    cur_t += 1;
                }
                return WM_ABORT_ALL;
            }
            // Bracket expression: `[...]`, negated with a leading `!` or `^`.
            b'[' => {
                pi += 1;
                // 0 stands in for "pattern exhausted" throughout this arm.
                let mut p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                if p_ch2 == b'^' {
                    p_ch2 = b'!';
                }
                let negated = p_ch2 == b'!';
                if negated {
                    pi += 1;
                    p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                }
                // Previous set member, used as the low end of an `a-z` range;
                // 0 means "no usable range start".
                let mut prev_ch: u8 = 0;
                let mut matched = false;
                loop {
                    // Pattern ended before the closing `]`.
                    if p_ch2 == 0 {
                        return WM_ABORT_ALL;
                    }
                    let mut next_prev: u8 = p_ch2;
                    let mut skip_class = false;
                    if p_ch2 == b'\\' {
                        // Escaped set member: compare the following byte literally.
                        pi += 1;
                        p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                        if p_ch2 == 0 {
                            return WM_ABORT_ALL;
                        }
                        if t_ch == p_ch2 {
                            matched = true;
                        }
                        next_prev = p_ch2;
                    } else if p_ch2 == b'-' && prev_ch != 0 && pi + 1 < p.len() && p[pi + 1] != b']'
                    {
                        // Range `prev_ch-p_ch2`; the upper bound may be escaped.
                        pi += 1;
                        p_ch2 = p[pi];
                        if p_ch2 == b'\\' {
                            pi += 1;
                            p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                            if p_ch2 == 0 {
                                return WM_ABORT_ALL;
                            }
                        }
                        if t_ch <= p_ch2 && t_ch >= prev_ch {
                            matched = true;
                        } else if (flags & WM_CASEFOLD) != 0 && wm_islower(t_ch) {
                            // The text byte was lowered above; also try its
                            // uppercase form against the range.
                            let t_up = wm_toupper(t_ch);
                            if t_up <= p_ch2 && t_up >= prev_ch {
                                matched = true;
                            }
                        }
                        // A range end cannot start another range.
                        next_prev = 0;
                    } else if p_ch2 == b'[' && pi + 1 < p.len() && p[pi + 1] == b':' {
                        // Possible `[:class:]`; `s` is the first byte of the name.
                        let s = pi + 2;
                        // Find the next `]` (or the end of the pattern).
                        let mut scan = s;
                        loop {
                            if scan >= p.len() {
                                break;
                            }
                            if p[scan] == b']' {
                                break;
                            }
                            scan += 1;
                        }
                        pi = scan;
                        p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                        if p_ch2 == 0 {
                            return WM_ABORT_ALL;
                        }
                        let class_end = pi;
                        if class_end < s + 1 || p[class_end - 1] != b':' {
                            // No `:]` terminator: rewind and treat the `[` as an
                            // ordinary set member.
                            pi = s.wrapping_sub(2);
                            p_ch2 = b'[';
                            if t_ch == p_ch2 {
                                matched = true;
                            }
                            skip_class = true;
                            next_prev = p_ch2;
                        } else {
                            // Name is the text between `[:` and `:]`; an unknown
                            // name aborts the whole match.
                            let class = &p[s..class_end - 1];
                            match wm_class_matches(class, t_ch, flags) {
                                Some(true) => matched = true,
                                Some(false) => {}
                                None => return WM_ABORT_ALL,
                            }
                            next_prev = 0;
                        }
                    } else if t_ch == p_ch2 {
                        // Plain set member.
                        matched = true;
                    }

                    // `skip_class` is not consulted; this only marks it as used.
                    let _ = skip_class;
                    prev_ch = next_prev;
                    pi += 1;
                    p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                    if p_ch2 == b']' {
                        break;
                    }
                }
                // Apply negation; in pathname mode a set never matches `/`.
                if matched == negated || ((flags & WM_PATHNAME) != 0 && t_ch == b'/') {
                    return WM_NOMATCH;
                }
                pi += 1;
                ti += 1;
                continue;
            }
            // Backslash: the next pattern byte is matched literally (after
            // case folding when enabled). A trailing backslash never matches.
            b'\\' => {
                pi += 1;
                let lit = if pi < p.len() { p[pi] } else { 0 };
                let lit = if (flags & WM_CASEFOLD) != 0 && wm_isupper(lit) {
                    wm_tolower(lit)
                } else {
                    lit
                };
                if t_ch != lit {
                    return WM_NOMATCH;
                }
                pi += 1;
                ti += 1;
                continue;
            }
            // Ordinary byte: must equal the (possibly folded) text byte.
            _ => {
                if t_ch != p_ch {
                    return WM_NOMATCH;
                }
                pi += 1;
                ti += 1;
                continue;
            }
        }
    }

    // Pattern exhausted: it is a match only if the text is exhausted too.
    if ti < text.len() && text[ti] != 0 {
        WM_NOMATCH
    } else {
        WM_MATCH
    }
}
752
753pub fn wildmatch(pattern: &[u8], text: &[u8], flags: u32) -> bool {
756 dowild(pattern, text, flags) == WM_MATCH
757}
758
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `args` with default magic, panicking if any argument is invalid.
    fn ps(args: &[&str]) -> Pathspec {
        let raw = args.iter().map(|s| s.as_bytes());
        Pathspec::parse(raw, PathspecMatchMagic::default()).expect("valid pathspec")
    }

    /// Parses a single argument that is expected to be rejected and returns the error.
    fn parse_err(arg: &[u8]) -> PathspecParseError {
        let outcome = Pathspec::parse([arg], PathspecMatchMagic::default());
        if let Err(e) = outcome {
            return e;
        }
        panic!(
            "expected parse error for {:?}",
            String::from_utf8_lossy(arg)
        )
    }

    /// Asserts that every path in `paths` is selected by `spec`.
    fn assert_selected(spec: &Pathspec, paths: &[&str]) {
        for path in paths {
            assert!(spec.matches(path.as_bytes()), "{:?} should match", path);
        }
    }

    /// Asserts that no path in `paths` is selected by `spec`.
    fn assert_rejected(spec: &Pathspec, paths: &[&str]) {
        for path in paths {
            assert!(!spec.matches(path.as_bytes()), "{:?} should not match", path);
        }
    }

    #[test]
    fn empty_pathspec_matches_everything() {
        let spec = Pathspec::default();
        assert!(spec.is_empty());
        assert_selected(&spec, &["any/path"]);
    }

    #[test]
    fn literal_prefix_matches_directory_recursively() {
        let spec = ps(&["src"]);
        assert_selected(&spec, &["src", "src/lib.rs"]);
        assert_rejected(&spec, &["srcs/lib.rs", "other"]);
    }

    #[test]
    fn exclude_subtracts_from_includes() {
        let spec = ps(&["src", ":(exclude)src/gen"]);
        assert_selected(&spec, &["src/lib.rs"]);
        assert_rejected(&spec, &["src/gen/x.rs"]);
    }

    #[test]
    fn exclude_shorthand_sigils() {
        for shorthand in [":!foo", ":^foo"] {
            let spec = ps(&[shorthand]);
            let first = &spec.elements()[0];
            assert!(first.is_exclude());
            assert_selected(&spec, &["bar"]);
            assert_rejected(&spec, &["foo"]);
        }
    }

    #[test]
    fn icase_magic_folds_case() {
        let folded = ps(&[":(icase)readme"]);
        assert_selected(&folded, &["README", "readme"]);

        let exact = ps(&["readme"]);
        assert_rejected(&exact, &["README"]);
    }

    #[test]
    fn glob_magic_is_pathname_aware() {
        let single_star = ps(&[":(glob)*.rs"]);
        assert_selected(&single_star, &["lib.rs"]);
        assert_rejected(&single_star, &["src/lib.rs"]);

        let double_star = ps(&[":(glob)**/*.rs"]);
        assert_selected(&double_star, &["src/lib.rs"]);
    }

    #[test]
    fn literal_magic_disables_wildcards() {
        let spec = ps(&[":(literal)a*b"]);
        assert_selected(&spec, &["a*b"]);
        assert_rejected(&spec, &["axxb"]);
    }

    #[test]
    fn top_magic_is_parsed() {
        let spec = ps(&[":(top)src", ":/other"]);
        let elements = spec.elements();
        assert!(elements[0].is_top());
        assert!(elements[1].is_top());
    }

    #[test]
    fn attr_magic_is_retained() {
        let spec = ps(&[":(attr:binary)data"]);
        let first = &spec.elements()[0];
        assert_eq!(first.attrs(), &[b"binary".to_vec()]);
        assert_eq!(first.pattern(), b"data");
    }

    #[test]
    fn combined_magic_words() {
        let spec = ps(&[":(exclude,icase)Cargo.lock"]);
        assert!(spec.elements()[0].is_exclude());
        assert_rejected(&spec, &["CARGO.LOCK"]);
    }

    #[test]
    fn glob_literal_conflict_is_error() {
        let err = parse_err(b":(glob,literal)x");
        assert_eq!(err, PathspecParseError::GlobLiteralConflict);
    }

    #[test]
    fn unknown_magic_is_error() {
        let err = parse_err(b":(bogus)x");
        assert!(matches!(err, PathspecParseError::UnknownMagic(_)));
    }

    #[test]
    fn unterminated_magic_is_error() {
        let err = parse_err(b":(exclude");
        assert_eq!(err, PathspecParseError::UnterminatedMagic);
    }

    #[test]
    fn exclude_only_keeps_unmatched() {
        let spec = ps(&[":(exclude)target"]);
        assert_selected(&spec, &["src/lib.rs"]);
        assert_rejected(&spec, &["target/debug"]);
    }
}