globcmp_lib/
lib.rs

1use multipeek::{multipeek, MultiPeek};
2use std::{fmt, ops::Range, str::FromStr};
3
4#[cfg(feature = "stack-safe")]
5const STACK_RED_ZONE: usize = 32 * 1024;
6#[cfg(feature = "stack-safe")]
7const STACK_GROW_SIZE: usize = 1024 * 1024;
8
9#[derive(Debug, PartialEq, Eq, Clone)]
10enum Token {
11    Slash,
12    Char(char),
13    CharClass(Vec<char>),
14    AnyChar,
15    AnyChars,
16    AnyRecur,
17}
18
19#[derive(Debug, PartialEq, Eq)]
20pub enum PatternError {
21    UnmatchedOpenBracket,
22    UnmatchedCloseBracket,
23    EmptyCharClass,
24    InvalidCharInClass,
25    InvalidRecursive,
26}
27
28impl fmt::Display for PatternError {
29    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
30        use PatternError::*;
31
32        match self {
33            UnmatchedOpenBracket => "unmatched opening bracket",
34            UnmatchedCloseBracket => "unmatched closing bracket",
35            EmptyCharClass => "empty character class",
36            InvalidCharInClass => {
37                "invalid character in character class"
38            }
39            InvalidRecursive => "invalid recursive pattern",
40        }
41        .fmt(f)
42    }
43}
44
45impl std::error::Error for PatternError {}
46
47#[derive(Debug)]
48struct PatternIterator<T>
49where
50    T: Iterator<Item = Token> + Clone,
51{
52    tokens: MultiPeek<T>,
53}
54
55impl<T> PatternIterator<T>
56where
57    T: Iterator<Item = Token> + Clone,
58{
59    /// This method is private in order to preserve the invariant that
60    /// `TokenizedPattern` represents a valid pattern.
61    ///
62    /// Use [`TokenizedPattern::from_str`] instead to create a
63    /// `TokenizedPattern` from a string.
64    fn from(tokens: MultiPeek<T>) -> Self {
65        Self { tokens }
66    }
67
68    fn is_more_specific_than(self, other: Self) -> bool {
69        let mut a_iter = self.tokens;
70        let mut b_iter = other.tokens;
71
72        fn recur_moving_a<T>(
73            a_iter: &MultiPeek<T>,
74            b_iter: &MultiPeek<T>,
75        ) -> bool
76        where
77            T: Iterator<Item = Token> + Clone,
78        {
79            let mut a_clone = a_iter.clone();
80            let b_clone = b_iter.clone();
81
82            a_clone.next();
83
84            PatternIterator::from(a_clone)
85                .is_more_specific_than(PatternIterator::from(b_clone))
86        }
87
88        fn recur_moving_b<T>(
89            a_iter: &MultiPeek<T>,
90            b_iter: &MultiPeek<T>,
91        ) -> bool
92        where
93            T: Iterator<Item = Token> + Clone,
94        {
95            let a_clone = a_iter.clone();
96            let mut b_clone = b_iter.clone();
97
98            b_clone.next();
99
100            PatternIterator::from(a_clone)
101                .is_more_specific_than(PatternIterator::from(b_clone))
102        }
103
104        fn recur_moving_both<T>(
105            a_iter: &MultiPeek<T>,
106            b_iter: &MultiPeek<T>,
107        ) -> bool
108        where
109            T: Iterator<Item = Token> + Clone,
110        {
111            let mut a_clone = a_iter.clone();
112            let mut b_clone = b_iter.clone();
113
114            a_clone.next();
115            b_clone.next();
116
117            PatternIterator::from(a_clone)
118                .is_more_specific_than(PatternIterator::from(b_clone))
119        }
120
121        loop {
122            use Token::*;
123
124            let a_token = a_iter.peek();
125            let b_token = b_iter.peek();
126
127            match (a_token, b_token) {
128                (None, None) => return true,
129                (None, Some(AnyChars)) => {
130                    b_iter.next();
131                }
132                (None, Some(_)) | (Some(_), None) => return false,
133                (Some(AnyChars), Some(AnyChars))
134                | (Some(AnyRecur), Some(AnyRecur)) => {
135                    #[cfg(feature = "stack-safe")]
136                    return stacker::maybe_grow(
137                        STACK_RED_ZONE,
138                        STACK_GROW_SIZE,
139                        || {
140                            recur_moving_a(&a_iter, &b_iter)
141                                || recur_moving_b(&a_iter, &b_iter)
142                                || recur_moving_both(&a_iter, &b_iter)
143                        },
144                    );
145                    #[cfg(not(feature = "stack-safe"))]
146                    return recur_moving_a(&a_iter, &b_iter)
147                        || recur_moving_b(&a_iter, &b_iter)
148                        || recur_moving_both(&a_iter, &b_iter);
149                }
150                (Some(Slash), Some(Slash)) => {
151                    let a_next = a_iter.peek_nth(1);
152                    let b_next = b_iter.peek_nth(1);
153
154                    match (a_next, b_next) {
155                        (Some(AnyRecur), Some(AnyRecur)) => {
156                            #[cfg(feature = "stack-safe")]
157                            return stacker::maybe_grow(
158                                STACK_RED_ZONE,
159                                STACK_GROW_SIZE,
160                                || {
161                                    recur_moving_a(&a_iter, &b_iter)
162                                        || recur_moving_b(
163                                            &a_iter, &b_iter,
164                                        )
165                                        || recur_moving_both(
166                                            &a_iter, &b_iter,
167                                        )
168                                },
169                            );
170                            #[cfg(not(feature = "stack-safe"))]
171                            return recur_moving_a(&a_iter, &b_iter)
172                                || recur_moving_b(&a_iter, &b_iter)
173                                || recur_moving_both(&a_iter, &b_iter);
174                        }
175                        (Some(AnyRecur), _) => {
176                            a_iter.next();
177                        }
178                        (_, Some(AnyRecur)) => {
179                            b_iter.next();
180                        }
181                        _ => {
182                            a_iter.next();
183                            b_iter.next();
184                        }
185                    }
186                }
187                (Some(a), Some(b)) if a == b => {
188                    a_iter.next();
189                    b_iter.next();
190                }
191                (Some(AnyChar), Some(CharClass(_)))
192                | (Some(AnyChar), Some(Char(_)))
193                | (Some(CharClass(_)), Some(Char(_))) => return false,
194                (Some(CharClass(_)), Some(AnyChar))
195                | (Some(Char(_)), Some(AnyChar)) => {
196                    a_iter.next();
197                    b_iter.next();
198                }
199                (Some(Char(a)), Some(CharClass(bs))) => {
200                    if !bs.contains(a) {
201                        return false;
202                    }
203
204                    a_iter.next();
205                    b_iter.next();
206                }
207                (Some(AnyChars), Some(_))
208                | (Some(AnyRecur), Some(_)) => return false,
209                (Some(Slash), Some(AnyChars)) => {
210                    b_iter.next();
211                }
212                (Some(_), Some(AnyChars)) => {
213                    #[cfg(feature = "stack-safe")]
214                    return stacker::maybe_grow(
215                        STACK_RED_ZONE,
216                        STACK_GROW_SIZE,
217                        || {
218                            recur_moving_a(&a_iter, &b_iter)
219                                || recur_moving_b(&a_iter, &b_iter)
220                                || recur_moving_both(&a_iter, &b_iter)
221                        },
222                    );
223                    #[cfg(not(feature = "stack-safe"))]
224                    return recur_moving_a(&a_iter, &b_iter)
225                        || recur_moving_b(&a_iter, &b_iter)
226                        || recur_moving_both(&a_iter, &b_iter);
227                }
228                (Some(Slash), Some(AnyRecur)) => {
229                    #[cfg(feature = "stack-safe")]
230                    return stacker::maybe_grow(
231                        STACK_RED_ZONE,
232                        STACK_GROW_SIZE,
233                        || {
234                            recur_moving_a(&a_iter, &b_iter)
235                                || recur_moving_b(&a_iter, &b_iter)
236                        },
237                    );
238                    #[cfg(not(feature = "stack-safe"))]
239                    return recur_moving_a(&a_iter, &b_iter)
240                        || recur_moving_b(&a_iter, &b_iter);
241                }
242                (Some(_), Some(AnyRecur)) => {
243                    a_iter.next();
244                }
245                (Some(_), Some(_)) => return false,
246            }
247        }
248    }
249
250    pub fn count_matching_chars<U>(
251        self,
252        path: MultiPeek<U>,
253    ) -> Option<usize>
254    where
255        U: Iterator<Item = char> + Clone,
256    {
257        let mut tokens = self.tokens;
258        let mut path_chars = path;
259
260        fn recur_moving_tokens<T, U>(
261            tokens: &MultiPeek<T>,
262            path_chars: &MultiPeek<U>,
263        ) -> Option<usize>
264        where
265            T: Iterator<Item = Token> + Clone,
266            U: Iterator<Item = char> + Clone,
267        {
268            let mut tokens_clone = tokens.clone();
269            let path_chars_clone = path_chars.clone();
270
271            tokens_clone.next();
272
273            PatternIterator::from(tokens_clone)
274                .count_matching_chars(path_chars_clone)
275        }
276
277        fn recur_moving_path<T, U>(
278            tokens: &MultiPeek<T>,
279            path_chars: &MultiPeek<U>,
280        ) -> Option<usize>
281        where
282            T: Iterator<Item = Token> + Clone,
283            U: Iterator<Item = char> + Clone,
284        {
285            let tokens_clone = tokens.clone();
286            let mut path_chars_clone = path_chars.clone();
287
288            path_chars_clone.next();
289
290            PatternIterator::from(tokens_clone)
291                .count_matching_chars(path_chars_clone)
292        }
293
294        fn recur_moving_both<T, U>(
295            tokens: &MultiPeek<T>,
296            path_chars: &MultiPeek<U>,
297        ) -> Option<usize>
298        where
299            T: Iterator<Item = Token> + Clone,
300            U: Iterator<Item = char> + Clone,
301        {
302            let mut tokens_clone = tokens.clone();
303            let mut path_chars_clone = path_chars.clone();
304
305            tokens_clone.next();
306            path_chars_clone.next();
307
308            PatternIterator::from(tokens_clone)
309                .count_matching_chars(path_chars_clone)
310        }
311
312        let mut count = 0;
313
314        loop {
315            use Token::*;
316
317            let token = tokens.peek();
318            let path_char = path_chars.peek();
319
320            match (token, path_char) {
321                (None, None) => return Some(count),
322                (None, Some(_)) => return None,
323                (Some(Slash), Some('/')) => {
324                    let token_next = tokens.peek_nth(1);
325
326                    if let Some(AnyRecur) = token_next {
327                        tokens.next();
328                    } else {
329                        tokens.next();
330                        path_chars.next();
331                        count += 1;
332                    }
333                }
334                (Some(AnyChars), Some('/'))
335                | (Some(AnyChars), None) => {
336                    tokens.next();
337                }
338                (Some(AnyRecur), Some('/')) => {
339                    #[cfg(feature = "stack-safe")]
340                    let remainder = stacker::maybe_grow(
341                        STACK_RED_ZONE,
342                        STACK_GROW_SIZE,
343                        || {
344                            [
345                                recur_moving_tokens(
346                                    &tokens,
347                                    &path_chars,
348                                ),
349                                recur_moving_path(&tokens, &path_chars),
350                            ]
351                            .into_iter()
352                            .max()
353                        },
354                    )??;
355                    #[cfg(not(feature = "stack-safe"))]
356                    let remainder = [
357                        recur_moving_tokens(&tokens, &path_chars),
358                        recur_moving_path(&tokens, &path_chars),
359                    ]
360                    .into_iter()
361                    .max()??;
362
363                    return Some(count + remainder);
364                }
365                (Some(AnyRecur), Some(_)) => {
366                    path_chars.next();
367                }
368                (Some(AnyChars), Some(_)) => {
369                    #[cfg(feature = "stack-safe")]
370                    let remainder = stacker::maybe_grow(
371                        STACK_RED_ZONE,
372                        STACK_GROW_SIZE,
373                        || {
374                            [
375                                recur_moving_tokens(
376                                    &tokens,
377                                    &path_chars,
378                                ),
379                                recur_moving_path(&tokens, &path_chars),
380                                recur_moving_both(&tokens, &path_chars),
381                            ]
382                            .into_iter()
383                            .max()
384                        },
385                    )??;
386                    #[cfg(not(feature = "stack-safe"))]
387                    let remainder = [
388                        recur_moving_tokens(&tokens, &path_chars),
389                        recur_moving_path(&tokens, &path_chars),
390                        recur_moving_both(&tokens, &path_chars),
391                    ]
392                    .into_iter()
393                    .max()??;
394
395                    return Some(count + remainder);
396                }
397                (Some(_), Some('/')) | (Some(_), None) => return None,
398                (Some(AnyChar), Some(_)) => {
399                    tokens.next();
400                    path_chars.next();
401                    count += 1;
402                }
403                (Some(CharClass(chars)), Some(b)) => {
404                    if !chars.contains(b) {
405                        return None;
406                    }
407                    tokens.next();
408                    path_chars.next();
409                    count += 1;
410                }
411                (Some(Char(a)), Some(b)) if a == b => {
412                    tokens.next();
413                    path_chars.next();
414                    count += 1;
415                }
416                (Some(_), Some(_)) => return None,
417            }
418        }
419    }
420}
421
422#[derive(Debug, PartialEq, Eq, Clone)]
423pub struct Pattern {
424    original: String,
425    tokens: Vec<Token>,
426}
427
428impl Pattern {
429    /// `self` is weakly more specific than `other`.
430    ///
431    /// If `self` and `other` are not comparable, then this method
432    /// returns false both ways.  If `self` and `other` are the same,
433    /// then it returns true both ways.
434    ///
435    /// # Examples
436    ///
437    /// TODO
438    pub fn is_more_specific_than(&self, other: &Self) -> bool {
439        let self_tokens = self.tokens.to_vec();
440        let other_tokens = other.tokens.to_vec();
441
442        PatternIterator::from(multipeek(self_tokens))
443            .is_more_specific_than(PatternIterator::from(multipeek(
444                other_tokens,
445            )))
446    }
447
448    /// Counts the number of characters in `path` that are either
449    ///
450    /// 1. matched exactly (e.g., `a`),
451    /// 2. matched by a character class (e.g., `[a-f]`), or
452    /// 3. matched by a single-character wildcard (`?`)
453    ///
454    /// This means that it does not count characters that are matched by
455    /// `*` or `**`.
456    ///
457    /// Returns `None` if the pattern does not match `path`.
458    ///
459    /// # Examples
460    ///
461    /// ```
462    /// use std::str::FromStr;
463    /// use globcmp_lib::Pattern;
464    ///
465    /// let pattern = Pattern::from_str("foo/bar").unwrap();
466    /// let count = pattern.count_matching_chars("foo/bar");
467    /// assert_eq!(count, Some(7));
468    /// ```
469    ///
470    /// ```
471    /// use std::str::FromStr;
472    /// use globcmp_lib::Pattern;
473    ///
474    /// let pattern = Pattern::from_str("foo/*r").unwrap();
475    /// let count = pattern.count_matching_chars("foo/bar");
476    /// assert_eq!(count, Some(5));
477    /// ```
478    ///
479    /// ```
480    /// use std::str::FromStr;
481    /// use globcmp_lib::Pattern;
482    ///
483    /// let pattern = Pattern::from_str("foo/*z").unwrap();
484    /// let count = pattern.count_matching_chars("foo/bar");
485    /// assert!(count.is_none());
486    /// ```
487    pub fn count_matching_chars(&self, path: &str) -> Option<usize> {
488        let self_tokens = self.tokens.to_vec();
489
490        PatternIterator::from(multipeek(self_tokens))
491            .count_matching_chars(multipeek(path.chars()))
492    }
493
494    /// Returns the string representation with which
495    /// [`Pattern::from_str`] was called.
496    pub fn as_str(&self) -> &str {
497        self.original.as_str()
498    }
499}
500
501impl FromStr for Pattern {
502    type Err = PatternError;
503
504    /// Tries to convert a pattern into a valid `Pattern`.
505    ///
506    /// # Invariants
507    ///
508    /// TODO
509    ///
510    /// # Examples
511    ///
512    /// TODO with assert!(...is_ok()) and assert!(...is_err()).
513    fn from_str(value: &str) -> Result<Self, Self::Err> {
514        let mut tokens = Vec::new();
515        let mut class_buf = Vec::<char>::new();
516        let mut in_char_class = false;
517        let mut pending_star = false;
518
519        for char in value.chars() {
520            if char == '*' {
521                if !pending_star {
522                    pending_star = true;
523                } else {
524                    tokens.push(Token::AnyRecur);
525                    pending_star = false;
526                }
527                continue;
528            } else if pending_star {
529                tokens.push(Token::AnyChars);
530                pending_star = false;
531            }
532
533            let token = match char {
534                '/' => Token::Slash,
535                '?' => Token::AnyChar,
536                '[' => {
537                    if in_char_class {
538                        return Err(Self::Err::InvalidCharInClass);
539                    } else {
540                        in_char_class = true;
541                        continue;
542                    }
543                }
544                ']' => {
545                    if !in_char_class {
546                        return Err(Self::Err::UnmatchedCloseBracket);
547                    } else if class_buf.is_empty() {
548                        return Err(Self::Err::EmptyCharClass);
549                    } else {
550                        in_char_class = false;
551
552                        let mut buf_iter = multipeek(&class_buf);
553                        let mut char_class =
554                            Vec::with_capacity(class_buf.len());
555                        let mut prev = None;
556
557                        while let Some(&c) = buf_iter.next() {
558                            if c == '-'
559                                && let Some(c_prev) = prev
560                                && let Some(&&c_next) = buf_iter.peek()
561                            {
562                                let range = Range {
563                                    start: c_prev,
564                                    end: c_next,
565                                };
566
567                                if range.is_empty() {
568                                    // Remove `c_prev` which we have
569                                    // pushed to `char_class` in the
570                                    // previous iteration.
571                                    char_class.pop();
572
573                                    // Skip `c_next` in the next
574                                    // iteration.
575                                    buf_iter.next();
576                                } else {
577                                    // Skip because `c_prev` was already
578                                    // pushed to `char_class` in the
579                                    // previous iteration.
580                                    char_class.extend(range.skip(1));
581                                }
582                            } else {
583                                char_class.push(c);
584                            }
585
586                            prev = Some(c);
587                        }
588
589                        Token::CharClass(std::mem::take(
590                            &mut char_class,
591                        ))
592                    }
593                }
594                _ => {
595                    if in_char_class {
596                        class_buf.push(char);
597                        continue;
598                    } else {
599                        Token::Char(char)
600                    }
601                }
602            };
603
604            tokens.push(token);
605        }
606
607        if in_char_class {
608            return Err(Self::Err::UnmatchedOpenBracket);
609        }
610
611        if pending_star {
612            tokens.push(Token::AnyChars);
613        }
614
615        // Validate the usage of `**`.  It should never be preceded or
616        // succeeded by any token other than `/`.
617        //
618
619        let mut prev_token: Option<&Token> = None;
620
621        for token in tokens.iter() {
622            if *token == Token::AnyRecur
623                && let Some(p) = prev_token
624                && *p != Token::Slash
625            {
626                return Err(Self::Err::InvalidRecursive);
627            } else if prev_token == Some(&Token::AnyRecur)
628                && *token != Token::Slash
629            {
630                return Err(Self::Err::InvalidRecursive);
631            }
632
633            prev_token = Some(token);
634        }
635
636        Ok(Self {
637            original: value.to_string(),
638            tokens,
639        })
640    }
641}
642
643#[cfg(test)]
644mod tests {
645    use super::*;
646
647    #[test]
648    fn tokenize_empty_str() {
649        assert_eq!(
650            Pattern::from_str(""),
651            Ok(Pattern {
652                original: String::new(),
653                tokens: Vec::new()
654            })
655        );
656    }
657
658    #[test]
659    fn tokenize_exact() {
660        assert_eq!(
661            Pattern::from_str("foo/bar"),
662            Ok(Pattern {
663                original: "foo/bar".to_string(),
664                tokens: vec![
665                    Token::Char('f'),
666                    Token::Char('o'),
667                    Token::Char('o'),
668                    Token::Slash,
669                    Token::Char('b'),
670                    Token::Char('a'),
671                    Token::Char('r'),
672                ]
673            })
674        );
675    }
676
677    #[test]
678    fn tokenize_any_char() {
679        assert_eq!(
680            Pattern::from_str("foo/b?r"),
681            Ok(Pattern {
682                original: "foo/b?r".to_string(),
683                tokens: vec![
684                    Token::Char('f'),
685                    Token::Char('o'),
686                    Token::Char('o'),
687                    Token::Slash,
688                    Token::Char('b'),
689                    Token::AnyChar,
690                    Token::Char('r'),
691                ]
692            })
693        );
694    }
695
696    #[test]
697    fn tokenize_any_chars() {
698        assert_eq!(
699            Pattern::from_str("foo/b*r"),
700            Ok(Pattern {
701                original: "foo/b*r".to_string(),
702                tokens: vec![
703                    Token::Char('f'),
704                    Token::Char('o'),
705                    Token::Char('o'),
706                    Token::Slash,
707                    Token::Char('b'),
708                    Token::AnyChars,
709                    Token::Char('r'),
710                ]
711            })
712        );
713
714        assert_eq!(
715            Pattern::from_str("foo/*b*r*"),
716            Ok(Pattern {
717                original: "foo/*b*r*".to_string(),
718                tokens: vec![
719                    Token::Char('f'),
720                    Token::Char('o'),
721                    Token::Char('o'),
722                    Token::Slash,
723                    Token::AnyChars,
724                    Token::Char('b'),
725                    Token::AnyChars,
726                    Token::Char('r'),
727                    Token::AnyChars,
728                ]
729            })
730        );
731    }
732
733    #[test]
734    fn tokenize_valid_char_class() {
735        assert_eq!(
736            Pattern::from_str("foo/ba[rz]"),
737            Ok(Pattern {
738                original: "foo/ba[rz]".to_string(),
739                tokens: vec![
740                    Token::Char('f'),
741                    Token::Char('o'),
742                    Token::Char('o'),
743                    Token::Slash,
744                    Token::Char('b'),
745                    Token::Char('a'),
746                    Token::CharClass(vec!['r', 'z']),
747                ]
748            })
749        );
750
751        let mut chars = vec!['a'];
752        chars.extend('p'..='z');
753
754        assert_eq!(
755            Pattern::from_str("foo/ba[ap-z]"),
756            Ok(Pattern {
757                original: "foo/ba[ap-z]".to_string(),
758                tokens: vec![
759                    Token::Char('f'),
760                    Token::Char('o'),
761                    Token::Char('o'),
762                    Token::Slash,
763                    Token::Char('b'),
764                    Token::Char('a'),
765                    Token::CharClass(chars),
766                ]
767            })
768        );
769
770        // `[az-p]` only matches `a` because `z-p` is the empty
771        // interval.
772        assert_eq!(
773            Pattern::from_str("foo/ba[az-p]"),
774            Ok(Pattern {
775                original: "foo/ba[az-p]".to_string(),
776                tokens: vec![
777                    Token::Char('f'),
778                    Token::Char('o'),
779                    Token::Char('o'),
780                    Token::Slash,
781                    Token::Char('b'),
782                    Token::Char('a'),
783                    Token::CharClass(vec!['a']),
784                ]
785            })
786        );
787
788        assert_eq!(
789            Pattern::from_str("foo/ba[a-]"),
790            Ok(Pattern {
791                original: "foo/ba[a-]".to_string(),
792                tokens: vec![
793                    Token::Char('f'),
794                    Token::Char('o'),
795                    Token::Char('o'),
796                    Token::Slash,
797                    Token::Char('b'),
798                    Token::Char('a'),
799                    Token::CharClass(vec!['a', '-']),
800                ]
801            })
802        );
803
804        assert_eq!(
805            Pattern::from_str("foo/ba[-a]"),
806            Ok(Pattern {
807                original: "foo/ba[-a]".to_string(),
808                tokens: vec![
809                    Token::Char('f'),
810                    Token::Char('o'),
811                    Token::Char('o'),
812                    Token::Slash,
813                    Token::Char('b'),
814                    Token::Char('a'),
815                    Token::CharClass(vec!['-', 'a']),
816                ]
817            })
818        );
819    }
820
821    #[test]
822    fn tokenize_empty_char_class() {
823        assert_eq!(
824            Pattern::from_str("foo/ba[]"),
825            Err(PatternError::EmptyCharClass)
826        );
827    }
828
829    #[test]
830    fn tokenize_unclosed_char_class() {
831        assert_eq!(
832            Pattern::from_str("foo/ba[rz"),
833            Err(PatternError::UnmatchedOpenBracket)
834        );
835    }
836
837    #[test]
838    fn tokenize_unopened_char_class() {
839        assert_eq!(
840            Pattern::from_str("foo/barz]"),
841            Err(PatternError::UnmatchedCloseBracket)
842        );
843    }
844
845    #[test]
846    fn tokenize_invalid_char_class() {
847        assert_eq!(
848            Pattern::from_str("foo/ba[[rz]"),
849            Err(PatternError::InvalidCharInClass)
850        );
851    }
852
853    #[test]
854    fn tokenize_valid_recursive() {
855        assert_eq!(
856            Pattern::from_str("foo/**/baz"),
857            Ok(Pattern {
858                original: "foo/**/baz".to_string(),
859                tokens: vec![
860                    Token::Char('f'),
861                    Token::Char('o'),
862                    Token::Char('o'),
863                    Token::Slash,
864                    Token::AnyRecur,
865                    Token::Slash,
866                    Token::Char('b'),
867                    Token::Char('a'),
868                    Token::Char('z'),
869                ]
870            })
871        );
872    }
873
874    #[test]
875    fn tokenize_invalid_recursive() {
876        for pattern in [
877            "x**/bar/baz",
878            "**x/bar/baz",
879            "***/bar/baz",
880            "foo/**x/baz",
881            "foo/x**/baz",
882            "foo/***/baz",
883            "foo/bar/x**",
884            "foo/bar/**x",
885            "foo/bar/***",
886        ] {
887            assert_eq!(
888                Pattern::from_str(dbg!(pattern)),
889                Err(PatternError::InvalidRecursive)
890            );
891        }
892    }
893
894    macro_rules! same_specificity {
895        ($a:expr, $b:expr) => {
896            let a_str = $a;
897            let a_pattern = Pattern::from_str(a_str)
898                .expect("first pattern should be valid");
899
900            let b_str = $b;
901            let b_pattern = Pattern::from_str(b_str)
902                .expect("second pattern should be valid");
903
904            assert!(
905                a_pattern.is_more_specific_than(&b_pattern),
906                "{a_str:?} should be more specific than {b_str:?}"
907            );
908            assert!(
909                b_pattern.is_more_specific_than(&a_pattern),
910                "{b_str:?} should be more specific than {a_str:?}"
911            );
912        };
913    }
914
915    macro_rules! more_specific {
916        ($a:expr, $b:expr) => {
917            let a_str = $a;
918            let a_pattern = Pattern::from_str(a_str)
919                .expect("first pattern should be valid");
920
921            let b_str = $b;
922            let b_pattern = Pattern::from_str(b_str)
923                .expect("second pattern should be valid");
924
925            assert!(
926                a_pattern.is_more_specific_than(&b_pattern),
927                "{a_str:?} should be more specific than {b_str:?}"
928            );
929            assert!(
930                !b_pattern.is_more_specific_than(&a_pattern),
931                "{b_str:?} should not be more specific than {a_str:?}"
932            );
933        };
934    }
935
936    macro_rules! unknown_specificity {
937        ($a:expr, $b:expr) => {
938            let a_str = $a;
939            let a_pattern = Pattern::from_str(a_str)
940                .expect("first pattern should be valid");
941
942            let b_str = $b;
943            let b_pattern = Pattern::from_str(b_str)
944                .expect("second pattern should be valid");
945
946            assert!(
947                !a_pattern.is_more_specific_than(&b_pattern),
948                "{a_str:?} should not be more specific than {b_str:?}"
949            );
950            assert!(
951                !b_pattern.is_more_specific_than(&a_pattern),
952                "{b_str:?} should not be more specific than {a_str:?}"
953            );
954        };
955    }
956
957    #[test]
958    fn pattern_specificity() {
959        same_specificity!("foo", "foo");
960        same_specificity!("foo/bar", "foo/bar");
961
962        // Any character.
963        for pattern in [
964            "?oo/bar", "??o/bar", "???/bar", "f?o/bar", "fo?/bar",
965            "foo/?ar", "foo/??r", "foo/???", "foo/b?r", "foo/ba?",
966        ] {
967            // Versus exact path.
968            more_specific!("foo/bar", dbg!(pattern));
969
970            // Versus same any-character pattern.
971            same_specificity!(pattern, pattern);
972        }
973
974        // Wildcard.
975        for pattern in [
976            // Superfluous wildcard.
977            "*foo/bar", "f*oo/bar", "foo*/bar", "foo/*bar", "foo/b*ar",
978            "foo/bar*",
979            // Wildcard matching a single character.
980            "*oo/bar", "*o*/bar", "f*o/bar", "fo*/bar", "foo/*ar",
981            "foo/*a*", "foo/b*r", "foo/ba*",
982            // Wildcard matching multiple characters.
983            "*/bar", "foo/*",
984        ] {
985            more_specific!("foo/bar", dbg!(pattern));
986            same_specificity!(pattern, pattern);
987        }
988
989        // Recursive wildcard.
990        for pattern in ["**/bar", "foo/**/bar"] {
991            more_specific!("foo/bar", dbg!(pattern));
992            same_specificity!(pattern, pattern);
993        }
994
995        more_specific!("foo/**/baz/lorem", "foo/**/lorem");
996        more_specific!("foo/bar/**/lorem", "foo/**/lorem");
997        more_specific!("foo/bar/**/lorem", "**/lorem");
998        more_specific!("foo/**/bar/baz", "**/bar/baz");
999
1000        // Wildcard at shallower vs. deeper level.
1001        more_specific!("foo/*/*", "*/*/*");
1002        more_specific!("foo/bar/*", "*/*/*");
1003        more_specific!("foo/bar/*", "foo/*/*");
1004        more_specific!("foo/bar/baz", "*/*/*");
1005        more_specific!("foo/bar/baz", "foo/*/*");
1006        more_specific!("foo/bar/baz", "foo/bar/*");
1007        more_specific!("foo/bar/baz", "foo/bar/b*");
1008        more_specific!("foo/bar/baz", "foo/bar/ba*");
1009        more_specific!("foo/bar/baz", "foo/bar/baz*");
1010        more_specific!("foo/bar/baz", "foo/bar/*z");
1011        more_specific!("foo/bar/baz", "foo/bar/*az");
1012        more_specific!("foo/bar/baz", "foo/bar/*baz");
1013        more_specific!("foo/bar/baz", "foo/bar/b*az");
1014        more_specific!("foo/bar/baz", "foo/bar/ba*z");
1015        more_specific!("foo/bar/baz", "foo/bar/b*a*z");
1016        more_specific!("foo/bar/baz", "foo/bar/*b*a*z*");
1017        more_specific!("foo/bar/baz", "foo/b?r/baz");
1018        more_specific!("foo/bar/baz", "foo/b??/???");
1019        more_specific!("foo/bar/???", "foo/b??/???");
1020        more_specific!("foo/b??/baz", "foo/b??/???");
1021        more_specific!("foo/ba?/???", "foo/b??/???");
1022
1023        // NOTE We could argue that "**/bar" matches at a lower level
1024        // than "foo/**/*", and that it is thus more specific.  We don't
1025        // do this though.
1026        unknown_specificity!("**/bar", "foo/*");
1027        unknown_specificity!("**/bar", "foo/**/*");
1028        unknown_specificity!("**/bar/baz", "foo/**/baz");
1029
1030        // NOTE If we could compare the patterns against a concrete
1031        // path, e.g., "lorem/foo/bar/baz", then we could argue that
1032        // "**/foo/**/*" matches at a higher level than "**/bar/**/*",
1033        // and thus it is less specific.  But not having a concrete path
1034        // to compare against, we cannot say anything about specificity.
1035        unknown_specificity!("**/foo/**/*", "**/bar/**/*");
1036
1037        more_specific!("foo/b*r", "foo/*r");
1038        more_specific!("foo/b*r", "foo/*");
1039        more_specific!("foo/b?r", "foo/b*r");
1040
1041        // Character classes.
1042        more_specific!("foo/bar", "foo/ba[rz]");
1043        more_specific!("foo/ba[rz]", "foo/ba?");
1044        more_specific!("foo/ba[rz]", "foo/ba*");
1045        more_specific!("foo/bar", "foo/ba[p-z]");
1046        more_specific!("foo/ba[p-z]", "foo/ba?");
1047        more_specific!("foo/ba[p-z]", "foo/ba*");
1048        more_specific!("foo/ba?", "foo/ba*");
1049
1050        // Wildcard placement in the filename.
1051        more_specific!("foo/bar.*", "foo/*");
1052        more_specific!("foo/*.c", "foo/*");
1053        unknown_specificity!("foo/bar.*", "foo/*.c");
1054        more_specific!("**/bar.*", "**/*");
1055        more_specific!("**/*.c", "**/*");
1056        unknown_specificity!("**/bar.*", "**/*.c");
1057        unknown_specificity!("foo/**/baz.*", "**/bar/*.c");
1058
1059        // Patterns that don't match the same paths.
1060        unknown_specificity!("foo/bar", "lorem/ipsum");
1061        unknown_specificity!("foo/ba[rz]", "foo/bax");
1062        unknown_specificity!("foo/ba?", "foo/baxx");
1063        unknown_specificity!("foo/*", "foo/bar/*");
1064    }
1065
1066    macro_rules! count_of {
1067        ($pattern:expr, $path:expr, $expected:expr) => {
1068            let pattern = Pattern::from_str($pattern)
1069                .expect("pattern should be valid");
1070
1071            assert_eq!(pattern.count_matching_chars($path), $expected);
1072        };
1073    }
1074
1075    #[test]
1076    fn count_of_matching_chars() {
1077        count_of!("", "", Some(0));
1078        count_of!("*", "bar", Some(0));
1079        count_of!("foo", "bar", None);
1080        count_of!("f*", "bar", None);
1081        count_of!("fo*", "bar", None);
1082        count_of!("*o", "bar", None);
1083        count_of!("f*o", "bar", None);
1084        count_of!("*o*", "bar", None);
1085
1086        count_of!("foo", "foo", Some(3));
1087
1088        count_of!("foo*", "foobar", Some(3));
1089        count_of!("foo*r", "foobar", Some(4));
1090
1091        count_of!("foo???", "foobar", Some(6));
1092        count_of!("f?o?a?", "foobar", Some(6));
1093        count_of!("?o?b?r", "foobar", Some(6));
1094        count_of!("foo[a-f][a-f]r", "foobar", Some(6));
1095
1096        count_of!("foo/*r", "foo/bar", Some(5));
1097        count_of!("foo/**/*r", "foo/bar", Some(5));
1098
1099        count_of!("foo/bar", "foo/bar", Some(7));
1100        count_of!("foo/**/foo", "foo/bar", None);
1101        count_of!("foo/**/foo", "foo/bar/baz/foo", Some(7));
1102        count_of!("foo/bar/baz/foo", "foo/bar/baz/foo", Some(15));
1103    }
1104}