globcmp_lib/
lib.rs

1use multipeek::{multipeek, MultiPeek};
2use std::{fmt, ops::Range, str::FromStr};
3
4#[cfg(feature = "stack-safe")]
5const STACK_RED_ZONE: usize = 32 * 1024;
6#[cfg(feature = "stack-safe")]
7const STACK_GROW_SIZE: usize = 1024 * 1024;
8
9#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
10enum Token {
11    Slash,
12    Char(char),
13    CharClass(Vec<char>),
14    AnyChar,
15    AnyChars,
16    AnyRecur,
17}
18
19#[derive(Debug, PartialEq, Eq)]
20pub enum PatternError {
21    UnmatchedOpenBracket,
22    UnmatchedCloseBracket,
23    EmptyCharClass,
24    InvalidCharInClass,
25    InvalidRecursive,
26}
27
28impl fmt::Display for PatternError {
29    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
30        use PatternError::*;
31
32        match self {
33            UnmatchedOpenBracket => "unmatched opening bracket",
34            UnmatchedCloseBracket => "unmatched closing bracket",
35            EmptyCharClass => "empty character class",
36            InvalidCharInClass => {
37                "invalid character in character class"
38            }
39            InvalidRecursive => "invalid recursive pattern",
40        }
41        .fmt(f)
42    }
43}
44
45impl std::error::Error for PatternError {}
46
47#[derive(Debug)]
48struct PatternIterator<T>
49where
50    T: Iterator<Item = Token> + Clone,
51{
52    tokens: MultiPeek<T>,
53}
54
55impl<T> PatternIterator<T>
56where
57    T: Iterator<Item = Token> + Clone,
58{
59    /// This method is private in order to preserve the invariant that
60    /// `TokenizedPattern` represents a valid pattern.
61    ///
62    /// Use [`TokenizedPattern::from_str`] instead to create a
63    /// `TokenizedPattern` from a string.
64    fn from(tokens: MultiPeek<T>) -> Self {
65        Self { tokens }
66    }
67
68    fn is_more_specific_than(self, other: Self) -> bool {
69        let mut a_iter = self.tokens;
70        let mut b_iter = other.tokens;
71
72        fn recur_moving_a<T>(
73            a_iter: &MultiPeek<T>,
74            b_iter: &MultiPeek<T>,
75        ) -> bool
76        where
77            T: Iterator<Item = Token> + Clone,
78        {
79            let mut a_clone = a_iter.clone();
80            let b_clone = b_iter.clone();
81
82            a_clone.next();
83
84            PatternIterator::from(a_clone)
85                .is_more_specific_than(PatternIterator::from(b_clone))
86        }
87
88        fn recur_moving_b<T>(
89            a_iter: &MultiPeek<T>,
90            b_iter: &MultiPeek<T>,
91        ) -> bool
92        where
93            T: Iterator<Item = Token> + Clone,
94        {
95            let a_clone = a_iter.clone();
96            let mut b_clone = b_iter.clone();
97
98            b_clone.next();
99
100            PatternIterator::from(a_clone)
101                .is_more_specific_than(PatternIterator::from(b_clone))
102        }
103
104        fn recur_moving_both<T>(
105            a_iter: &MultiPeek<T>,
106            b_iter: &MultiPeek<T>,
107        ) -> bool
108        where
109            T: Iterator<Item = Token> + Clone,
110        {
111            let mut a_clone = a_iter.clone();
112            let mut b_clone = b_iter.clone();
113
114            a_clone.next();
115            b_clone.next();
116
117            PatternIterator::from(a_clone)
118                .is_more_specific_than(PatternIterator::from(b_clone))
119        }
120
121        loop {
122            use Token::*;
123
124            let a_token = a_iter.peek();
125            let b_token = b_iter.peek();
126
127            match (a_token, b_token) {
128                (None, None) => return true,
129                (None, Some(AnyChars)) => {
130                    b_iter.next();
131                }
132                (None, Some(_)) | (Some(_), None) => return false,
133                (Some(AnyChars), Some(AnyChars))
134                | (Some(AnyRecur), Some(AnyRecur)) => {
135                    #[cfg(feature = "stack-safe")]
136                    return stacker::maybe_grow(
137                        STACK_RED_ZONE,
138                        STACK_GROW_SIZE,
139                        || {
140                            recur_moving_a(&a_iter, &b_iter)
141                                || recur_moving_b(&a_iter, &b_iter)
142                                || recur_moving_both(&a_iter, &b_iter)
143                        },
144                    );
145                    #[cfg(not(feature = "stack-safe"))]
146                    return recur_moving_a(&a_iter, &b_iter)
147                        || recur_moving_b(&a_iter, &b_iter)
148                        || recur_moving_both(&a_iter, &b_iter);
149                }
150                (Some(Slash), Some(Slash)) => {
151                    let a_next = a_iter.peek_nth(1);
152                    let b_next = b_iter.peek_nth(1);
153
154                    match (a_next, b_next) {
155                        (Some(AnyRecur), Some(AnyRecur)) => {
156                            #[cfg(feature = "stack-safe")]
157                            return stacker::maybe_grow(
158                                STACK_RED_ZONE,
159                                STACK_GROW_SIZE,
160                                || {
161                                    recur_moving_a(&a_iter, &b_iter)
162                                        || recur_moving_b(
163                                            &a_iter, &b_iter,
164                                        )
165                                        || recur_moving_both(
166                                            &a_iter, &b_iter,
167                                        )
168                                },
169                            );
170                            #[cfg(not(feature = "stack-safe"))]
171                            return recur_moving_a(&a_iter, &b_iter)
172                                || recur_moving_b(&a_iter, &b_iter)
173                                || recur_moving_both(&a_iter, &b_iter);
174                        }
175                        (Some(AnyRecur), _) => {
176                            a_iter.next();
177                        }
178                        (_, Some(AnyRecur)) => {
179                            b_iter.next();
180                        }
181                        _ => {
182                            a_iter.next();
183                            b_iter.next();
184                        }
185                    }
186                }
187                (Some(a), Some(b)) if a == b => {
188                    a_iter.next();
189                    b_iter.next();
190                }
191                (Some(AnyChar), Some(CharClass(_)))
192                | (Some(AnyChar), Some(Char(_)))
193                | (Some(CharClass(_)), Some(Char(_))) => return false,
194                (Some(CharClass(_)), Some(AnyChar))
195                | (Some(Char(_)), Some(AnyChar)) => {
196                    a_iter.next();
197                    b_iter.next();
198                }
199                (Some(Char(a)), Some(CharClass(bs))) => {
200                    if !bs.contains(a) {
201                        return false;
202                    }
203
204                    a_iter.next();
205                    b_iter.next();
206                }
207                (Some(AnyChars), Some(_))
208                | (Some(AnyRecur), Some(_)) => return false,
209                (Some(Slash), Some(AnyChars)) => {
210                    b_iter.next();
211                }
212                (Some(_), Some(AnyChars)) => {
213                    #[cfg(feature = "stack-safe")]
214                    return stacker::maybe_grow(
215                        STACK_RED_ZONE,
216                        STACK_GROW_SIZE,
217                        || {
218                            recur_moving_a(&a_iter, &b_iter)
219                                || recur_moving_b(&a_iter, &b_iter)
220                                || recur_moving_both(&a_iter, &b_iter)
221                        },
222                    );
223                    #[cfg(not(feature = "stack-safe"))]
224                    return recur_moving_a(&a_iter, &b_iter)
225                        || recur_moving_b(&a_iter, &b_iter)
226                        || recur_moving_both(&a_iter, &b_iter);
227                }
228                (Some(Slash), Some(AnyRecur)) => {
229                    #[cfg(feature = "stack-safe")]
230                    return stacker::maybe_grow(
231                        STACK_RED_ZONE,
232                        STACK_GROW_SIZE,
233                        || {
234                            recur_moving_a(&a_iter, &b_iter)
235                                || recur_moving_b(&a_iter, &b_iter)
236                        },
237                    );
238                    #[cfg(not(feature = "stack-safe"))]
239                    return recur_moving_a(&a_iter, &b_iter)
240                        || recur_moving_b(&a_iter, &b_iter);
241                }
242                (Some(_), Some(AnyRecur)) => {
243                    a_iter.next();
244                }
245                (Some(_), Some(_)) => return false,
246            }
247        }
248    }
249
250    pub fn count_matching_chars<U>(
251        self,
252        path: MultiPeek<U>,
253    ) -> Option<usize>
254    where
255        U: Iterator<Item = char> + Clone,
256    {
257        let mut tokens = self.tokens;
258        let mut path_chars = path;
259
260        fn recur_moving_tokens<T, U>(
261            tokens: &MultiPeek<T>,
262            path_chars: &MultiPeek<U>,
263        ) -> Option<usize>
264        where
265            T: Iterator<Item = Token> + Clone,
266            U: Iterator<Item = char> + Clone,
267        {
268            let mut tokens_clone = tokens.clone();
269            let path_chars_clone = path_chars.clone();
270
271            tokens_clone.next();
272
273            PatternIterator::from(tokens_clone)
274                .count_matching_chars(path_chars_clone)
275        }
276
277        fn recur_moving_path<T, U>(
278            tokens: &MultiPeek<T>,
279            path_chars: &MultiPeek<U>,
280        ) -> Option<usize>
281        where
282            T: Iterator<Item = Token> + Clone,
283            U: Iterator<Item = char> + Clone,
284        {
285            let tokens_clone = tokens.clone();
286            let mut path_chars_clone = path_chars.clone();
287
288            path_chars_clone.next();
289
290            PatternIterator::from(tokens_clone)
291                .count_matching_chars(path_chars_clone)
292        }
293
294        fn recur_moving_both<T, U>(
295            tokens: &MultiPeek<T>,
296            path_chars: &MultiPeek<U>,
297        ) -> Option<usize>
298        where
299            T: Iterator<Item = Token> + Clone,
300            U: Iterator<Item = char> + Clone,
301        {
302            let mut tokens_clone = tokens.clone();
303            let mut path_chars_clone = path_chars.clone();
304
305            tokens_clone.next();
306            path_chars_clone.next();
307
308            PatternIterator::from(tokens_clone)
309                .count_matching_chars(path_chars_clone)
310        }
311
312        let mut count = 0;
313
314        loop {
315            use Token::*;
316
317            let token = tokens.peek();
318            let path_char = path_chars.peek();
319
320            match (token, path_char) {
321                (None, None) => return Some(count),
322                (None, Some(_)) => return None,
323                (Some(Slash), Some('/')) => {
324                    let token_next = tokens.peek_nth(1);
325
326                    if let Some(AnyRecur) = token_next {
327                        tokens.next();
328                    } else {
329                        tokens.next();
330                        path_chars.next();
331                        count += 1;
332                    }
333                }
334                (Some(AnyChars), Some('/'))
335                | (Some(AnyChars), None) => {
336                    tokens.next();
337                }
338                (Some(AnyRecur), Some('/')) => {
339                    #[cfg(feature = "stack-safe")]
340                    let remainder = stacker::maybe_grow(
341                        STACK_RED_ZONE,
342                        STACK_GROW_SIZE,
343                        || {
344                            recur_moving_tokens(&tokens, &path_chars)
345                                .or(recur_moving_path(
346                                    &tokens,
347                                    &path_chars,
348                                ))
349                        },
350                    )?;
351                    #[cfg(not(feature = "stack-safe"))]
352                    let remainder =
353                        recur_moving_tokens(&tokens, &path_chars).or(
354                            recur_moving_path(&tokens, &path_chars),
355                        )?;
356
357                    return Some(count + remainder);
358                }
359                (Some(AnyRecur), Some(_)) => {
360                    path_chars.next();
361                }
362                (Some(AnyChars), Some(_)) => {
363                    #[cfg(feature = "stack-safe")]
364                    let remainder = stacker::maybe_grow(
365                        STACK_RED_ZONE,
366                        STACK_GROW_SIZE,
367                        || {
368                            recur_moving_tokens(&tokens, &path_chars)
369                                .or(recur_moving_path(
370                                    &tokens,
371                                    &path_chars,
372                                ))
373                                .or(recur_moving_both(
374                                    &tokens,
375                                    &path_chars,
376                                ))
377                        },
378                    )?;
379                    #[cfg(not(feature = "stack-safe"))]
380                    let remainder =
381                        recur_moving_tokens(&tokens, &path_chars)
382                            .or(recur_moving_path(&tokens, &path_chars))
383                            .or(recur_moving_both(
384                                &tokens,
385                                &path_chars,
386                            ))?;
387
388                    return Some(count + remainder);
389                }
390                (Some(_), Some('/')) | (Some(_), None) => return None,
391                (Some(AnyChar), Some(_)) => {
392                    tokens.next();
393                    path_chars.next();
394                    count += 1;
395                }
396                (Some(CharClass(chars)), Some(b)) => {
397                    if !chars.contains(b) {
398                        return None;
399                    }
400                    tokens.next();
401                    path_chars.next();
402                    count += 1;
403                }
404                (Some(Char(a)), Some(b)) if a == b => {
405                    tokens.next();
406                    path_chars.next();
407                    count += 1;
408                }
409                (Some(_), Some(_)) => return None,
410            }
411        }
412    }
413}
414
415#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
416pub struct Pattern {
417    original: String,
418    tokens: Vec<Token>,
419}
420
421impl Pattern {
422    /// `self` is weakly more specific than `other`.
423    ///
424    /// If `self` and `other` are not comparable, then this method
425    /// returns false both ways.  If `self` and `other` are the same,
426    /// then it returns true both ways.
427    ///
428    /// # Examples
429    ///
430    /// TODO
431    pub fn is_more_specific_than(&self, other: &Self) -> bool {
432        let self_tokens = self.tokens.to_vec();
433        let other_tokens = other.tokens.to_vec();
434
435        PatternIterator::from(multipeek(self_tokens))
436            .is_more_specific_than(PatternIterator::from(multipeek(
437                other_tokens,
438            )))
439    }
440
441    /// Counts the number of characters in `path` that are either
442    ///
443    /// 1. matched exactly (e.g., `a`),
444    /// 2. matched by a character class (e.g., `[a-f]`), or
445    /// 3. matched by a single-character wildcard (`?`)
446    ///
447    /// This means that it does not count characters that are matched by
448    /// `*` or `**`.
449    ///
450    /// Returns `None` if the pattern does not match `path`.
451    ///
452    /// # Examples
453    ///
454    /// ```
455    /// use std::str::FromStr;
456    /// use globcmp_lib::Pattern;
457    ///
458    /// let pattern = Pattern::from_str("foo/bar").unwrap();
459    /// let count = pattern.count_matching_chars("foo/bar");
460    /// assert_eq!(count, Some(7));
461    /// ```
462    ///
463    /// ```
464    /// use std::str::FromStr;
465    /// use globcmp_lib::Pattern;
466    ///
467    /// let pattern = Pattern::from_str("foo/*r").unwrap();
468    /// let count = pattern.count_matching_chars("foo/bar");
469    /// assert_eq!(count, Some(5));
470    /// ```
471    ///
472    /// ```
473    /// use std::str::FromStr;
474    /// use globcmp_lib::Pattern;
475    ///
476    /// let pattern = Pattern::from_str("foo/*z").unwrap();
477    /// let count = pattern.count_matching_chars("foo/bar");
478    /// assert!(count.is_none());
479    /// ```
480    pub fn count_matching_chars(&self, path: &str) -> Option<usize> {
481        let self_tokens = self.tokens.to_vec();
482
483        PatternIterator::from(multipeek(self_tokens))
484            .count_matching_chars(multipeek(path.chars()))
485    }
486
487    /// Returns the string representation with which
488    /// [`Pattern::from_str`] was called.
489    pub fn as_str(&self) -> &str {
490        self.original.as_str()
491    }
492}
493
494impl FromStr for Pattern {
495    type Err = PatternError;
496
497    /// Tries to convert a pattern into a valid `Pattern`.
498    ///
499    /// # Invariants
500    ///
501    /// TODO
502    ///
503    /// # Examples
504    ///
505    /// TODO with assert!(...is_ok()) and assert!(...is_err()).
506    fn from_str(value: &str) -> Result<Self, Self::Err> {
507        let mut tokens = Vec::new();
508        let mut class_buf = Vec::<char>::new();
509        let mut in_char_class = false;
510        let mut pending_star = false;
511
512        for char in value.chars() {
513            if char == '*' {
514                if !pending_star {
515                    pending_star = true;
516                } else {
517                    tokens.push(Token::AnyRecur);
518                    pending_star = false;
519                }
520                continue;
521            } else if pending_star {
522                tokens.push(Token::AnyChars);
523                pending_star = false;
524            }
525
526            let token = match char {
527                '/' => Token::Slash,
528                '?' => Token::AnyChar,
529                '[' => {
530                    if in_char_class {
531                        return Err(Self::Err::InvalidCharInClass);
532                    } else {
533                        in_char_class = true;
534                        continue;
535                    }
536                }
537                ']' => {
538                    if !in_char_class {
539                        return Err(Self::Err::UnmatchedCloseBracket);
540                    } else if class_buf.is_empty() {
541                        return Err(Self::Err::EmptyCharClass);
542                    } else {
543                        in_char_class = false;
544
545                        let mut buf_iter = multipeek(&class_buf);
546                        let mut char_class =
547                            Vec::with_capacity(class_buf.len());
548                        let mut prev = None;
549
550                        while let Some(&c) = buf_iter.next() {
551                            if c == '-'
552                                && let Some(c_prev) = prev
553                                && let Some(&&c_next) = buf_iter.peek()
554                            {
555                                let range = Range {
556                                    start: c_prev,
557                                    end: c_next,
558                                };
559
560                                if range.is_empty() {
561                                    // Remove `c_prev` which we have
562                                    // pushed to `char_class` in the
563                                    // previous iteration.
564                                    char_class.pop();
565
566                                    // Skip `c_next` in the next
567                                    // iteration.
568                                    buf_iter.next();
569                                } else {
570                                    // Skip because `c_prev` was already
571                                    // pushed to `char_class` in the
572                                    // previous iteration.
573                                    char_class.extend(range.skip(1));
574                                }
575                            } else {
576                                char_class.push(c);
577                            }
578
579                            prev = Some(c);
580                        }
581
582                        Token::CharClass(std::mem::take(
583                            &mut char_class,
584                        ))
585                    }
586                }
587                _ => {
588                    if in_char_class {
589                        class_buf.push(char);
590                        continue;
591                    } else {
592                        Token::Char(char)
593                    }
594                }
595            };
596
597            tokens.push(token);
598        }
599
600        if in_char_class {
601            return Err(Self::Err::UnmatchedOpenBracket);
602        }
603
604        if pending_star {
605            tokens.push(Token::AnyChars);
606        }
607
608        // Validate the usage of `**`.  It should never be preceded or
609        // succeeded by any token other than `/`.
610        //
611
612        let mut prev_token: Option<&Token> = None;
613
614        for token in tokens.iter() {
615            if *token == Token::AnyRecur
616                && let Some(p) = prev_token
617                && *p != Token::Slash
618            {
619                return Err(Self::Err::InvalidRecursive);
620            } else if prev_token == Some(&Token::AnyRecur)
621                && *token != Token::Slash
622            {
623                return Err(Self::Err::InvalidRecursive);
624            }
625
626            prev_token = Some(token);
627        }
628
629        Ok(Self {
630            original: value.to_string(),
631            tokens,
632        })
633    }
634}
635
636#[cfg(test)]
637mod tests {
638    use super::*;
639
640    #[test]
641    fn tokenize_empty_str() {
642        assert_eq!(
643            Pattern::from_str(""),
644            Ok(Pattern {
645                original: String::new(),
646                tokens: Vec::new()
647            })
648        );
649    }
650
651    #[test]
652    fn tokenize_exact() {
653        assert_eq!(
654            Pattern::from_str("foo/bar"),
655            Ok(Pattern {
656                original: "foo/bar".to_string(),
657                tokens: vec![
658                    Token::Char('f'),
659                    Token::Char('o'),
660                    Token::Char('o'),
661                    Token::Slash,
662                    Token::Char('b'),
663                    Token::Char('a'),
664                    Token::Char('r'),
665                ]
666            })
667        );
668    }
669
670    #[test]
671    fn tokenize_any_char() {
672        assert_eq!(
673            Pattern::from_str("foo/b?r"),
674            Ok(Pattern {
675                original: "foo/b?r".to_string(),
676                tokens: vec![
677                    Token::Char('f'),
678                    Token::Char('o'),
679                    Token::Char('o'),
680                    Token::Slash,
681                    Token::Char('b'),
682                    Token::AnyChar,
683                    Token::Char('r'),
684                ]
685            })
686        );
687    }
688
689    #[test]
690    fn tokenize_any_chars() {
691        assert_eq!(
692            Pattern::from_str("foo/b*r"),
693            Ok(Pattern {
694                original: "foo/b*r".to_string(),
695                tokens: vec![
696                    Token::Char('f'),
697                    Token::Char('o'),
698                    Token::Char('o'),
699                    Token::Slash,
700                    Token::Char('b'),
701                    Token::AnyChars,
702                    Token::Char('r'),
703                ]
704            })
705        );
706
707        assert_eq!(
708            Pattern::from_str("foo/*b*r*"),
709            Ok(Pattern {
710                original: "foo/*b*r*".to_string(),
711                tokens: vec![
712                    Token::Char('f'),
713                    Token::Char('o'),
714                    Token::Char('o'),
715                    Token::Slash,
716                    Token::AnyChars,
717                    Token::Char('b'),
718                    Token::AnyChars,
719                    Token::Char('r'),
720                    Token::AnyChars,
721                ]
722            })
723        );
724    }
725
726    #[test]
727    fn tokenize_valid_char_class() {
728        assert_eq!(
729            Pattern::from_str("foo/ba[rz]"),
730            Ok(Pattern {
731                original: "foo/ba[rz]".to_string(),
732                tokens: vec![
733                    Token::Char('f'),
734                    Token::Char('o'),
735                    Token::Char('o'),
736                    Token::Slash,
737                    Token::Char('b'),
738                    Token::Char('a'),
739                    Token::CharClass(vec!['r', 'z']),
740                ]
741            })
742        );
743
744        let mut chars = vec!['a'];
745        chars.extend('p'..='z');
746
747        assert_eq!(
748            Pattern::from_str("foo/ba[ap-z]"),
749            Ok(Pattern {
750                original: "foo/ba[ap-z]".to_string(),
751                tokens: vec![
752                    Token::Char('f'),
753                    Token::Char('o'),
754                    Token::Char('o'),
755                    Token::Slash,
756                    Token::Char('b'),
757                    Token::Char('a'),
758                    Token::CharClass(chars),
759                ]
760            })
761        );
762
763        // `[az-p]` only matches `a` because `z-p` is the empty
764        // interval.
765        assert_eq!(
766            Pattern::from_str("foo/ba[az-p]"),
767            Ok(Pattern {
768                original: "foo/ba[az-p]".to_string(),
769                tokens: vec![
770                    Token::Char('f'),
771                    Token::Char('o'),
772                    Token::Char('o'),
773                    Token::Slash,
774                    Token::Char('b'),
775                    Token::Char('a'),
776                    Token::CharClass(vec!['a']),
777                ]
778            })
779        );
780
781        assert_eq!(
782            Pattern::from_str("foo/ba[a-]"),
783            Ok(Pattern {
784                original: "foo/ba[a-]".to_string(),
785                tokens: vec![
786                    Token::Char('f'),
787                    Token::Char('o'),
788                    Token::Char('o'),
789                    Token::Slash,
790                    Token::Char('b'),
791                    Token::Char('a'),
792                    Token::CharClass(vec!['a', '-']),
793                ]
794            })
795        );
796
797        assert_eq!(
798            Pattern::from_str("foo/ba[-a]"),
799            Ok(Pattern {
800                original: "foo/ba[-a]".to_string(),
801                tokens: vec![
802                    Token::Char('f'),
803                    Token::Char('o'),
804                    Token::Char('o'),
805                    Token::Slash,
806                    Token::Char('b'),
807                    Token::Char('a'),
808                    Token::CharClass(vec!['-', 'a']),
809                ]
810            })
811        );
812    }
813
814    #[test]
815    fn tokenize_empty_char_class() {
816        assert_eq!(
817            Pattern::from_str("foo/ba[]"),
818            Err(PatternError::EmptyCharClass)
819        );
820    }
821
822    #[test]
823    fn tokenize_unclosed_char_class() {
824        assert_eq!(
825            Pattern::from_str("foo/ba[rz"),
826            Err(PatternError::UnmatchedOpenBracket)
827        );
828    }
829
830    #[test]
831    fn tokenize_unopened_char_class() {
832        assert_eq!(
833            Pattern::from_str("foo/barz]"),
834            Err(PatternError::UnmatchedCloseBracket)
835        );
836    }
837
838    #[test]
839    fn tokenize_invalid_char_class() {
840        assert_eq!(
841            Pattern::from_str("foo/ba[[rz]"),
842            Err(PatternError::InvalidCharInClass)
843        );
844    }
845
846    #[test]
847    fn tokenize_valid_recursive() {
848        assert_eq!(
849            Pattern::from_str("foo/**/baz"),
850            Ok(Pattern {
851                original: "foo/**/baz".to_string(),
852                tokens: vec![
853                    Token::Char('f'),
854                    Token::Char('o'),
855                    Token::Char('o'),
856                    Token::Slash,
857                    Token::AnyRecur,
858                    Token::Slash,
859                    Token::Char('b'),
860                    Token::Char('a'),
861                    Token::Char('z'),
862                ]
863            })
864        );
865    }
866
867    #[test]
868    fn tokenize_invalid_recursive() {
869        for pattern in [
870            "x**/bar/baz",
871            "**x/bar/baz",
872            "***/bar/baz",
873            "foo/**x/baz",
874            "foo/x**/baz",
875            "foo/***/baz",
876            "foo/bar/x**",
877            "foo/bar/**x",
878            "foo/bar/***",
879        ] {
880            assert_eq!(
881                Pattern::from_str(dbg!(pattern)),
882                Err(PatternError::InvalidRecursive)
883            );
884        }
885    }
886
887    macro_rules! same_specificity {
888        ($a:expr, $b:expr) => {
889            let a_str = $a;
890            let a_pattern = Pattern::from_str(a_str)
891                .expect("first pattern should be valid");
892
893            let b_str = $b;
894            let b_pattern = Pattern::from_str(b_str)
895                .expect("second pattern should be valid");
896
897            assert!(
898                a_pattern.is_more_specific_than(&b_pattern),
899                "{a_str:?} should be more specific than {b_str:?}"
900            );
901            assert!(
902                b_pattern.is_more_specific_than(&a_pattern),
903                "{b_str:?} should be more specific than {a_str:?}"
904            );
905        };
906    }
907
908    macro_rules! more_specific {
909        ($a:expr, $b:expr) => {
910            let a_str = $a;
911            let a_pattern = Pattern::from_str(a_str)
912                .expect("first pattern should be valid");
913
914            let b_str = $b;
915            let b_pattern = Pattern::from_str(b_str)
916                .expect("second pattern should be valid");
917
918            assert!(
919                a_pattern.is_more_specific_than(&b_pattern),
920                "{a_str:?} should be more specific than {b_str:?}"
921            );
922            assert!(
923                !b_pattern.is_more_specific_than(&a_pattern),
924                "{b_str:?} should not be more specific than {a_str:?}"
925            );
926        };
927    }
928
929    macro_rules! unknown_specificity {
930        ($a:expr, $b:expr) => {
931            let a_str = $a;
932            let a_pattern = Pattern::from_str(a_str)
933                .expect("first pattern should be valid");
934
935            let b_str = $b;
936            let b_pattern = Pattern::from_str(b_str)
937                .expect("second pattern should be valid");
938
939            assert!(
940                !a_pattern.is_more_specific_than(&b_pattern),
941                "{a_str:?} should not be more specific than {b_str:?}"
942            );
943            assert!(
944                !b_pattern.is_more_specific_than(&a_pattern),
945                "{b_str:?} should not be more specific than {a_str:?}"
946            );
947        };
948    }
949
950    #[test]
951    fn pattern_specificity() {
952        same_specificity!("foo", "foo");
953        same_specificity!("foo/bar", "foo/bar");
954
955        // Any character.
956        for pattern in [
957            "?oo/bar", "??o/bar", "???/bar", "f?o/bar", "fo?/bar",
958            "foo/?ar", "foo/??r", "foo/???", "foo/b?r", "foo/ba?",
959        ] {
960            // Versus exact path.
961            more_specific!("foo/bar", dbg!(pattern));
962
963            // Versus same any-character pattern.
964            same_specificity!(pattern, pattern);
965        }
966
967        // Wildcard.
968        for pattern in [
969            // Superfluous wildcard.
970            "*foo/bar", "f*oo/bar", "foo*/bar", "foo/*bar", "foo/b*ar",
971            "foo/bar*",
972            // Wildcard matching a single character.
973            "*oo/bar", "*o*/bar", "f*o/bar", "fo*/bar", "foo/*ar",
974            "foo/*a*", "foo/b*r", "foo/ba*",
975            // Wildcard matching multiple characters.
976            "*/bar", "foo/*",
977        ] {
978            more_specific!("foo/bar", dbg!(pattern));
979            same_specificity!(pattern, pattern);
980        }
981
982        // Recursive wildcard.
983        for pattern in ["**/bar", "foo/**/bar"] {
984            more_specific!("foo/bar", dbg!(pattern));
985            same_specificity!(pattern, pattern);
986        }
987
988        more_specific!("foo/**/baz/lorem", "foo/**/lorem");
989        more_specific!("foo/bar/**/lorem", "foo/**/lorem");
990        more_specific!("foo/bar/**/lorem", "**/lorem");
991        more_specific!("foo/**/bar/baz", "**/bar/baz");
992
993        // Wildcard at shallower vs. deeper level.
994        more_specific!("foo/*/*", "*/*/*");
995        more_specific!("foo/bar/*", "*/*/*");
996        more_specific!("foo/bar/*", "foo/*/*");
997        more_specific!("foo/bar/baz", "*/*/*");
998        more_specific!("foo/bar/baz", "foo/*/*");
999        more_specific!("foo/bar/baz", "foo/bar/*");
1000        more_specific!("foo/bar/baz", "foo/bar/b*");
1001        more_specific!("foo/bar/baz", "foo/bar/ba*");
1002        more_specific!("foo/bar/baz", "foo/bar/baz*");
1003        more_specific!("foo/bar/baz", "foo/bar/*z");
1004        more_specific!("foo/bar/baz", "foo/bar/*az");
1005        more_specific!("foo/bar/baz", "foo/bar/*baz");
1006        more_specific!("foo/bar/baz", "foo/bar/b*az");
1007        more_specific!("foo/bar/baz", "foo/bar/ba*z");
1008        more_specific!("foo/bar/baz", "foo/bar/b*a*z");
1009        more_specific!("foo/bar/baz", "foo/bar/*b*a*z*");
1010        more_specific!("foo/bar/baz", "foo/b?r/baz");
1011        more_specific!("foo/bar/baz", "foo/b??/???");
1012        more_specific!("foo/bar/???", "foo/b??/???");
1013        more_specific!("foo/b??/baz", "foo/b??/???");
1014        more_specific!("foo/ba?/???", "foo/b??/???");
1015
1016        // NOTE We could argue that "**/bar" matches at a lower level
1017        // than "foo/**/*", and that it is thus more specific.  We don't
1018        // do this though.
1019        unknown_specificity!("**/bar", "foo/*");
1020        unknown_specificity!("**/bar", "foo/**/*");
1021        unknown_specificity!("**/bar/baz", "foo/**/baz");
1022
1023        // NOTE If we could compare the patterns against a concrete
1024        // path, e.g., "lorem/foo/bar/baz", then we could argue that
1025        // "**/foo/**/*" matches at a higher level than "**/bar/**/*",
1026        // and thus it is less specific.  But not having a concrete path
1027        // to compare against, we cannot say anything about specificity.
1028        unknown_specificity!("**/foo/**/*", "**/bar/**/*");
1029
1030        more_specific!("foo/b*r", "foo/*r");
1031        more_specific!("foo/b*r", "foo/*");
1032        more_specific!("foo/b?r", "foo/b*r");
1033
1034        // Character classes.
1035        more_specific!("foo/bar", "foo/ba[rz]");
1036        more_specific!("foo/ba[rz]", "foo/ba?");
1037        more_specific!("foo/ba[rz]", "foo/ba*");
1038        more_specific!("foo/bar", "foo/ba[p-z]");
1039        more_specific!("foo/ba[p-z]", "foo/ba?");
1040        more_specific!("foo/ba[p-z]", "foo/ba*");
1041        more_specific!("foo/ba?", "foo/ba*");
1042
1043        // Wildcard placement in the filename.
1044        more_specific!("foo/bar.*", "foo/*");
1045        more_specific!("foo/*.c", "foo/*");
1046        unknown_specificity!("foo/bar.*", "foo/*.c");
1047        more_specific!("**/bar.*", "**/*");
1048        more_specific!("**/*.c", "**/*");
1049        unknown_specificity!("**/bar.*", "**/*.c");
1050        unknown_specificity!("foo/**/baz.*", "**/bar/*.c");
1051
1052        // Patterns that don't match the same paths.
1053        unknown_specificity!("foo/bar", "lorem/ipsum");
1054        unknown_specificity!("foo/ba[rz]", "foo/bax");
1055        unknown_specificity!("foo/ba?", "foo/baxx");
1056        unknown_specificity!("foo/*", "foo/bar/*");
1057
1058        // Slashes must be matched exactly.
1059        unknown_specificity!("foo?bar", "foo/bar");
1060        unknown_specificity!("foo*bar", "foo/bar");
1061    }
1062
1063    macro_rules! count_of {
1064        ($pattern:expr, $path:expr, $expected:expr) => {
1065            let pattern = Pattern::from_str($pattern)
1066                .expect("pattern should be valid");
1067
1068            assert_eq!(pattern.count_matching_chars($path), $expected);
1069        };
1070    }
1071
1072    #[test]
1073    fn count_of_matching_chars() {
1074        count_of!("", "", Some(0));
1075        count_of!("*", "bar", Some(0));
1076        count_of!("foo", "bar", None);
1077        count_of!("f*", "bar", None);
1078        count_of!("fo*", "bar", None);
1079        count_of!("*o", "bar", None);
1080        count_of!("f*o", "bar", None);
1081        count_of!("*o*", "bar", None);
1082
1083        count_of!("foo", "foo", Some(3));
1084
1085        count_of!("foo*", "foobar", Some(3));
1086        count_of!("foo*r", "foobar", Some(4));
1087        count_of!("f*o*a*", "foobar", Some(3));
1088        count_of!("*o*b*r", "foobar", Some(3));
1089
1090        count_of!("foo???", "foobar", Some(6));
1091        count_of!("f?o?a?", "foobar", Some(6));
1092        count_of!("?o?b?r", "foobar", Some(6));
1093        count_of!("foo[a-f][a-f]r", "foobar", Some(6));
1094
1095        count_of!("foo/*r", "foo/bar", Some(5));
1096        count_of!("foo/**/*r", "foo/bar", Some(5));
1097
1098        count_of!("foo/bar", "foo/bar", Some(7));
1099        count_of!("foo/**/foo", "foo/bar", None);
1100        count_of!("foo/**/foo", "foo/bar/baz/foo", Some(7));
1101        count_of!("foo/bar/baz/foo", "foo/bar/baz/foo", Some(15));
1102
1103        count_of!("foo?bar", "foo/bar", None);
1104        count_of!("foo*bar", "foo/bar", None);
1105    }
1106}