wildcard/
lib.rs

1// Copyright 2024 Cloudflare, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![no_std]
16#![cfg_attr(feature = "fatal-warnings", deny(warnings))]
17// Note: If you change this remember to update `README.md`. To do so run `cargo rdme`.
18//! `wildcard` is a rust crate for wildcard matching.
19//!
20//! Here's how to use it:
21//!
22//! ```rust
23//! # use wildcard::Wildcard;
24//! #
25//! let wildcard = Wildcard::new("*foo?*bar".as_bytes()).unwrap();
26//!
27//! assert!(wildcard.is_match("fooofooobar".as_bytes()));
28//! ```
29//!
30//! Special characters can be escaped to represent their literal symbol:
31//!
32//! ```rust
33//! # use wildcard::Wildcard;
34//! #
35//! let wildcard = Wildcard::new(r"\*\?".as_bytes()).unwrap();
36//!
37//! assert!(!wildcard.is_match("ab".as_bytes()));
38//! assert!(wildcard.is_match("*?".as_bytes()));
39//! ```
40//!
41//! You can also capture the substring that matched the metasymbols of the wildcard:
42//!
43//! ```rust
44//! # use wildcard::Wildcard;
45//! #
46//! let wildcard = Wildcard::new("* is a * style?".as_bytes()).unwrap();
47//!
48//! let captures: Vec<&[u8]> = wildcard.captures("Lambic is a beer style!".as_bytes()).unwrap();
49//!
50//! assert_eq!(captures, ["Lambic".as_bytes(), "beer".as_bytes(), "!".as_bytes()]);
51//! ```
52//!
53//! # String matching
54//!
55//! For performance reasons `wildcard` does not match directly on strings, but it supports matching
56//! on slices of `char`s:
57//!
58//! ```rust
59//! # use wildcard::Wildcard;
60//! #
61//! let p = "*foo?*bar".chars().collect::<Vec<_>>();
62//! let wildcard = Wildcard::new(&p).unwrap();
63//!
64//! assert!(wildcard.is_match(&"fooofooobar".chars().collect::<Vec<_>>()));
65//! ```
66//!
67//! # Matching customization
68//!
69//! With `wildcard` you can configure these properties of a wildcard:
70//!
71//! 1. Configure the symbols for the metasymbols `*` and `?` as well as the escape symbol.
72//! 2. Support for the metasymbol `?` can be disabled.
73//! 3. Support for escaping can be disabled.
74//! 4. Support for case-insensitive matching.
75
76extern crate alloc;
77
78use alloc::borrow::Cow;
79use alloc::vec::Vec;
80use core::fmt::{Debug, Display, Formatter};
81use core::ops::Range;
82use thiserror::Error;
83
84/// Error representing an invalid [`Wildcard`] creation.
85#[derive(Eq, PartialEq, Error, Debug)]
86pub enum WildcardError {
87    /// The wildcard contains a syntax error.
88    #[error("wildcard syntax error at position {position}: {message}")]
89    Syntax {
90        /// First position of the error in the wildcard.
91        position: usize,
92        /// Description of the error.
93        message: &'static str,
94    },
95    /// The wildcard matching not configured correctly, as it contains repeated special symbols
96    /// (either the metasymbols or the escape symbol).
97    #[error("invalid configuration of special symbols")]
98    InvalidSpecialSymbolsConfiguration,
99}
100
101/// This trait defined the alphabet a wildcard can be used on.
102pub trait WildcardSymbol: Eq + Copy {
103    /// Default metasymbol that matches on sequences of arbitrary length.
104    ///
105    /// This is typically `*`.
106    const DEFAULT_METASYMBOL_ANY: Self;
107    /// Default metasymbol that matches on a single symbol.
108    ///
109    /// This is typically `?`.
110    const DEFAULT_METASYMBOL_ONE: Self;
111    /// Default metasymbol used for escaping.
112    ///
113    /// This is typically `\`.
114    const DEFAULT_METASYMBOL_ESCAPE: Self;
115
116    /// Checks equality in a case-insensitive way.
117    fn eq_case_insensitive(a: Self, b: Self) -> bool;
118}
119
120impl WildcardSymbol for u8 {
121    const DEFAULT_METASYMBOL_ANY: u8 = b'*';
122    const DEFAULT_METASYMBOL_ONE: u8 = b'?';
123    const DEFAULT_METASYMBOL_ESCAPE: u8 = b'\\';
124
125    fn eq_case_insensitive(a: u8, b: u8) -> bool {
126        a.eq_ignore_ascii_case(&b)
127    }
128}
129
130impl WildcardSymbol for char {
131    const DEFAULT_METASYMBOL_ANY: char = '*';
132    const DEFAULT_METASYMBOL_ONE: char = '?';
133    const DEFAULT_METASYMBOL_ESCAPE: char = '\\';
134
135    fn eq_case_insensitive(a: char, b: char) -> bool {
136        a.to_lowercase().eq(b.to_lowercase())
137    }
138}
139
140#[derive(Clone)]
141struct WildcardMatchingConfig<S> {
142    metasymbol_any: S,
143    metasymbol_one: Option<S>,
144    symbol_escape: Option<S>,
145    case_insensitive: bool,
146}
147
148impl<S> WildcardMatchingConfig<S>
149where
150    S: WildcardSymbol,
151{
152    fn validate(&self) -> Result<(), WildcardError> {
153        // Special symbols cannot be the same.
154        let has_special_symbol_repetitions = is_symbol(self.metasymbol_any, self.metasymbol_one)
155            || is_symbol(self.metasymbol_any, self.symbol_escape)
156            || (self.metasymbol_one.is_some() && self.metasymbol_one == self.symbol_escape);
157
158        (!has_special_symbol_repetitions)
159            .then_some(())
160            .ok_or(WildcardError::InvalidSpecialSymbolsConfiguration)
161    }
162}
163
164impl<S> Default for WildcardMatchingConfig<S>
165where
166    S: WildcardSymbol,
167{
168    fn default() -> Self {
169        WildcardMatchingConfig {
170            metasymbol_any: S::DEFAULT_METASYMBOL_ANY,
171            metasymbol_one: Some(S::DEFAULT_METASYMBOL_ONE),
172            symbol_escape: Some(S::DEFAULT_METASYMBOL_ESCAPE),
173            case_insensitive: false,
174        }
175    }
176}
177
178/// A builder of a [`Wildcard`]. This allows you to configure specific behavior of the wildcard
179/// matching.
180pub struct WildcardBuilder<'a, S = u8>
181where
182    S: WildcardSymbol,
183{
184    pattern: Cow<'a, [S]>,
185    config: WildcardMatchingConfig<S>,
186}
187
188impl<'a, S> WildcardBuilder<'a, S>
189where
190    S: WildcardSymbol,
191{
192    /// Creates a wildcard builder.
193    #[must_use]
194    pub fn new(pattern: &'a [S]) -> WildcardBuilder<'a, S> {
195        WildcardBuilder::from_cow(Cow::Borrowed(pattern))
196    }
197
198    /// Creates a wildcard builder.
199    #[must_use]
200    pub fn from_owned(pattern: Vec<S>) -> WildcardBuilder<'static, S> {
201        WildcardBuilder::from_cow(Cow::Owned(pattern))
202    }
203
204    /// Creates a wildcard builder.
205    #[must_use]
206    pub fn from_cow(pattern: Cow<'a, [S]>) -> WildcardBuilder<'a, S> {
207        WildcardBuilder { pattern, config: WildcardMatchingConfig::default() }
208    }
209
210    /// Configures the metasymbol used to match on sequences of arbitrary length.
211    ///
212    /// This is typically `*`.
213    #[must_use]
214    pub fn with_any_metasymbol(mut self, s: S) -> WildcardBuilder<'a, S> {
215        self.config.metasymbol_any = s;
216        self
217    }
218
219    /// Configures the metasymbol used to match on a single symbol.
220    ///
221    /// This is typically `?`.
222    #[must_use]
223    pub fn with_one_metasymbol(mut self, s: S) -> WildcardBuilder<'a, S> {
224        self.config.metasymbol_one = Some(s);
225        self
226    }
227
228    /// Disable the metasymbol use to match on a single symbol.
229    #[must_use]
230    pub fn without_one_metasymbol(mut self) -> WildcardBuilder<'a, S> {
231        self.config.metasymbol_one = None;
232        self
233    }
234
235    /// Configures the symbol use to use for escaping.
236    ///
237    /// This is typically `\`.
238    #[must_use]
239    pub fn with_escape_symbol(mut self, s: S) -> WildcardBuilder<'a, S> {
240        self.config.symbol_escape = Some(s);
241        self
242    }
243
244    /// Disables escaping.
245    #[must_use]
246    pub fn without_escape(mut self) -> WildcardBuilder<'a, S> {
247        self.config.symbol_escape = None;
248        self
249    }
250
251    /// Configures case sensitivity used when matching.
252    ///
253    /// Note that special symbols are always matched verbatim.
254    #[must_use]
255    pub fn case_insensitive(mut self, on: bool) -> WildcardBuilder<'a, S> {
256        self.config.case_insensitive = on;
257        self
258    }
259
260    /// Builds the wildcard.
261    pub fn build(self) -> Result<Wildcard<'a, S>, WildcardError> {
262        Wildcard::new_with_config(self.pattern, self.config)
263    }
264}
265
266/// A token of a wildcard.
267#[derive(Eq, PartialEq, Copy, Clone, Debug)]
268pub enum WildcardToken<S = u8> {
269    /// Metasymbol that matches any sequence of symbols.
270    MetasymbolAny,
271    /// Metasymbol that matches a single symbol.
272    MetasymbolOne,
273    /// A literal symbol.
274    Symbol(S),
275}
276
277impl<S> WildcardToken<S> {
278    /// Whether the token represents a metasymbol.
279    #[must_use]
280    pub fn is_metasymbol(self) -> bool {
281        match self {
282            WildcardToken::MetasymbolAny | WildcardToken::MetasymbolOne => true,
283            WildcardToken::Symbol(_) => false,
284        }
285    }
286}
287
288/// A wildcard. You can use this to check for match against input sequences:
289///
290/// ```rust
291/// # use wildcard::Wildcard;
292/// #
293/// let wildcard = Wildcard::new("*foo?*bar".as_bytes()).expect("invalid wildcard");
294///
295/// assert!(wildcard.is_match("fooofooobar".as_bytes()));
296/// ```
297#[derive(Clone)]
298pub struct Wildcard<'a, S = u8>
299where
300    S: WildcardSymbol,
301{
302    pattern: Cow<'a, [S]>,
303    config: WildcardMatchingConfig<S>,
304    metasymbol_count: usize,
305}
306
307impl<'a, S> Wildcard<'a, S>
308where
309    S: WildcardSymbol,
310{
311    /// Creates a wildcard.
312    pub fn new(pattern: &'a [S]) -> Result<Wildcard<'a, S>, WildcardError> {
313        Wildcard::new_with_config(Cow::Borrowed(pattern), WildcardMatchingConfig::default())
314    }
315
316    /// Creates a wildcard.
317    pub fn from_owned(pattern: Vec<S>) -> Result<Wildcard<'static, S>, WildcardError> {
318        Wildcard::new_with_config(Cow::Owned(pattern), WildcardMatchingConfig::default())
319    }
320
321    /// Creates a wildcard.
322    pub fn from_cow(pattern: Cow<'a, [S]>) -> Result<Wildcard<'a, S>, WildcardError> {
323        Wildcard::new_with_config(pattern, WildcardMatchingConfig::default())
324    }
325
326    fn new_with_config(
327        pattern: Cow<'a, [S]>,
328        config: WildcardMatchingConfig<S>,
329    ) -> Result<Wildcard<'a, S>, WildcardError> {
330        config.validate()?;
331
332        let metasymbol_count = validate_syntax(&pattern, &config)?;
333        let wildcard = Wildcard { pattern, config, metasymbol_count };
334
335        Ok(wildcard)
336    }
337
338    /// Checks if `input` matches the wildcard.
339    #[inline]
340    #[must_use]
341    pub fn is_match(&self, input: &[S]) -> bool {
342        // Note that we want to have two different calls with different closures for each case so
343        // each "version" of `matches()` can be properly optimized after monomorphization.
344        match self.config.case_insensitive {
345            true => matches(
346                &self.config,
347                &self.pattern,
348                input,
349                WildcardSymbol::eq_case_insensitive,
350                |_| (),
351            ),
352            false => matches(&self.config, &self.pattern, input, |a, b| a == b, |_| ()),
353        }
354    }
355
356    /// Captures all `input` substrings that matched the wildcard's metasymbols. This returns
357    /// `None` if there's no match.
358    ///
359    /// ```rust
360    /// # use wildcard::Wildcard;
361    /// #
362    /// let wildcard = Wildcard::new("* is part of *".as_bytes()).unwrap();
363    ///
364    /// let captures: Vec<&[u8]> = wildcard.captures("Lisboa is part of Portugal".as_bytes()).unwrap();
365    ///
366    /// assert_eq!(captures, ["Lisboa".as_bytes(), "Portugal".as_bytes()]);
367    /// ```
368    ///
369    /// The captures are done in a lazy way: earlier captures will be as small as possible.
370    #[inline]
371    #[must_use]
372    pub fn captures<'b>(&self, input: &'b [S]) -> Option<Vec<&'b [S]>> {
373        let mut captures = Vec::with_capacity(self.metasymbol_count);
374
375        let is_match = {
376            let capture = |range| captures.push(&input[range]);
377
378            // Note that we want to have two different calls with different closures for each case so
379            // each "version" of `matches()` can be properly optimized after monomorphization.
380            match self.config.case_insensitive {
381                true => matches(
382                    &self.config,
383                    &self.pattern,
384                    input,
385                    WildcardSymbol::eq_case_insensitive,
386                    capture,
387                ),
388                false => matches(&self.config, &self.pattern, input, |a, b| a == b, capture),
389            }
390        };
391
392        match is_match {
393            true => {
394                // The captures for `?` cannot are not emitted by `matches()` (see function
395                // documentation for why that is), so we have to backfill it.
396                if let Some(metasymbol_one) = self.config.metasymbol_one {
397                    fill_in_metasymbol_one_captures(
398                        self.config.metasymbol_any,
399                        metasymbol_one,
400                        self.config.symbol_escape,
401                        &self.pattern,
402                        input,
403                        &mut captures,
404                    );
405                }
406
407                debug_assert_eq!(captures.len(), self.metasymbol_count);
408
409                Some(captures)
410            }
411            false => None,
412        }
413    }
414
415    /// The original pattern from which this wildcard was created.
416    #[must_use]
417    pub fn pattern(&self) -> &[S] {
418        self.pattern.as_ref()
419    }
420
421    /// Number of metasymbols in the wildcard. The number of captures returned by
422    /// [`Wildcard::captures()`], if there is match, will be the same as the number of metasymbols.
423    #[must_use]
424    pub fn metasymbol_count(&self) -> usize {
425        self.metasymbol_count
426    }
427
428    /// Parse the wildcard into tokens.
429    pub fn parsed(&self) -> impl Iterator<Item = WildcardToken<S>> + '_ {
430        let mut i = 0;
431
432        core::iter::from_fn(move || {
433            if i >= self.pattern.len() {
434                None
435            } else {
436                let symbol = self.pattern[i];
437
438                if is_symbol(symbol, self.config.symbol_escape) {
439                    // We can access index `i + 1` because we validated the pattern before.
440                    let s = self.pattern[i + 1];
441                    i += 2;
442                    Some(WildcardToken::Symbol(s))
443                } else if self.pattern[i] == self.config.metasymbol_any {
444                    i += 1;
445                    Some(WildcardToken::MetasymbolAny)
446                } else if is_symbol(symbol, self.config.metasymbol_one) {
447                    i += 1;
448                    Some(WildcardToken::MetasymbolOne)
449                } else {
450                    i += 1;
451                    Some(WildcardToken::Symbol(symbol))
452                }
453            }
454        })
455    }
456}
457
458/// Validates the syntax of the wildcard. Returns the number of metasymbols in the pattern.
459fn validate_syntax<S>(
460    pattern: &[S],
461    config: &WildcardMatchingConfig<S>,
462) -> Result<usize, WildcardError>
463where
464    S: Eq + Copy,
465{
466    // This function is somewhat performance-sensitive. Any changes to this function should be
467    // validated by running the benchmarks against the baseline.
468
469    let symbol_escape = config.symbol_escape;
470    let metasymbol_any = config.metasymbol_any;
471    let metasymbol_one = config.metasymbol_one;
472
473    let pattern_len = pattern.len();
474
475    let mut metasymbols = 0;
476    let mut escape = false;
477    let mut i = 0;
478
479    // The performance is a bit better if we use a `while` instead of iterators.
480    while i < pattern_len {
481        let symbol = pattern[i];
482
483        if escape {
484            if symbol != metasymbol_any
485                && !is_symbol(symbol, metasymbol_one)
486                && !is_symbol(symbol, symbol_escape)
487            {
488                return Err(WildcardError::Syntax {
489                    position: i - 1,
490                    message: "invalid escape sequence",
491                });
492            }
493
494            escape = false;
495        } else if is_symbol(symbol, symbol_escape) {
496            escape = true;
497        } else if symbol == metasymbol_any || is_symbol(symbol, metasymbol_one) {
498            metasymbols += 1;
499        }
500
501        i += 1;
502    }
503
504    if escape {
505        return Err(WildcardError::Syntax {
506            position: pattern_len - 1,
507            message: "incomplete escape sequence at the end of the wildcard",
508        });
509    }
510
511    Ok(metasymbols)
512}
513
514/// This function fills in the captures of `?` based on the captures of `*`. This needs to be done
515/// because [`matches()`] does not emit captures of `?`. Read the documentation of [`matches()`] to
516/// understand why.
517fn fill_in_metasymbol_one_captures<'a, S>(
518    metasymbol_any: S,
519    metasymbol_one: S,
520    symbol_escape: Option<S>,
521    pattern: &[S],
522    input: &'a [S],
523    captures: &mut Vec<&'a [S]>,
524) where
525    S: Eq + Copy,
526{
527    // This function is somewhat performance-sensitive. Any changes to this function should be
528    // validated by running the benchmarks against the baseline.
529
530    let pattern_len = pattern.len();
531
532    let mut input_index = 0;
533    let mut captures_index = 0;
534    let mut escape = false;
535    let mut i = 0;
536
537    // The performance is a bit better if we use a `while` instead of iterators.
538    while i < pattern_len {
539        let symbol = pattern[i];
540
541        if escape {
542            escape = false;
543        } else if is_symbol(symbol, symbol_escape) {
544            escape = true;
545            input_index += 1;
546        } else if symbol == metasymbol_any {
547            input_index += captures[captures_index].len();
548            captures_index += 1;
549        } else if symbol == metasymbol_one {
550            // TODO We can be more clever here to avoid quadratic complexity.
551            captures.insert(captures_index, &input[input_index..=input_index]);
552            captures_index += 1;
553            input_index += 1;
554        } else {
555            input_index += 1;
556        }
557
558        i += 1;
559    }
560}
561
562#[inline(always)]
563fn is_symbol<S>(v: S, opt_symbol: Option<S>) -> bool
564where
565    S: Eq + Copy,
566{
567    match opt_symbol {
568        None => false,
569        Some(u) => u == v,
570    }
571}
572
573/// The algorithm for matching. This is based on [Kurt's algorithm][kurt2016] with the following
574/// optimizations taken from [Krauss algorithm][krauss2014]:
575///
576/// 1. Immediately skip consecutive stars.
577/// 2. When matching a star, immediately fast-forward the input to a symbol that matches the first
578///    symbol after the star. This applies both when star is found and when backtracking.
579///
580/// It also implements escaping of special symbols.
581///
582/// The closure `capture` will be called for all substrings matched by a star. It will *not* be
583/// called for captures of `?`: because we do backtracking we would have to delay captures of ?
584/// which would require us to use more memory.
585///
586/// [kurt2016]: http://dodobyte.com/wildcard.html
587/// [krauss2014]: http://developforperformance.com/MatchingWildcards_AnImprovedAlgorithmForBigData.html
588#[inline]
589fn matches<S>(
590    config: &WildcardMatchingConfig<S>,
591    pattern: &[S],
592    input: &[S],
593    symbol_eq: impl Fn(S, S) -> bool,
594    mut capture: impl FnMut(Range<usize>),
595) -> bool
596where
597    S: Eq + Copy,
598{
599    // This function is very performance-sensitive. Any changes to this function should be validated
600    // by running the benchmarks against the baseline.
601
602    let symbol_escape = config.symbol_escape;
603    let metasymbol_any = config.metasymbol_any;
604    let metasymbol_one = config.metasymbol_one;
605
606    let pattern_len = pattern.len();
607    let input_len = input.len();
608    let mut pattern_index = 0;
609    let mut input_index = 0;
610
611    // This will point to the first pattern symbol after the last star.
612    let mut revert_pattern_index = 0;
613    // After we see a star we will start to try to match the pattern after the star with this
614    // position of the input. We will initially try to match the star with an empty substring.
615    // If we fail to match we will increment this and try again, now trying to match the
616    // star with a substring of length one, and so on.
617    let mut revert_input_index = 0;
618    // This will point to the first symbol of input that matches the star. The current assignment
619    // for the star will be `input[last_star_input_index..revert_input_index]`.
620    let mut last_star_input_index = 0;
621
622    while input_index < input_len {
623        let mut match_failed = false;
624
625        if pattern_index >= pattern_len {
626            match_failed = true;
627        } else {
628            let mut pattern_symbol = pattern[pattern_index];
629            let mut escape = false;
630
631            // Skip the escape symbol.
632            if is_symbol(pattern_symbol, symbol_escape) {
633                // If this is an escape it is safe to advance the one (i.e. we won't be out of
634                // bounds) as this was validated beforehand.
635                pattern_index += 1;
636                pattern_symbol = pattern[pattern_index];
637                escape = true;
638            }
639
640            if pattern_symbol == metasymbol_any && !escape {
641                // If there was a previous star we can now establish its match.
642                if revert_pattern_index > 0 {
643                    capture(last_star_input_index..revert_input_index);
644                }
645
646                pattern_index += 1;
647
648                // If there are multiple stars, skip them.
649                while pattern_index < pattern_len && pattern[pattern_index] == metasymbol_any {
650                    capture(input_index..input_index);
651                    pattern_index += 1;
652                }
653
654                if pattern_index >= pattern_len {
655                    // We reached the end of the pattern, and we just matched a `*`, so we can say for
656                    // sure this is a match.
657                    capture(input_index..input_len);
658                    return true;
659                }
660
661                debug_assert!(pattern_index < pattern_len);
662
663                let pattern_symbol = pattern[pattern_index];
664
665                debug_assert!(pattern_symbol != metasymbol_any);
666
667                last_star_input_index = input_index;
668
669                // We had a `*` so we can advance to the next possible match (what we skip is what the
670                // star consumed).
671                if !is_symbol(pattern_symbol, metasymbol_one)
672                    && !is_symbol(pattern_symbol, symbol_escape)
673                {
674                    while input_index < input_len && !symbol_eq(pattern_symbol, input[input_index])
675                    {
676                        input_index += 1;
677                    }
678                }
679
680                // Update revert data. We will use this if we need to backtrack because of a mismatch.
681                revert_pattern_index = pattern_index;
682                revert_input_index = input_index;
683            } else if !symbol_eq(pattern_symbol, input[input_index])
684                && (!is_symbol(pattern_symbol, metasymbol_one) || escape)
685            {
686                match_failed = true;
687            } else {
688                pattern_index += 1;
689                input_index += 1;
690            }
691        }
692
693        if match_failed {
694            // Here we either reached the end of the pattern or we had a symbol mismatch.
695            // In either case we failed the match, so if we never saw a star before there's no
696            // possible match. If we did find a star before that means we should backtrack and
697            // consider the match where the star consumed one more symbol than the current try.
698
699            // If we never saw a `*` before, there is no match.
700            if revert_pattern_index == 0 {
701                return false;
702            }
703
704            // We need to backtrack. Let's make the star consume one more symbol.
705            revert_input_index += 1;
706
707            debug_assert!(revert_pattern_index < pattern_len);
708
709            let pattern_symbol = pattern[revert_pattern_index];
710
711            debug_assert!(pattern_symbol != metasymbol_any);
712
713            // Advance to the next possible match.
714            if !is_symbol(pattern_symbol, metasymbol_one)
715                && !is_symbol(pattern_symbol, symbol_escape)
716            {
717                while revert_input_index < input_len
718                    && !symbol_eq(pattern_symbol, input[revert_input_index])
719                {
720                    revert_input_index += 1;
721                }
722            }
723
724            pattern_index = revert_pattern_index;
725            input_index = revert_input_index;
726        }
727    }
728
729    // Emit the capture of the last star.
730    if revert_pattern_index > 0 {
731        capture(last_star_input_index..revert_input_index);
732    }
733
734    // If there are trailing stars simply skip them.
735    while pattern_index < pattern_len && pattern[pattern_index] == metasymbol_any {
736        capture(input_index..input_index);
737        pattern_index += 1;
738    }
739
740    // If we consumed the entire pattern we have a match.
741    pattern_index >= pattern_len
742}
743
744impl<'a, S> Debug for Wildcard<'a, S>
745where
746    S: WildcardSymbol + Display,
747{
748    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
749        for s in self.pattern.iter() {
750            Display::fmt(s, f)?;
751        }
752
753        Ok(())
754    }
755}
756
757#[cfg(test)]
758mod tests {
759    use crate::{Wildcard, WildcardBuilder, WildcardError, WildcardToken};
760    use alloc::string::String;
761    use alloc::vec::Vec;
762    use alloc::{format, vec};
763    use quickcheck::{Arbitrary, Gen};
764    use quickcheck_macros::quickcheck;
765
766    fn vec_u8_to_vec_string(v: Vec<&[u8]>) -> Vec<String> {
767        v.into_iter().map(|c| String::from_utf8_lossy(c).into_owned()).collect()
768    }
769
770    pub mod engine_regex_bytes {
771        use crate::{Wildcard, WildcardToken};
772        use alloc::borrow::ToOwned;
773        use alloc::string::String;
774        use alloc::vec::Vec;
775        use regex::bytes::{Regex, RegexBuilder};
776
777        fn make_regex(pattern: &str, case_insensitive: bool) -> Regex {
778            let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
779            let mut regex_pattern = "^".to_owned();
780
781            for token in wildcard.parsed() {
782                match token {
783                    WildcardToken::MetasymbolAny => {
784                        regex_pattern.push_str("(.*?)");
785                    }
786                    WildcardToken::MetasymbolOne => {
787                        regex_pattern.push_str("(.)");
788                    }
789                    WildcardToken::Symbol(s) => {
790                        let slice = &[s];
791                        let s = core::str::from_utf8(slice).expect("invalid utf-8 symbol");
792                        regex_pattern.push_str(&regex::escape(s));
793                    }
794                }
795            }
796
797            regex_pattern.push('$');
798
799            RegexBuilder::new(&regex_pattern)
800                .dot_matches_new_line(true)
801                .crlf(false)
802                .unicode(false)
803                .case_insensitive(case_insensitive)
804                .build()
805                .expect("invalid regex")
806        }
807
808        pub fn matches(pattern: &str, input: &[u8], case_insensitive: bool) -> bool {
809            make_regex(pattern, case_insensitive).is_match(input)
810        }
811
812        pub fn captures<'a>(
813            pattern: &str,
814            input: &'a [u8],
815            case_insensitive: bool,
816        ) -> Option<Vec<&'a [u8]>> {
817            make_regex(pattern, case_insensitive).captures(input).map(|c| {
818                c.iter().flat_map(IntoIterator::into_iter).skip(1).map(|m| m.as_bytes()).collect()
819            })
820        }
821
822        pub fn captures_as_string(
823            pattern: &str,
824            input: &[u8],
825            case_insensitive: bool,
826        ) -> Option<Vec<String>> {
827            captures(pattern, input, case_insensitive)
828                .map(|c| c.iter().map(|m| String::from_utf8_lossy(m).into_owned()).collect())
829        }
830    }
831
832    #[derive(serde::Deserialize, Debug)]
833    struct MatchTest {
834        pattern: String,
835        input: String,
836        matches: bool,
837        #[serde(default, rename = "case-insensitive")]
838        case_insensitive: bool,
839    }
840
841    #[derive(serde::Deserialize)]
842    struct MatchTests {
843        #[serde(rename = "test")]
844        tests: Vec<MatchTest>,
845    }
846
847    fn run_matching_tests(tests: MatchTests) -> Vec<String> {
848        let mut errors = Vec::new();
849
850        assert!(!tests.tests.is_empty());
851
852        for test in tests.tests {
853            let is_match = WildcardBuilder::new(test.pattern.as_bytes())
854                .case_insensitive(test.case_insensitive)
855                .build()
856                .expect("invalid wildcard")
857                .is_match(test.input.as_bytes());
858
859            assert_eq!(
860                engine_regex_bytes::matches(
861                    &test.pattern,
862                    test.input.as_bytes(),
863                    test.case_insensitive
864                ),
865                test.matches,
866                "it seems the test itself is wrong: {test:?}",
867            );
868
869            match (test.matches, is_match) {
870                (true, false) => errors.push(format!(
871                    r#"wildcard "{}" should have matched "{}""#,
872                    test.pattern, test.input
873                )),
874                (false, true) => errors.push(format!(
875                    r#"wildcard "{}" should not have matched "{}""#,
876                    test.pattern, test.input
877                )),
878                (true, true) | (false, false) => (),
879            }
880        }
881
882        errors
883    }
884
885    #[derive(serde::Deserialize, Debug)]
886    struct CaptureTest {
887        pattern: String,
888        input: String,
889        captures: Vec<String>,
890        #[serde(default, rename = "case-insensitive")]
891        case_insensitive: bool,
892    }
893
894    impl CaptureTest {
895        fn from_matching_test(matching_test: MatchTest) -> Option<CaptureTest> {
896            if !matching_test.matches {
897                return None;
898            }
899
900            let captures = engine_regex_bytes::captures_as_string(
901                &matching_test.pattern,
902                matching_test.input.as_bytes(),
903                matching_test.case_insensitive,
904            )
905            .unwrap_or_else(|| {
906                panic!("the test should match but captures() returned None: {matching_test:?}")
907            });
908
909            Some(CaptureTest {
910                pattern: matching_test.pattern,
911                input: matching_test.input,
912                captures,
913                case_insensitive: matching_test.case_insensitive,
914            })
915        }
916    }
917
918    #[derive(serde::Deserialize)]
919    struct CaptureTests {
920        #[serde(rename = "test")]
921        tests: Vec<CaptureTest>,
922    }
923
924    fn run_capture_tests(tests: CaptureTests) -> Vec<String> {
925        let mut errors = Vec::new();
926
927        assert!(!tests.tests.is_empty());
928
929        for test in tests.tests {
930            let wildcard = WildcardBuilder::new(test.pattern.as_bytes())
931                .case_insensitive(test.case_insensitive)
932                .build()
933                .expect("invalid wildcard");
934
935            assert_eq!(
936                engine_regex_bytes::captures_as_string(
937                    &test.pattern,
938                    test.input.as_bytes(),
939                    test.case_insensitive
940                )
941                .as_ref(),
942                Some(&test.captures),
943                "it seems the test itself is wrong: {test:?}",
944            );
945
946            let captures: Option<Vec<String>> =
947                wildcard.captures(test.input.as_bytes()).map(vec_u8_to_vec_string);
948
949            match captures {
950                Some(captures) if captures != test.captures => errors.push(format!(
951                    r#"captures for wildcard "{}" with input "{}" should have been {:?} instead of {:?}"#,
952                    test.pattern, test.input, test.captures, captures,
953                )),
954                None => errors.push(format!(
955                    "we did not get a match on the capture test {test:?}"
956                )),
957                Some(_) => (),
958            }
959        }
960
961        errors
962    }
963
964    #[test]
965    fn test_matching() {
966        let tests = toml::from_str::<MatchTests>(include_str!("../testdata/matching.toml"))
967            .expect("failed to parse test file");
968
969        let errors = run_matching_tests(tests);
970
971        assert!(errors.is_empty(), "{} tests failed:\n{}", errors.len(), errors.join("\n"));
972    }
973
974    #[test]
975    fn test_capture() {
976        let tests = toml::from_str::<CaptureTests>(include_str!("../testdata/capture.toml"))
977            .expect("failed to parse test file");
978
979        let errors = run_capture_tests(tests);
980
981        assert!(errors.is_empty(), "{} tests failed:\n{}", errors.len(), errors.join("\n"));
982
983        // We also create capture tests from the matching tests:
984        let tests = {
985            let t = toml::from_str::<MatchTests>(include_str!("../testdata/matching.toml"))
986                .expect("failed to parse test file");
987
988            CaptureTests {
989                tests: t.tests.into_iter().filter_map(CaptureTest::from_matching_test).collect(),
990            }
991        };
992
993        let errors = run_capture_tests(tests);
994
995        assert!(errors.is_empty(), "{} tests failed:\n{}", errors.len(), errors.join("\n"));
996    }
997
998    #[test]
999    fn test_syntax_validation() {
1000        let pattern = r"\foo";
1001        let expected = WildcardError::Syntax { position: 0, message: r"invalid escape sequence" };
1002
1003        assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1004
1005        let pattern = r"f\oo";
1006        let expected = WildcardError::Syntax { position: 1, message: r"invalid escape sequence" };
1007
1008        assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1009
1010        let pattern = r"foo\";
1011        let expected = WildcardError::Syntax {
1012            position: 3,
1013            message: "incomplete escape sequence at the end of the wildcard",
1014        };
1015
1016        assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1017
1018        let pattern = r"foo\\\";
1019        let expected = WildcardError::Syntax {
1020            position: 5,
1021            message: "incomplete escape sequence at the end of the wildcard",
1022        };
1023
1024        assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1025
1026        let pattern = r"f\?oo";
1027        let expected = WildcardError::Syntax { position: 1, message: r"invalid escape sequence" };
1028        let wildcard = WildcardBuilder::new(pattern.as_bytes()).without_one_metasymbol().build();
1029
1030        assert_eq!(wildcard.err(), Some(expected));
1031
1032        let pattern = r"f\?oo";
1033        let expected = WildcardError::Syntax { position: 1, message: r"invalid escape sequence" };
1034        let wildcard = WildcardBuilder::new(pattern.as_bytes()).with_one_metasymbol(b'!').build();
1035
1036        assert_eq!(wildcard.err(), Some(expected));
1037
1038        let pattern = r"f\*\\o\?o";
1039
1040        assert!(Wildcard::new(pattern.as_bytes()).is_ok());
1041
1042        let pattern = r"f?oo";
1043        let wildcard = WildcardBuilder::new(pattern.as_bytes()).without_one_metasymbol().build();
1044
1045        assert!(wildcard.is_ok());
1046
1047        let pattern = r"f\!o!o";
1048        let wildcard = WildcardBuilder::new(pattern.as_bytes()).with_one_metasymbol(b'!').build();
1049
1050        assert!(wildcard.is_ok());
1051
1052        let pattern = r"f\o\\o\";
1053        let wildcard = WildcardBuilder::new(pattern.as_bytes()).without_escape().build();
1054
1055        assert!(wildcard.is_ok());
1056    }
1057
1058    #[test]
1059    fn test_configuration_repeated_special_symbols() {
1060        let wildcard = WildcardBuilder::new(&[]).with_escape_symbol(b'*').build();
1061
1062        assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1063
1064        let wildcard = WildcardBuilder::new(&[]).with_escape_symbol(b'?').build();
1065
1066        assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1067
1068        let wildcard = WildcardBuilder::new(&[]).with_one_metasymbol(b'*').build();
1069
1070        assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1071
1072        let wildcard = WildcardBuilder::new(&[]).with_one_metasymbol(b'\\').build();
1073
1074        assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1075
1076        let wildcard = WildcardBuilder::new(&[]).with_any_metasymbol(b'?').build();
1077
1078        assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1079
1080        let wildcard = WildcardBuilder::new(&[]).with_any_metasymbol(b'\\').build();
1081
1082        assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1083    }
1084
1085    #[test]
1086    fn test_wildcard_parsed() {
1087        let wildcard = Wildcard::new(r"a\\b?*c\?d\*".as_bytes()).unwrap();
1088        let expected = [
1089            WildcardToken::Symbol(b'a'),
1090            WildcardToken::Symbol(b'\\'),
1091            WildcardToken::Symbol(b'b'),
1092            WildcardToken::MetasymbolOne,
1093            WildcardToken::MetasymbolAny,
1094            WildcardToken::Symbol(b'c'),
1095            WildcardToken::Symbol(b'?'),
1096            WildcardToken::Symbol(b'd'),
1097            WildcardToken::Symbol(b'*'),
1098        ];
1099
1100        assert_eq!(wildcard.parsed().collect::<Vec<_>>(), expected);
1101
1102        let wildcard =
1103            WildcardBuilder::new(r"*?\\".as_bytes()).without_one_metasymbol().build().unwrap();
1104        let expected = [
1105            WildcardToken::MetasymbolAny,
1106            WildcardToken::Symbol(b'?'),
1107            WildcardToken::Symbol(b'\\'),
1108        ];
1109
1110        assert_eq!(wildcard.parsed().collect::<Vec<_>>(), expected);
1111
1112        let wildcard = WildcardBuilder::new(r"*?\".as_bytes()).without_escape().build().unwrap();
1113        let expected = [
1114            WildcardToken::MetasymbolAny,
1115            WildcardToken::MetasymbolOne,
1116            WildcardToken::Symbol(b'\\'),
1117        ];
1118
1119        assert_eq!(wildcard.parsed().collect::<Vec<_>>(), expected);
1120    }
1121
1122    #[test]
1123    fn test_matching_chars() {
1124        let pattern = "a*c".chars().collect::<Vec<char>>();
1125        let wildcard = Wildcard::new(&pattern).expect("invalid wildcard");
1126
1127        assert!(wildcard.is_match(&['a', 'c']));
1128        assert!(!wildcard.is_match(&['a', 'b']));
1129        assert!(wildcard.is_match(&['a', 'b', 'c']));
1130        assert!(wildcard.is_match(&['a', 'b', 'b', 'c']));
1131    }
1132
1133    #[test]
1134    fn test_matching_chars_case_insensitive() {
1135        let pattern = "ω*δ".chars().collect::<Vec<char>>();
1136        let wildcard = WildcardBuilder::new(&pattern)
1137            .case_insensitive(true)
1138            .build()
1139            .expect("invalid wildcard");
1140
1141        assert!(wildcard.is_match(&['ω', 'Δ']));
1142        assert!(!wildcard.is_match(&['ω', 'x']));
1143        assert!(wildcard.is_match(&['Ω', 'x', 'δ']));
1144        assert!(wildcard.is_match(&['ω', 'x', 'x', 'Δ']));
1145    }
1146
1147    #[test]
1148    fn test_capture_chars_case_insensitive() {
1149        let pattern = "ω*δ".chars().collect::<Vec<char>>();
1150        let wildcard = WildcardBuilder::new(&pattern)
1151            .case_insensitive(true)
1152            .build()
1153            .expect("invalid wildcard");
1154
1155        assert_eq!(wildcard.captures(&['ω', 'Δ']), Some(vec![&[] as &[char]]));
1156        assert_eq!(wildcard.captures(&['ω', 'x']), None);
1157        assert_eq!(wildcard.captures(&['Ω', 'x', 'X', 'Δ']), Some(vec![&['x', 'X'] as &[char]]));
1158        assert_eq!(wildcard.captures(&['ω', 'ω', 'Ω', 'Δ']), Some(vec![&['ω', 'Ω'] as &[char]]));
1159    }
1160
1161    #[test]
1162    fn test_debug_format() {
1163        let pattern = r"a*\*?\?".chars().collect::<Vec<char>>();
1164        let wildcard = WildcardBuilder::new(&pattern).build().expect("invalid wildcard");
1165
1166        assert_eq!(format!("{wildcard:?}"), r"a*\*?\?");
1167    }
1168
1169    #[derive(Clone, Debug)]
1170    struct WildcardAndInput {
1171        pattern: String,
1172        input: String,
1173    }
1174
1175    struct NormalCharGen {
1176        chars_range: usize,
1177    }
1178
1179    impl NormalCharGen {
1180        const ALPHA_CHARS: &'static str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
1181
1182        fn new(gen: &mut Gen) -> NormalCharGen {
1183            // Bias towards a small set of characters.
1184            let chars_range = 1 + match u8::arbitrary(gen) % 100 {
1185                0..=10 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 2),
1186                11..=20 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 4),
1187                21..=30 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 6),
1188                31..=40 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 8),
1189                41..=50 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 10),
1190                _ => usize::arbitrary(gen) % NormalCharGen::ALPHA_CHARS.len(),
1191            };
1192
1193            NormalCharGen { chars_range }
1194        }
1195
1196        fn gen(&self, gen: &mut Gen) -> &'static str {
1197            let index = usize::arbitrary(gen) % self.chars_range;
1198            &NormalCharGen::ALPHA_CHARS[index..=index]
1199        }
1200
1201        fn gen_maybe_special(&self, gen: &mut Gen) -> &'static str {
1202            match u8::arbitrary(gen) % 100 {
1203                0..=1 => "*",
1204                2..=3 => "?",
1205                4..=5 => r"\",
1206                _ => self.gen(gen),
1207            }
1208        }
1209
1210        fn widen(&self, gen: &mut Gen) -> NormalCharGen {
1211            NormalCharGen {
1212                chars_range: 1
1213                    + (self.chars_range + (usize::arbitrary(gen) % 5))
1214                        % NormalCharGen::ALPHA_CHARS.len(),
1215            }
1216        }
1217    }
1218
1219    fn arbitrary_length(gen: &mut Gen) -> usize {
1220        match u8::arbitrary(gen) % 100 {
1221            0..=15 => 0,
1222            16..=30 => 1 + usize::arbitrary(gen) % 5,
1223            31..=50 => 1 + usize::arbitrary(gen) % 10,
1224            _ => 1 + usize::arbitrary(gen) % gen.size(),
1225        }
1226    }
1227
1228    fn wildcard_tokens_to_pattern(tokens: &[WildcardToken]) -> String {
1229        let mut pattern = String::with_capacity(2 * tokens.len());
1230
1231        for token in tokens {
1232            match token {
1233                WildcardToken::MetasymbolAny => pattern.push('*'),
1234                WildcardToken::MetasymbolOne => pattern.push('?'),
1235                WildcardToken::Symbol(b) => match b {
1236                    b'*' => pattern.push_str(r"\*"),
1237                    b'?' => pattern.push_str(r"\?"),
1238                    b'\\' => pattern.push_str(r"\\"),
1239                    b => pattern.push(char::from(*b)),
1240                },
1241            }
1242        }
1243
1244        pattern
1245    }
1246
1247    fn create_input_matching_pattern(
1248        gen: &mut Gen,
1249        normal_chars_gen: &NormalCharGen,
1250        pattern: &[WildcardToken],
1251    ) -> String {
1252        // Let's have a wider charset than the pattern.
1253        let normal_chars_gen = normal_chars_gen.widen(gen);
1254        let mut input = String::with_capacity(pattern.len());
1255
1256        for token in pattern {
1257            match token {
1258                WildcardToken::MetasymbolAny => {
1259                    let len = arbitrary_length(gen);
1260
1261                    (0..len).for_each(|_| input.push_str(normal_chars_gen.gen_maybe_special(gen)));
1262                }
1263                WildcardToken::MetasymbolOne => {
1264                    input.push_str(normal_chars_gen.gen_maybe_special(gen));
1265                }
1266                WildcardToken::Symbol(b) => {
1267                    input.push(char::from(*b));
1268                }
1269            }
1270        }
1271
1272        input
1273    }
1274
1275    fn arbitrary_wildcard_token(gen: &mut Gen, normal_chars_gen: &NormalCharGen) -> WildcardToken {
1276        match u8::arbitrary(gen) % 100 {
1277            0..=20 => WildcardToken::MetasymbolAny,
1278            21..=30 => WildcardToken::MetasymbolOne,
1279            _ => {
1280                // We want to have a reasonable chance of hitting something that requires
1281                // escaping.
1282                match u8::arbitrary(gen) % 100 {
1283                    0..=5 => WildcardToken::Symbol(b'*'),
1284                    6..=10 => WildcardToken::Symbol(b'?'),
1285                    11..=15 => WildcardToken::Symbol(b'\\'),
1286                    _ => WildcardToken::Symbol(normal_chars_gen.gen(gen).as_bytes()[0]),
1287                }
1288            }
1289        }
1290    }
1291
1292    fn arbitrary_wildcard_tokens(
1293        gen: &mut Gen,
1294        normal_chars_gen: &NormalCharGen,
1295    ) -> Vec<WildcardToken> {
1296        let len = arbitrary_length(gen);
1297
1298        (0..len).map(|_| arbitrary_wildcard_token(gen, normal_chars_gen)).collect()
1299    }
1300
1301    fn arbitrary_input(gen: &mut Gen, normal_chars_gen: &NormalCharGen) -> String {
1302        // Let's have a wider charset than the pattern.
1303        let normal_chars_gen = normal_chars_gen.widen(gen);
1304        let len = arbitrary_length(gen);
1305
1306        (0..len).map(|_| normal_chars_gen.gen_maybe_special(gen)).collect()
1307    }
1308
1309    impl Arbitrary for WildcardAndInput {
1310        fn arbitrary(gen: &mut Gen) -> WildcardAndInput {
1311            let will_match = bool::arbitrary(gen);
1312
1313            match will_match {
1314                true => {
1315                    let WildcardAndMatchingInput { pattern, input } =
1316                        WildcardAndMatchingInput::arbitrary(gen);
1317
1318                    WildcardAndInput { pattern, input }
1319                }
1320                false => {
1321                    let normal_chars_gen = NormalCharGen::new(gen);
1322                    let pattern_tokens = arbitrary_wildcard_tokens(gen, &normal_chars_gen);
1323                    let pattern = wildcard_tokens_to_pattern(&pattern_tokens);
1324
1325                    let input = arbitrary_input(gen, &normal_chars_gen);
1326
1327                    WildcardAndInput { pattern, input }
1328                }
1329            }
1330        }
1331    }
1332
1333    /// Test case where the input matches the pattern.
1334    #[derive(Clone, Debug)]
1335    struct WildcardAndMatchingInput {
1336        pattern: String,
1337        input: String,
1338    }
1339
1340    impl Arbitrary for WildcardAndMatchingInput {
1341        fn arbitrary(gen: &mut Gen) -> WildcardAndMatchingInput {
1342            let normal_chars_gen = NormalCharGen::new(gen);
1343            let pattern_tokens = arbitrary_wildcard_tokens(gen, &normal_chars_gen);
1344            let pattern = wildcard_tokens_to_pattern(&pattern_tokens);
1345
1346            let input = {
1347                let input = create_input_matching_pattern(gen, &normal_chars_gen, &pattern_tokens);
1348
1349                assert!(
1350                        engine_regex_bytes::matches(&pattern, input.as_bytes(), false),
1351                        "failed to create an input that matched the pattern\npattern: {pattern}\ninput  : {input}",
1352                    );
1353
1354                input
1355            };
1356
1357            WildcardAndMatchingInput { pattern, input }
1358        }
1359    }
1360
1361    #[quickcheck]
1362    fn property_matching_equivalent_to_regex_engine(
1363        WildcardAndInput { pattern, input }: WildcardAndInput,
1364    ) -> bool {
1365        let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1366
1367        wildcard.is_match(input.as_bytes())
1368            == engine_regex_bytes::matches(&pattern, input.as_bytes(), false)
1369    }
1370
1371    #[quickcheck]
1372    fn property_capture_equivalent_to_regex_engine(
1373        WildcardAndInput { pattern, input }: WildcardAndInput,
1374    ) -> bool {
1375        let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1376
1377        wildcard.captures(input.as_bytes())
1378            == engine_regex_bytes::captures(&pattern, input.as_bytes(), false)
1379    }
1380
1381    #[quickcheck]
1382    fn property_capture_length_equals_number_of_metacharacters(
1383        WildcardAndMatchingInput { pattern, input }: WildcardAndMatchingInput,
1384    ) -> bool {
1385        let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1386        let metacharacter_count = wildcard.parsed().filter(|token| token.is_metasymbol()).count();
1387
1388        let captures = wildcard
1389            .captures(input.as_bytes())
1390            .map(|c| c.len())
1391            .expect("this test should only get matching test cases");
1392
1393        captures == metacharacter_count
1394    }
1395
1396    #[quickcheck]
1397    fn property_captures_agree_with_is_match(
1398        WildcardAndInput { pattern, input }: WildcardAndInput,
1399    ) -> bool {
1400        let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1401
1402        let captures = wildcard.captures(input.as_bytes()).map(|c| c.len());
1403
1404        captures.is_some() == wildcard.is_match(input.as_bytes())
1405    }
1406
1407    #[quickcheck]
1408    fn property_captures_filled_in_metasymbols_matches_input(
1409        WildcardAndMatchingInput { pattern, input }: WildcardAndMatchingInput,
1410    ) -> bool {
1411        let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1412
1413        let mut captures = wildcard
1414            .captures(input.as_bytes())
1415            .expect("this test should only get matching test cases")
1416            .into_iter();
1417
1418        let mut pattern_filled_in = String::with_capacity(input.len());
1419
1420        for token in wildcard.parsed() {
1421            match token {
1422                WildcardToken::MetasymbolAny | WildcardToken::MetasymbolOne => {
1423                    let fill = captures.next().expect("must be present");
1424
1425                    pattern_filled_in.push_str(core::str::from_utf8(fill).expect("invalid utf-8"));
1426                }
1427                WildcardToken::Symbol(b) => pattern_filled_in.push(char::from(b)),
1428            }
1429        }
1430
1431        pattern_filled_in == input
1432    }
1433}