simplematch/
lib.rs

1//! # simplematch
2//!
3//! The `simplematch` library provides a fast and efficient way to match wildcard patterns on
4//! strings and bytes. It includes two primary functions, `dowild` and `dowild_with`, along
5//! with an `Options` struct to customize the behavior of the `dowild_with` function.
6//!
7//! ## Usage
8//!
9//! To use the `simplematch` library, include it in your `Cargo.toml`:
10//!
11//! ```toml
12//! [dependencies]
13//! simplematch = "0.3.1"
14//! ```
15//!
16//! ## Functions
17//!
18//! ### `dowild`
19//!
20//! This function is the most performant but has no customization options.
21//!
22//! ```rust, ignore
23//! pub fn dowild<T>(pattern: &[T], haystack: &[T]) -> bool
24//! where
25//!     T: Wildcard
26//! ```
27//!
28//! Matches the given `haystack` against the specified `pattern` using simple wildcard rules.
29//! The `*` character matches any sequence of characters, while the `?` character matches
30//! a single character.
31//!
32//! `Wildcard` is natively implemented for `u8` and `char`.
33//!
34//! **Parameters:**
35//! - `pattern`: A bytes or char slice representing the wildcard pattern to match against.
36//! - `haystack`: A bytes or char slice representing the text to be matched.
37//!
38//! **Returns:**
39//! - `true` if the `pattern` matches the `haystack`, otherwise `false`.
40//!
41//! #### Examples
42//!
43//! ```rust
44//! use simplematch::dowild;
45//!
46//! assert_eq!(dowild("foo*".as_bytes(), "foobar".as_bytes()), true);
47//! assert_eq!(dowild("foo?".as_bytes(), "fooa".as_bytes()), true)
48//! ```
49//!
50//! Or, bringing the trait [`DoWild`] in scope allows for more convenient access to this
51//! function without performance loss:
52//!
53//! ```rust
54//! use simplematch::DoWild;
55//!
56//! assert_eq!("foo*".dowild("foobar"), true);
57//! ```
58//!
59//! A possible usage with `char`:
60//!
61//! ```rust
62//! use simplematch::DoWild;
63//!
64//! let pattern = "foo*".chars().collect::<Vec<char>>();
65//! let haystack = "foobar".chars().collect::<Vec<char>>();
66//!
67//! assert_eq!(pattern.dowild(haystack), true);
68//! ```
69//!
70//! ### `dowild_with`
71//!
72//! ```rust, ignore
73//! use simplematch::Options;
74//!
75//! pub fn dowild_with<T>(pattern: &[T], haystack: &[T], options: Options<T>) -> bool
76//! where
77//!    T: Wildcard + Ord,
78//! ```
79//!
80//! Matches the given `haystack` against the specified `pattern` with customizable [`Options`].
81//! This function supports case-insensitive matching, custom wildcard characters, escaping of
82//! special characters, and character classes including ranges.
83//!
84//! **Parameters:**
85//! - `pattern`: A bytes or char slice representing the wildcard pattern to match against.
86//! - `haystack`: A bytes or char slice representing the text to be matched.
87//! - `options`: An instance of the [`Options`] struct to customize the matching behavior.
88//!
89//! **Returns:**
90//! - `true` if the `pattern` matches the `haystack` according to the specified options,
91//!   otherwise `false`.
92//!
93//! #### Examples
94//!
95//! ```rust
96//! use simplematch::{dowild_with, Options};
97//!
98//! let options = Options::default()
99//!     .case_insensitive(true)
100//!     .wildcard_any_with(b'%');
101//!
102//! assert_eq!(
103//!     dowild_with("foo%".as_bytes(), "FOOBAR".as_bytes(), options),
104//!     true
105//! );
106//! ```
107//!
108//! Like [`dowild`], the [`dowild_with`] function can be called directly on strings, `u8` slices,
109//! and every other type implementing [`DoWild`]:
110//!
111//! ```rust
112//! use simplematch::{DoWild, Options};
113//!
114//! assert_eq!(
115//!     "foo*".dowild_with("FOObar", Options::default().case_insensitive(true)),
116//!     true
117//! );
118//! ```
119//!
120//! ## Character classes
121//!
122//! An expression `[...]` whose first character after the leading `[` is not an `!` matches any
123//! single character listed between the brackets. The contents must not be empty, otherwise the
124//! brackets are interpreted literally (the pattern `a[]c` matches `a[]c` exactly); however, a
125//! `]` can be included as the first character within the brackets. For example, `[][!]`
126//! matches the three characters `[`, `]`, and `!`.
127//!
128//! ## Ranges
129//!
130//! A special convention exists where two characters separated by `-` represent a range.
131//! For instance, `[A-Fa-f0-9]` is equivalent to `[ABCDEFabcdef0123456789]`.
132//! To include `-` as a literal character, it must be placed as the first or last character
133//! within the brackets. For example, `[]-]` matches the two characters `]` and `-`. As opposed
134//! to regex, it is possible to reverse a range: `[F-A]` has the same meaning as `[A-F]`.
135//!
136//! ## Complementation
137//!
138//! An expression `[!...]` matches any single character that is not included in the expression
139//! formed by removing the first `!`. For example, `[!]a-]` matches any character except `]`,
140//! `a`, and `-`.
141//!
142//! To remove the special meanings of `?`, `*`, and `[`, you can precede them with the escape
143//! character (per default the backslash character `\`). Within brackets, these characters
144//! represent themselves. For instance, `[[?*\\]` matches the four characters `[`, `?`, `*`,
145//! and `\`.
146//!
147//! ## Credits
148//!
149//! This linear-time wildcard matching algorithm is derived from the one presented in Russ
150//! Cox's great article about simple and performant glob matching (<https://research.swtch.com/glob>).
151//! Furthermore, the optimizations for the `?` handling are based on the article [Matching
152//! Wildcards: An Improved Algorithm for Big
153//! Data](https://developforperformance.com/MatchingWildcards_AnImprovedAlgorithmForBigData.html)
154//! written by Kirk J. Krauss.
155//!
156//! The `simplematch` algorithm is an improved version which generally uses about 2-6x fewer
157//! instructions than the original algorithm; tested with random small and big data.
158
159// spell-checker: ignore aaabc fooa Krauss
160
161#![cfg_attr(not(feature = "std"), no_std)]
162#![cfg_attr(docsrs, feature(doc_auto_cfg))]
163
// Implements `DoWild` for a concrete type by forwarding to the free functions `dowild` and
// `dowild_with`.
//
// Supported forms:
//
// * `impl_dowild!(Elem: Type)`: pattern and haystack are handed to the free functions as they
//   are.
// * `impl_dowild!(Elem: Type => tokens)`: `tokens` (for example `.as_bytes()`) are appended to
//   both the pattern and the haystack expression before forwarding.
macro_rules! impl_dowild {
    // No conversion given: reuse the general arm with an empty conversion suffix.
    ( $elem:ty: $target:ty ) => {
        impl_dowild!($elem: $target =>);
    };
    ( $elem:ty: $target:ty => $( $convert:tt )* ) => {
        impl DoWild<$elem> for $target {
            fn dowild(&self, haystack: Self) -> bool {
                dowild(self $( $convert )*, haystack $( $convert )*)
            }

            fn dowild_with(&self, haystack: Self, options: Options<$elem>) -> bool {
                dowild_with(self $( $convert )*, haystack $( $convert )*, options)
            }
        }
    };
}
188
189#[cfg(not(feature = "std"))]
190extern crate alloc;
191
192#[cfg(not(feature = "std"))]
193use alloc::collections::VecDeque;
194#[cfg(not(feature = "std"))]
195use alloc::string::String;
196#[cfg(not(feature = "std"))]
197use alloc::vec::Vec;
198use core::cmp::Ordering;
199use core::fmt::Display;
200use core::ops::Deref;
201#[cfg(feature = "std")]
202use std::collections::VecDeque;
203#[cfg(feature = "std")]
204use std::error::Error;
205#[cfg(feature = "std")]
206use std::string::String;
207#[cfg(feature = "std")]
208use std::vec::Vec;
209
210/// A convenience trait to use [`dowild`] and [`dowild_with`] directly for this type
211///
212/// This trait is natively implemented for
213///
214/// * `&str`
215/// * `String`
216/// * `&[u8]`
217/// * `Vec<u8>`
218/// * `&[char]`
219/// * `Vec<char>`
220///
221/// # Examples
222///
223/// Use [`dowild`] directly on a `&str`
224///
225/// ```rust
226/// use simplematch::DoWild;
227///
228/// assert_eq!("foo*".dowild("foobar"), true);
229/// ```
230pub trait DoWild<T>
231where
232    T: Wildcard,
233{
234    /// Matches this `pattern` against the specified `haystack` using simple wildcard rules.
235    ///
236    /// See [`dowild`] for more details.
237    ///
238    /// # Examples
239    ///
240    /// ```rust
241    /// use simplematch::DoWild;
242    ///
243    /// assert_eq!("foo*".dowild("foobar"), true);
244    /// ```
245    #[must_use]
246    fn dowild(&self, haystack: Self) -> bool;
247
248    /// Matches this `pattern` against the specified `haystack` with customizable [`Options`].
249    ///
250    /// See [`dowild_with`] for more details.
251    ///
252    /// # Examples
253    ///
254    /// ```rust
255    /// use simplematch::{DoWild, Options};
256    ///
257    /// assert_eq!(
258    ///     "foo*".dowild_with("foobar", Options::default().case_insensitive(true)),
259    ///     true
260    /// );
261    /// ```
262    #[must_use]
263    fn dowild_with(&self, haystack: Self, options: Options<T>) -> bool;
264}
265
/// Implemented by element types that can be matched against a wildcard pattern
///
/// The associated constants supply the default special tokens and the functions define how
/// single elements and ranges are compared.
pub trait Wildcard: Eq + Copy + Clone {
    /// Default token that matches any number of characters, usually `*`.
    const DEFAULT_ANY: Self;
    /// Default token that closes a character class, usually `]`.
    const DEFAULT_CLASS_CLOSE: Self;
    /// Default token that separates the bounds of a range, usually `-`.
    const DEFAULT_CLASS_HYPHEN: Self;
    /// Default token that negates a character class, usually `!`.
    const DEFAULT_CLASS_NEGATE: Self;
    /// Default token that opens a character class, usually `[`.
    const DEFAULT_CLASS_OPEN: Self;
    /// Default token that escapes special characters, usually `\`.
    const DEFAULT_ESCAPE: Self;
    /// Default token that matches exactly one character, usually `?`.
    const DEFAULT_ONE: Self;

    /// Returns `true` if both characters are equal when ignoring case
    fn match_one_case_insensitive(first: Self, second: Self) -> bool;
    /// Returns `true` if both characters are equal when respecting case
    fn match_one_case_sensitive(first: Self, second: Self) -> bool;

    /// Returns `true` if `token` lies within `low` to `high` when ignoring case
    fn match_range_case_insensitive(token: Self, low: Self, high: Self) -> bool;
    /// Returns `true` if `token` lies within `low` to `high` when respecting case
    fn match_range_case_sensitive(token: Self, low: Self, high: Self) -> bool;
}
293
/// Container for a value of type `T` that is either borrowed or owned
///
/// `Cow` would be an alternative but it requires `std`. The copy-on-write behavior is not
/// needed here, only a container that can hold both kinds of data.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
enum BorrowedOrOwned<'a, T> {
    /// The value is borrowed for `'a`
    Borrowed(&'a T),
    /// The value is stored inline
    Owned(T),
}
303
304#[derive(Debug, Clone)]
305enum Class<T> {
306    Positive(Vec<ClassKind<T>>),
307    Negative(Vec<ClassKind<T>>),
308}
309
/// A single item inside a character class
// The variant order is part of the derived `PartialOrd`/`Ord` and must stay as it is.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
enum ClassKind<T> {
    /// A range of characters such as `a-z`
    Range(T, T),
    /// Exactly one character
    One(T),
    /// A range whose bounds are the same character, such as `z-z`
    RangeOne(T),
}
319
/// The error type of the simplematch crate
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum SimpleMatchError {
    /// The same character was assigned to more than one role in the [`Options`]
    DuplicateCharacterAssignment,
}
326
327// Represents a character class
328#[derive(Debug, Clone)]
329struct CharacterClass<T> {
330    /// If `None`, the character class is invalid.
331    class: Option<Class<T>>,
332    /// The end index in the pattern
333    end: usize,
334    /// The start index in the pattern
335    start: usize,
336}
337
338#[derive(Debug, Clone)]
339struct CharacterClasses<T>(VecDeque<CharacterClass<T>>);
340
341/// Customize the matching behavior of the [`dowild_with`] function
342#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
343#[non_exhaustive]
344pub struct Options<T>
345where
346    T: Wildcard,
347{
348    /// If `true` the patterns are matched case-sensitive
349    ///
350    /// The default is to match case-sensitive. Currently, only ascii characters are
351    /// considered.
352    pub case_sensitive: bool,
353
354    /// The token to negate a character class.
355    ///
356    /// The default is `!`
357    pub class_negate: T,
358
359    /// Set to `true` to enable character classes `[...]`.
360    ///
361    /// The default is `false`.
362    pub is_classes_enabled: bool,
363
364    /// Set to `true` to enable escaping special characters in the pattern.
365    ///
366    /// The default is `false`.
367    ///
368    /// The default wildcard characters that can be escaped per default are `*`, `?`. These
369    /// characters are adjustable. If character classes are enabled, `[` can be escaped, too.
370    ///
371    /// If the escape character is not escaping a special character it is matched literally.
372    /// For example `"\\a"` matches the escape character and `a` literally.
373    pub is_escape_enabled: bool,
374
375    /// The token in the pattern to match zero or more occurrences of any character.
376    ///
377    /// The default token is `*`.
378    pub wildcard_any: T,
379
380    /// The token in the pattern to escape special characters as defined by the other fields.
381    ///
382    /// The default is the backslash character `\`.
383    pub wildcard_escape: T,
384    /// The token in the pattern to match exactly one occurrence of any character.
385    ///
386    /// The default token is `?`.
387    pub wildcard_one: T,
388}
389
390impl<T> Deref for BorrowedOrOwned<'_, T> {
391    type Target = T;
392
393    #[inline]
394    fn deref(&self) -> &Self::Target {
395        match self {
396            BorrowedOrOwned::Borrowed(value) => value,
397            BorrowedOrOwned::Owned(value) => value,
398        }
399    }
400}
401
402impl<T> AsRef<T> for BorrowedOrOwned<'_, T> {
403    #[inline]
404    fn as_ref(&self) -> &T {
405        self
406    }
407}
408
409impl<T> CharacterClass<T>
410where
411    T: Wildcard + Ord,
412{
413    /// Create a new valid character class
414    #[inline]
415    const fn new(class: Option<Class<T>>, start: usize, end: usize) -> Self {
416        Self { class, end, start }
417    }
418
419    /// Create a new invalid character class
420    #[inline]
421    const fn new_invalid(start: usize, end: usize) -> Self {
422        Self::new(None, start, end)
423    }
424
425    /// Returns the length of this character class.
426    #[inline]
427    const fn len(&self) -> usize {
428        self.end - self.start + 1
429    }
430
431    /// Parse a `CharacterClass`  with the opening bracket at the `start` index
432    ///
433    /// Beware, the starting condition is not verified in any way. A [`CharacterClass`] is
434    /// considered invalid, if there is no closing bracket found.
435    fn parse(start: usize, pattern: &[T], class_negate: T) -> Self {
436        // The first character of a range is always the opening bracket
437        let mut p_idx = start + 1;
438        if p_idx + 2 > pattern.len() {
439            // The pattern is too short to produce a valid range
440            return Self::new_invalid(start, p_idx + 1);
441        }
442
443        let mut class = if pattern[p_idx] == class_negate {
444            p_idx += 1;
445            Class::new_negative()
446        } else {
447            Class::new_positive()
448        };
449
450        // The `]` directly after the opening `[` (and possibly `!`) is special and matched literally
451        if pattern[p_idx] == T::DEFAULT_CLASS_CLOSE {
452            let kind = ClassKind::parse_first(p_idx, pattern);
453            p_idx += kind.len();
454            class.push(kind);
455        }
456
457        if p_idx < pattern.len() {
458            // Parse until we reach either the end of the string or find a `]`
459            while let Some(kind) = ClassKind::parse(p_idx, pattern) {
460                p_idx += kind.len();
461                if p_idx >= pattern.len() {
462                    // The end of the string without a `]`
463                    return Self::new_invalid(start, p_idx);
464                }
465                class.push(kind);
466            }
467
468            // The `None` case tells us we've found a `]` and a valid range
469            Self::new(Some(class), start, p_idx)
470        } else {
471            // We've reached the end of the string without a closing `]`
472            Self::new_invalid(start, p_idx)
473        }
474    }
475
476    /// If this `class` is valid, returns the result of [`Class::is_match`], otherwise `None`
477    #[inline]
478    fn try_match<F, G>(&self, token: T, match_one: F, match_range: G) -> Option<bool>
479    where
480        F: Fn(T, T) -> bool + Copy,
481        G: Fn(T, T, T) -> bool + Copy,
482    {
483        self.class
484            .as_ref()
485            .map(|class| class.is_match(token, match_one, match_range))
486    }
487}
488
489impl<T> CharacterClasses<T>
490where
491    T: Wildcard + Ord,
492{
493    /// Create a new `CharacterClass`
494    ///
495    /// This method does not allocate any memory.
496    #[inline]
497    fn new() -> Self {
498        Self(VecDeque::new())
499    }
500
501    /// Returns the `CharacterClass` with the given `index` as `start` index
502    #[inline]
503    fn get(&self, index: usize) -> Option<&CharacterClass<T>> {
504        self.0.iter().find(|r| r.start == index)
505    }
506
507    #[inline]
508    fn parse(start: usize, pattern: &[T], class_negate: T) -> CharacterClass<T> {
509        CharacterClass::parse(start, pattern, class_negate)
510    }
511
512    /// Parse a new class at this `index` or if already present return a reference to it.
513    ///
514    /// The character at the `index` has to be the opening bracket character. This implies that
515    /// `start < pattern.len()`. Note a [`CharacterClass`] can be invalid if there was no
516    /// closing bracket.
517    fn get_or_add(&mut self, start: usize, pattern: &[T], class_negate: T) -> &CharacterClass<T> {
518        if let Some(last) = self.0.back() {
519            #[allow(clippy::else_if_without_else)]
520            if last.start == start {
521                // SAFETY: The equivalent safe code is `return self.0.back().unwrap()`, but calling
522                // `back()` again and unwrap is unnecessary in this case. The reference `last` is
523                // guaranteed to be valid as it is just obtained from `self.0.back()`. The mutable
524                // reference to `self` prevents any concurrent modifications to `self.0` while this
525                // function is executing, ensuring that the data remains valid between the call to
526                // `back()` and the return here.
527                return unsafe { &*(last as *const CharacterClass<T>) };
528            // We already parsed this character class
529            } else if last.start > start {
530                return self.get(start).unwrap();
531            }
532        }
533
534        let class = Self::parse(start, pattern, class_negate);
535
536        // Stick to the default allocation strategy, doubling the buffer starting with a capacity of
537        // `1`. In case of an invalid class as first class, the maximum amount of classes is `1`, so
538        // `1` might be a good starting point in any case. The maximum amount of `(pattern.len() -
539        // start) / 3` valid classes is most likely too much in typical scenarios.
540        self.0.push_back(class);
541
542        // SAFETY: This unwrap is safe since we just added a class
543        unsafe { self.0.back().unwrap_unchecked() }
544    }
545
546    /// Remove classes that have a smaller starting index than the given `index`
547    #[inline]
548    fn prune(&mut self, index: usize) {
549        while let Some(first) = self.0.front() {
550            if first.start < index {
551                self.0.pop_front();
552            } else {
553                break;
554            }
555        }
556    }
557}
558
559impl<T> Class<T>
560where
561    T: Wildcard + Ord,
562{
563    /// Create a new positive `Class`.
564    #[inline]
565    const fn new_positive() -> Self {
566        Self::Positive(Vec::new())
567    }
568
569    /// Create a new negative `Class`.
570    #[inline]
571    const fn new_negative() -> Self {
572        Self::Negative(Vec::new())
573    }
574
575    /// Add a new [`ClassKind`] to this `Class`.
576    #[inline]
577    fn push(&mut self, kind: ClassKind<T>) {
578        match self {
579            Self::Positive(kinds) | Self::Negative(kinds) => {
580                if kinds.last() != Some(&kind) {
581                    kinds.push(kind);
582                }
583            }
584        }
585    }
586
587    /// Returns `true` if a positive `Class` contains the given `token` or if negative doesn't
588    /// contain the `token`.
589    #[inline]
590    fn is_match<F, G>(&self, token: T, match_one: F, match_range: G) -> bool
591    where
592        F: Fn(T, T) -> bool + Copy,
593        G: Fn(T, T, T) -> bool + Copy,
594    {
595        match self {
596            Self::Positive(kinds) => kinds
597                .iter()
598                .any(|r| r.contains(&token, match_one, match_range)),
599            Self::Negative(kinds) => !kinds
600                .iter()
601                .any(|r| r.contains(&token, match_one, match_range)),
602        }
603    }
604}
605
606impl<T> ClassKind<T>
607where
608    T: Wildcard + Ord,
609{
610    #[inline]
611    fn contains<F, G>(&self, token: &T, match_one: F, match_range: G) -> bool
612    where
613        F: Fn(T, T) -> bool,
614        G: Fn(T, T, T) -> bool,
615    {
616        match self {
617            Self::Range(low, high) => match_range(*token, *low, *high),
618            Self::One(c) | Self::RangeOne(c) => match_one(*c, *token),
619        }
620    }
621
622    /// Does no out of bounds check for the first character
623    #[inline]
624    fn parse(index: usize, pattern: &[T]) -> Option<Self> {
625        if pattern[index] == T::DEFAULT_CLASS_CLOSE {
626            None
627        } else {
628            Some(Self::parse_first(index, pattern))
629        }
630    }
631
632    /// Does no out of bounds and `]` check for the first character
633    fn parse_first(index: usize, pattern: &[T]) -> Self {
634        let first = pattern[index];
635        if index + 2 < pattern.len() && pattern[index + 1] == T::DEFAULT_CLASS_HYPHEN {
636            let second = pattern[index + 2];
637            if second == T::DEFAULT_CLASS_CLOSE {
638                Self::One(first)
639            } else {
640                match first.cmp(&second) {
641                    Ordering::Less => Self::Range(first, second),
642                    Ordering::Equal => Self::RangeOne(first),
643                    Ordering::Greater => Self::Range(second, first),
644                }
645            }
646        } else {
647            Self::One(first)
648        }
649    }
650
651    #[inline]
652    const fn len(&self) -> usize {
653        match self {
654            Self::Range(_, _) | Self::RangeOne(_) => 3,
655            Self::One(_) => 1,
656        }
657    }
658}
659
660impl<T> Default for Options<T>
661where
662    T: Wildcard,
663{
664    fn default() -> Self {
665        Self::new()
666    }
667}
668
669impl<T> Options<T>
670where
671    T: Wildcard,
672{
673    /// Create new `Options` for the [`dowild_with`] function.
674    #[must_use]
675    pub const fn new() -> Self {
676        Self {
677            case_sensitive: true,
678            wildcard_escape: T::DEFAULT_ESCAPE,
679            is_classes_enabled: false,
680            class_negate: T::DEFAULT_CLASS_NEGATE,
681            wildcard_any: T::DEFAULT_ANY,
682            wildcard_one: T::DEFAULT_ONE,
683            is_escape_enabled: false,
684        }
685    }
686
687    /// If `true` match the pattern case-insensitive.
688    ///
689    /// The default is to match case-sensitive.
690    ///
691    /// # Examples
692    ///
693    /// ```rust
694    /// use simplematch::Options;
695    ///
696    /// let options: Options<u8> = Options::default().case_insensitive(true);
697    /// ```
698    #[must_use]
699    pub const fn case_insensitive(mut self, yes: bool) -> Self {
700        self.case_sensitive = !yes;
701        self
702    }
703
704    /// If `true` enable escaping of special characters in the pattern.
705    ///
706    /// The default is `false` and the default escape character is backslash `\`.
707    ///
708    /// # Examples
709    ///
710    /// ```rust
711    /// use simplematch::Options;
712    ///
713    /// let options: Options<u8> = Options::default().enable_escape(true);
714    /// ```
715    #[must_use]
716    pub const fn enable_escape(mut self, yes: bool) -> Self {
717        self.is_escape_enabled = yes;
718        self
719    }
720
721    /// Enable escaping of special characters but use this `token` instead of the default.
722    ///
723    /// The default is `false` and the default escape character is backslash `\`.
724    ///
725    /// # Examples
726    ///
727    /// ```rust
728    /// use simplematch::Options;
729    ///
730    /// let options = Options::default().enable_escape_with(b'#');
731    /// ```
732    #[must_use]
733    pub const fn enable_escape_with(mut self, token: T) -> Self {
734        self.is_escape_enabled = true;
735        self.wildcard_escape = token;
736        self
737    }
738
739    /// If `true`, enable character classes `[...]`.
740    ///
741    /// The default is `false`.
742    ///
743    /// # Examples
744    ///
745    /// ```rust
746    /// use simplematch::Options;
747    ///
748    /// let options: Options<u8> = Options::default().enable_classes(true);
749    /// ```
750    #[must_use]
751    pub const fn enable_classes(mut self, yes: bool) -> Self {
752        self.is_classes_enabled = yes;
753        self
754    }
755
756    /// If `true`, enable character classes `[...]` but use this `token` for the negation.
757    ///
758    /// The default is `false` and the default negation character is exclamation mark `!`.
759    ///
760    /// # Examples
761    ///
762    /// Set the negation character to the same character as regex uses it.
763    ///
764    /// ```rust
765    /// use simplematch::Options;
766    ///
767    /// let options = Options::default().enable_classes_with(b'^');
768    /// ```
769    #[must_use]
770    pub const fn enable_classes_with(mut self, negation: T) -> Self {
771        self.is_classes_enabled = true;
772        self.class_negate = negation;
773        self
774    }
775
776    /// Use this `token` instead of the default `*` to match any occurrences of a characters.
777    ///
778    /// # Examples
779    ///
780    /// ```rust
781    /// use simplematch::Options;
782    ///
783    /// let options = Options::default().wildcard_any_with(b'%');
784    /// ```
785    #[must_use]
786    pub const fn wildcard_any_with(mut self, token: T) -> Self {
787        self.wildcard_any = token;
788        self
789    }
790
791    /// Use this `token` instead of the default `?` to match exactly one character.
792    ///
793    /// # Examples
794    ///
795    /// ```rust
796    /// use simplematch::Options;
797    ///
798    /// let options = Options::default().wildcard_any_with(b'_');
799    #[must_use]
800    pub const fn wildcard_one_with(mut self, token: T) -> Self {
801        self.wildcard_one = token;
802        self
803    }
804
805    /// Check `Options` for configuration errors
806    ///
807    /// An invalid configuration consists of duplicate character assignments. For example you
808    /// can't use `*` for the escape character and `wildcard_any` character simultaneously.
809    ///
810    /// # Errors
811    ///
812    /// Returns an error if these `Options` are invalid.
813    ///
814    /// # Examples
815    ///
816    /// Assigning `?` with [`wildcard_any_with`](Options::wildcard_any_with) fails this method.
817    ///
818    /// ```rust
819    /// use simplematch::{Options, SimpleMatchError};
820    ///
821    /// assert_eq!(
822    ///     Options::default().wildcard_any_with(b'?').verified(),
823    ///     Err(SimpleMatchError::DuplicateCharacterAssignment)
824    /// );
825    /// ```
826    pub fn verify(&self) -> Result<(), SimpleMatchError> {
827        if self.wildcard_any == self.wildcard_one
828            || self.wildcard_any == self.wildcard_escape
829            || self.wildcard_any == self.class_negate
830            || self.wildcard_one == self.wildcard_escape
831            || self.wildcard_one == self.class_negate
832            || self.wildcard_escape == self.class_negate
833        {
834            return Err(SimpleMatchError::DuplicateCharacterAssignment);
835        }
836
837        Ok(())
838    }
839
840    /// A convenience method that consumes and returns these `Options` if it succeeds.
841    ///
842    /// The only difference to [`verify`] is, that this method consumes the [`Options`]
843    /// returning it on success.
844    ///
845    /// # Errors
846    ///
847    /// Returns an error if `Options` are invalid.
848    ///
849    /// # Examples
850    ///
851    /// If the configuration is valid, this method returns these `Options`.
852    ///
853    /// ```rust
854    /// use simplematch::{Options, SimpleMatchError};
855    ///
856    /// let options = Options::default()
857    ///     .wildcard_any_with(b'%')
858    ///     .verified()
859    ///     .unwrap();
860    /// ```
861    ///
862    /// Otherwise, for example assigning `?` with
863    /// [`wildcard_any_with`](Options::wildcard_any_with) fails.
864    ///
865    /// ```rust
866    /// use simplematch::{Options, SimpleMatchError};
867    ///
868    /// assert_eq!(
869    ///     Options::default().wildcard_any_with(b'?').verified(),
870    ///     Err(SimpleMatchError::DuplicateCharacterAssignment)
871    /// );
872    /// ```
873    ///
874    /// [`verify`]: Options::verify
875    pub fn verified(self) -> Result<Self, SimpleMatchError> {
876        self.verify().map(|()| self)
877    }
878}
879
880#[cfg(feature = "std")]
881impl Error for SimpleMatchError {}
882
883impl Display for SimpleMatchError {
884    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
885        match self {
886            Self::DuplicateCharacterAssignment => {
887                write!(
888                    f,
889                    "Verifying options failed: The options contain a duplicate character \
890                     assignment."
891                )
892            }
893        }
894    }
895}
896
897////////////////////////////////////////////////////////////////////////////////
898// Our trait implementations for the basic types
899////////////////////////////////////////////////////////////////////////////////
900
901impl_dowild!(u8: &[u8]);
902impl_dowild!(u8: &str => .as_bytes());
903impl_dowild!(u8: String => .as_bytes());
904impl_dowild!(u8: Vec<u8> => .as_slice());
905impl_dowild!(char: &[char]);
906impl_dowild!(char: Vec<char> => .as_slice());
907
908impl Wildcard for u8 {
909    const DEFAULT_ANY: Self = b'*';
910    const DEFAULT_ESCAPE: Self = b'\\';
911    const DEFAULT_ONE: Self = b'?';
912    const DEFAULT_CLASS_CLOSE: Self = b']';
913    const DEFAULT_CLASS_HYPHEN: Self = b'-';
914    const DEFAULT_CLASS_NEGATE: Self = b'!';
915    const DEFAULT_CLASS_OPEN: Self = b'[';
916
917    #[inline]
918    fn match_one_case_sensitive(first: Self, second: Self) -> bool {
919        first == second
920    }
921
922    #[inline]
923    fn match_one_case_insensitive(first: Self, second: Self) -> bool {
924        first.eq_ignore_ascii_case(&second)
925    }
926
927    #[inline]
928    fn match_range_case_sensitive(token: Self, low: Self, high: Self) -> bool {
929        low <= token && token <= high
930    }
931
932    #[inline]
933    fn match_range_case_insensitive(token: Self, low: Self, high: Self) -> bool {
934        if low <= token && token <= high {
935            true
936        } else if !token.is_ascii_alphabetic() {
937            false
938        } else {
939            is_in_ascii_range_case_insensitive(token, low, high)
940        }
941    }
942}
943
944impl Wildcard for char {
945    const DEFAULT_ANY: Self = '*';
946    const DEFAULT_ESCAPE: Self = '\\';
947    const DEFAULT_ONE: Self = '?';
948    const DEFAULT_CLASS_CLOSE: Self = ']';
949    const DEFAULT_CLASS_HYPHEN: Self = '-';
950    const DEFAULT_CLASS_NEGATE: Self = '!';
951    const DEFAULT_CLASS_OPEN: Self = '[';
952
953    #[inline]
954    fn match_one_case_insensitive(first: Self, second: Self) -> bool {
955        first.eq_ignore_ascii_case(&second)
956    }
957
958    #[inline]
959    fn match_one_case_sensitive(first: Self, second: Self) -> bool {
960        first == second
961    }
962
963    #[inline]
964    fn match_range_case_sensitive(token: Self, low: Self, high: Self) -> bool {
965        low <= token && token <= high
966    }
967
968    #[inline]
969    fn match_range_case_insensitive(token: Self, low: Self, high: Self) -> bool {
970        if low <= token && token <= high {
971            true
972        } else if !(low.is_ascii() && high.is_ascii() && token.is_ascii_alphabetic()) {
973            false
974        } else {
975            is_in_ascii_range_case_insensitive(token as u8, low as u8, high as u8)
976        }
977    }
978}
979
980////////////////////////////////////////////////////////////////////////////////
981// The main dowild functions
982////////////////////////////////////////////////////////////////////////////////
983
/// Returns `true` if the wildcard pattern matches the `haystack`.
///
/// Allowed wildcard characters are `*` to match any amount of characters and `?` to match
/// exactly one character.
///
/// This is the basic algorithm without customization options to provide the best performance.
/// If you need [`Options`] you can use [`dowild_with`].
///
/// Instead of using this function, match directly on strings, u8 slices, ... without
/// performance loss, if you bring the [`DoWild`] trait in scope.
///
/// The implementation is iterative: it walks `pattern` and `haystack` with one index each and,
/// after a mismatch that follows a `*`, restarts from a remembered pair of indices one
/// haystack position further to the right.
///
/// See also the [library documentation](crate) for more details.
///
/// # Examples
///
/// ```rust
/// use simplematch::dowild;
///
/// assert_eq!(dowild("*bc".as_bytes(), "aaabc".as_bytes()), true);
/// ```
///
/// or more conveniently directly on a string
///
/// ```rust
/// use simplematch::DoWild;
///
/// assert_eq!("*bc".dowild("aaabc"), true);
/// ```
#[must_use]
pub fn dowild<T>(pattern: &[T], haystack: &[T]) -> bool
where
    T: Wildcard,
{
    // Current positions in the pattern and the haystack.
    let mut p_idx = 0;
    let mut h_idx = 0;

    // Positions to restart from after a mismatch. They are only meaningful once a
    // `wildcard_any` has been seen.
    let mut next_p_idx = 0;
    let mut next_h_idx = 0;

    let wildcard_any = T::DEFAULT_ANY;
    let wildcard_one = T::DEFAULT_ONE;

    let mut has_seen_wildcard_any = false;
    while p_idx < pattern.len() || h_idx < haystack.len() {
        if p_idx < pattern.len() {
            match pattern[p_idx] {
                // This (expensive) case is ensured to be entered only once per `wildcard_any` (or
                // multiple consecutive `wildcard_any`) character in the pattern. This allows us to
                // perform optimizations which would be otherwise not worth it. Note that every
                // increment of the indices in this match case also increments the respective
                // `next_*` index in the end.
                c if c == wildcard_any => {
                    has_seen_wildcard_any = true;
                    p_idx += 1;

                    // Consecutive `wildcard_any` characters are equivalent to a single one.
                    while p_idx < pattern.len() && pattern[p_idx] == wildcard_any {
                        p_idx += 1;
                    }
                    // A trailing `wildcard_any` matches whatever is left of the haystack.
                    if p_idx >= pattern.len() {
                        return true;
                    }

                    let next_c = pattern[p_idx];
                    if next_c == wildcard_one {
                        // 1. This optimization prevents checking for the same `wildcard_one`
                        //    character in the big loop again.
                        // 2. More importantly for the performance, we can advance the pattern and
                        //    haystack for all index counters including `next_h_idx` and
                        //    `next_p_idx`.
                        while h_idx < haystack.len() {
                            p_idx += 1;
                            h_idx += 1;
                            if !(p_idx < pattern.len() && pattern[p_idx] == next_c) {
                                break;
                            }
                        }
                        // The end of the haystack might not yet be reached, but every
                        // `wildcard_one` has consumed one haystack element and the preceding
                        // `wildcard_any` accounts for the rest. For example `*????` matches every
                        // haystack with at least four elements.
                        if p_idx >= pattern.len() {
                            return true;
                        }
                    } else {
                        // Advancing the haystack and indirectly the `next_h_idx` counter to the
                        // first match significantly enhances the overall performance.
                        while h_idx < haystack.len() && haystack[h_idx] != next_c {
                            h_idx += 1;
                        }
                        // The literal after `wildcard_any` does not occur in the remaining
                        // haystack, so no match is possible.
                        if h_idx >= haystack.len() {
                            return false;
                        }
                    }

                    // Instead of pinning `next_p_idx` to the `wildcard_any` index and entering this
                    // match case in the big loop again after a reset to the `next` indices, it's
                    // more efficient to pin it to the first character after `wildcard_any` (or
                    // after `wildcard_one` if it is the character after `wildcard_any`). However, we
                    // need to ensure in this match case that `next_p_idx` is not out of bounds.
                    next_p_idx = p_idx;
                    next_h_idx = h_idx;
                    continue;
                }
                // `wildcard_one` consumes exactly one haystack element. If the haystack is
                // exhausted, control falls through to the reset logic below.
                c if c == wildcard_one => {
                    if h_idx < haystack.len() {
                        p_idx += 1;
                        h_idx += 1;
                        continue;
                    }
                }
                // Any other pattern element has to be equal to the current haystack element,
                // otherwise control falls through to the reset logic below.
                c => {
                    if h_idx < haystack.len() && haystack[h_idx] == c {
                        p_idx += 1;
                        h_idx += 1;
                        continue;
                    }
                }
            }
        }
        // If `true`, we need to reset. Therefore, this statement can be entered multiple times per
        // `wildcard_any`, so we need to be more careful with optimizations here than in the
        // `wildcard_any` match case above.
        if has_seen_wildcard_any && next_h_idx < haystack.len() {
            p_idx = next_p_idx;
            next_h_idx += 1;

            // We don't enter the `wildcard_any` match case in the big loop again, so we have to
            // apply this optimization from above here again, if applicable. This check lets the
            // compiler optimize the loop better than without the check although p_idx can't be
            // out of bounds here.
            if p_idx < pattern.len() {
                while next_h_idx < haystack.len() && haystack[next_h_idx] != pattern[p_idx] {
                    next_h_idx += 1;
                }
            }

            h_idx = next_h_idx;
            continue;
        }

        // Mismatch without a `wildcard_any` to fall back to, or no haystack position left to
        // retry from.
        return false;
    }

    // The pattern and the haystack are both exhausted which means we have a match
    true
}
1128
1129/// Returns `true` if the wildcard pattern matches the `haystack`. This method can be
1130/// customized with [`Options`].
1131///
1132/// Don't use this method if you only need the default [`Options`]. The [`dowild`] function is
1133/// more performant in such cases.
1134///
1135/// Like with [`dowild`], allowed wildcard characters are `*` to match any amount of characters
1136/// and `?` to match exactly one character.
1137///
1138/// The [`Options`] structure allows for case-insensitive matching. You can customize the
1139/// `wildcard_any` character (`*`) and the `wildcard_one` character (`?`). Escaping can also
1140/// be enabled, allowing you to specify a custom escape character. Additionally, character
1141/// classes and ranges, such as `[a-z]`, are supported, and the negation character can be
1142/// customized to match all characters not included in a specified range, as in `[!a-z]`.
1143///
1144/// See also the [library documentation](crate) for more details.
1145///
1146/// # Examples
1147///
1148/// ```rust
1149/// use simplematch::{dowild_with, Options};
1150///
1151/// assert_eq!(
1152///     dowild_with(
1153///         "*bc".as_bytes(),
1154///         "AAabc".as_bytes(),
1155///         Options::default().case_insensitive(true)
1156///     ),
1157///     true
1158/// );
1159/// ```
1160///
1161/// or more conveniently match directly on a string bringing the [`DoWild`] trait in
1162/// scope.
1163///
1164/// ```rust
1165/// use simplematch::{DoWild, Options};
1166///
1167/// assert_eq!(
1168///     "%bc".dowild_with("aaabc", Options::default().wildcard_any_with(b'%')),
1169///     true
1170/// );
1171/// ```
1172#[must_use]
1173pub fn dowild_with<T>(pattern: &[T], haystack: &[T], options: Options<T>) -> bool
1174where
1175    T: Wildcard + Ord,
1176{
1177    if options.case_sensitive {
1178        dowild_with_worker(
1179            pattern,
1180            haystack,
1181            options,
1182            T::match_one_case_sensitive,
1183            T::match_range_case_sensitive,
1184        )
1185    } else {
1186        dowild_with_worker(
1187            pattern,
1188            haystack,
1189            options,
1190            T::match_one_case_insensitive,
1191            T::match_range_case_insensitive,
1192        )
1193    }
1194}
1195
/// This method has the same structure as [`dowild`] but can apply [`Options`].
///
/// `match_one` compares a haystack element with a literal pattern element and `match_range` is
/// handed to the character class matching. [`dowild_with`] passes the case-sensitive or the
/// case-insensitive pair of [`Wildcard`] functions here.
///
/// Customizability has a price performance-wise, so this method is by nature slower than
/// [`dowild`].
#[inline]
#[allow(clippy::too_many_lines)]
fn dowild_with_worker<F, G, T>(
    pattern: &[T],
    haystack: &[T],
    options: Options<T>,
    match_one: F,
    match_range: G,
) -> bool
where
    T: Wildcard + Ord,
    F: Fn(T, T) -> bool + Copy,
    G: Fn(T, T, T) -> bool + Copy,
{
    let Options {
        class_negate,
        is_classes_enabled,
        is_escape_enabled,
        wildcard_any,
        wildcard_escape,
        wildcard_one,
        ..
    } = options;

    // Classifiers for pattern tokens. They compare exactly, independent of `match_one`.
    let is_wildcard_any = |token: T| token == wildcard_any;
    let is_wildcard_one = |token: T| token == wildcard_one;
    let is_escape = |token: T| is_escape_enabled && token == wildcard_escape;
    let is_class_open = |token: T| is_classes_enabled && token == T::DEFAULT_CLASS_OPEN;

    // Only used in the escape match case below, i.e. when escaping is enabled, to decide whether
    // the token after the escape character can be escaped at all.
    let is_special = |token: T| {
        token == wildcard_any
            || token == wildcard_one
            || token == wildcard_escape
            || (is_classes_enabled && token == T::DEFAULT_CLASS_OPEN)
    };

    // `true` if the token may start a character class or an escape sequence. Such a token can't
    // be compared with a haystack element via `match_one`, so the skip-ahead optimizations
    // below are not applied to it.
    let is_valid_class_or_escape = |token: T, p_idx: usize, invalid_class_idx: usize| {
        (is_classes_enabled && token == T::DEFAULT_CLASS_OPEN && p_idx < invalid_class_idx)
            || (is_escape_enabled && token == wildcard_escape)
    };

    // Current positions in the pattern and the haystack.
    let mut p_idx = 0;
    let mut h_idx = 0;

    // Positions to restart from after a mismatch. They are only meaningful once a
    // `wildcard_any` has been seen.
    let mut next_p_idx = 0;
    let mut next_h_idx = 0;

    // There are no allocations, yet. `CharacterClasses` allocate on first use.
    let mut classes = CharacterClasses::new();

    let mut has_seen_wildcard_any = false;
    // Class opening characters at or after this pattern index are treated as literals. It stays
    // at `usize::MAX` until an invalid class has been encountered.
    let mut invalid_class_idx = usize::MAX;

    while p_idx < pattern.len() || h_idx < haystack.len() {
        if p_idx < pattern.len() {
            match pattern[p_idx] {
                c if is_wildcard_any(c) => {
                    has_seen_wildcard_any = true;
                    p_idx += 1;

                    // Consecutive `wildcard_any` characters are equivalent to a single one.
                    while p_idx < pattern.len() && is_wildcard_any(pattern[p_idx]) {
                        p_idx += 1;
                    }
                    // A trailing `wildcard_any` matches whatever is left of the haystack.
                    if p_idx >= pattern.len() {
                        return true;
                    }

                    let next_c = pattern[p_idx];
                    #[allow(clippy::else_if_without_else)]
                    if is_wildcard_one(next_c) {
                        // Consume one haystack element per `wildcard_one` that directly follows
                        // the `wildcard_any`.
                        while h_idx < haystack.len() {
                            p_idx += 1;
                            h_idx += 1;
                            if !(p_idx < pattern.len() && is_wildcard_one(pattern[p_idx])) {
                                break;
                            }
                        }
                        if p_idx >= pattern.len() {
                            return true;
                        }
                    } else if !is_valid_class_or_escape(next_c, p_idx, invalid_class_idx) {
                        // `next_c` is a literal: skip ahead to its first occurrence in the
                        // haystack. Without such an occurrence no match is possible.
                        while h_idx < haystack.len() && !match_one(haystack[h_idx], next_c) {
                            h_idx += 1;
                        }
                        if h_idx >= haystack.len() {
                            return false;
                        }
                    }

                    next_p_idx = p_idx;
                    next_h_idx = h_idx;
                    continue;
                }
                // `wildcard_one` consumes exactly one haystack element. If the haystack is
                // exhausted, control falls through to the reset logic below.
                c if is_wildcard_one(c) => {
                    if h_idx < haystack.len() {
                        p_idx += 1;
                        h_idx += 1;
                        continue;
                    }
                }
                // Handling of the escape character. If it is the last character in the pattern, it
                // can only stand for itself.
                c if is_escape(c) && p_idx + 1 < pattern.len() => {
                    if h_idx < haystack.len() {
                        let next_c = pattern[p_idx + 1];
                        let h = haystack[h_idx];

                        // NOTE(review): both comparisons below use `==` whereas every other
                        // literal comparison in this function goes through `match_one`. With an
                        // alphabetic escape or wildcard character and case-insensitive matching
                        // the results could differ — confirm that this is intended.
                        #[allow(clippy::else_if_without_else)]
                        if is_special(next_c) && h == next_c {
                            // An escaped special character matches itself literally. The escape
                            // character and the escaped character are both consumed.
                            p_idx += 2;
                            h_idx += 1;
                            continue;
                        } else if !is_special(next_c) && h == wildcard_escape {
                            // The escape character does not escape anything here and stands for
                            // itself. `next_c` is handled in the next iteration.
                            p_idx += 1;
                            h_idx += 1;
                            continue;
                        }
                    }
                }
                // Handle character classes. To avoid parsing the same classes multiple times on
                // reset, every class including the invalid ones are stored in a container. However,
                // classes that are outside of the possible index don't need to be considered anymore
                // and are pruned.
                c if is_class_open(c) && p_idx < invalid_class_idx && p_idx + 1 < pattern.len() => {
                    if h_idx < haystack.len() {
                        let class = if has_seen_wildcard_any {
                            // Try to get rid of classes outside of the possible index
                            classes.prune(next_p_idx);
                            BorrowedOrOwned::Borrowed(classes.get_or_add(
                                p_idx,
                                pattern,
                                class_negate,
                            ))
                        } else {
                            // There's no need to store character classes as long as we don't require
                            // to reset.
                            BorrowedOrOwned::Owned(CharacterClasses::parse(
                                p_idx,
                                pattern,
                                class_negate,
                            ))
                        };

                        // Try to match this class. If it is an invalid class, we can interpret the
                        // opening bracket character literally and the rest of the pattern as if
                        // there is no class. If the class is valid and matched, we can advance as
                        // usual, otherwise we need to reset.
                        #[allow(clippy::else_if_without_else)]
                        if let Some(is_match) =
                            class.try_match(haystack[h_idx], match_one, match_range)
                        {
                            // Advancing `p_idx` before looking at `is_match` is harmless: on a
                            // mismatch the code after the `match` either overwrites `p_idx` with
                            // `next_p_idx` or returns `false`.
                            p_idx += class.len();
                            if is_match {
                                h_idx += 1;
                                continue;
                            }
                        } else {
                            // Presumably `start` is the pattern index of the opening bracket of
                            // this class — TODO confirm against `CharacterClasses`.
                            invalid_class_idx = class.as_ref().start;
                            // A small shortcut to avoid the big loop and enter the generic
                            // character case.
                            if match_one(haystack[h_idx], T::DEFAULT_CLASS_OPEN) {
                                p_idx += 1;
                                h_idx += 1;
                                continue;
                            }
                        }
                    }
                }
                // Generic literal: also reached by an escape character or a class opening
                // character that did not satisfy the guards of the match cases above.
                c => {
                    if h_idx < haystack.len() && match_one(haystack[h_idx], c) {
                        p_idx += 1;
                        h_idx += 1;
                        continue;
                    }
                }
            }
        }
        // Reset: retry the part of the pattern after the last `wildcard_any` one haystack
        // position further to the right than the previous attempt.
        if has_seen_wildcard_any && next_h_idx < haystack.len() {
            p_idx = next_p_idx;
            next_h_idx += 1;

            // If the pattern continues with a literal, skip ahead to its next occurrence in the
            // haystack like in the `wildcard_any` match case above.
            if p_idx < pattern.len()
                && !is_valid_class_or_escape(pattern[p_idx], p_idx, invalid_class_idx)
            {
                while next_h_idx < haystack.len() && !match_one(haystack[next_h_idx], pattern[p_idx])
                {
                    next_h_idx += 1;
                }
            }

            h_idx = next_h_idx;
            continue;
        }

        // Mismatch without a `wildcard_any` to fall back to, or no haystack position left to
        // retry from.
        return false;
    }
    // The pattern and the haystack are both exhausted which means we have a match
    true
}
1398
/// Checks whether the case counterpart of `token` lies in the inclusive range `low..=high`.
///
/// `token` must be an ASCII alphabetic character. Its case is flipped (lowercase becomes
/// uppercase and vice versa) and the flipped value is compared against the bounds. Callers are
/// expected to have checked `token` itself against the range already.
///
/// The result can be counter-intuitive: for the range `A-j` and the token `z` this function
/// returns `true`, because `Z` lies within `A-j`. This is, however, how regex engines (tested
/// with python, go, the regex crate, ...) usually evaluate such a range.
#[inline]
const fn is_in_ascii_range_case_insensitive(token: u8, low: u8, high: u8) -> bool {
    // ASCII upper- and lowercase letters differ in this single bit only.
    const CASE_BIT: u8 = 0b0010_0000;

    let flipped = if token.is_ascii_lowercase() {
        // The bit is known to be set for lowercase letters, clearing it yields the uppercase.
        token & !CASE_BIT
    } else {
        // `token` is alphabetic and not lowercase, so setting the bit yields the lowercase.
        token | CASE_BIT
    };

    flipped >= low && flipped <= high
}
1419
/// Unit tests for the [`Wildcard`] implementations of `u8` and `char`.
///
/// Every case is written with `u8` values and is additionally run with the same values
/// converted to `char`, so both implementations have to agree for ASCII input.
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Single character comparison, case-sensitive or case-insensitive depending on the case.
    #[rstest]
    #[case::same_case_sensitive(b'j', b'j', true, true)]
    #[case::different_case_case_sensitive(b'j', b'J', true, false)]
    #[case::different_char_case_sensitive(b'a', b'b', true, false)]
    #[case::same_case_insensitive(b'j', b'j', false, true)]
    #[case::different_case_insensitive(b'j', b'J', false, true)]
    #[case::different_char_case_insensitive(b'a', b'B', false, false)]
    fn impl_wildcard_match_one(
        #[case] first: u8,
        #[case] second: u8,
        #[case] case_sensitive: bool,
        #[case] expected: bool,
    ) {
        if case_sensitive {
            assert_eq!(Wildcard::match_one_case_sensitive(first, second), expected);
            assert_eq!(
                Wildcard::match_one_case_sensitive(first as char, second as char),
                expected
            );
        } else {
            assert_eq!(
                Wildcard::match_one_case_insensitive(first, second),
                expected
            );
            assert_eq!(
                Wildcard::match_one_case_insensitive(first as char, second as char),
                expected
            );
        }
    }

    /// Inclusive range check without case folding. The arguments are `token`, `low`, `high`.
    #[rstest]
    #[case::all_the_same(b'j', b'j', b'j', true)]
    #[case::low_is_higher_high_is_same(b'j', b'k', b'j', false)]
    #[case::low_is_lower_high_is_same(b'j', b'i', b'j', true)]
    #[case::high_is_lower_low_is_same(b'j', b'k', b'i', false)]
    #[case::high_is_higher_low_is_same(b'j', b'j', b'k', true)]
    #[case::non_alpha_when_false(b'#', b'*', b']', false)]
    #[case::non_alpha_when_true(b'+', b'*', b']', true)]
    #[case::only_token_alpha(b'a', b'*', b']', false)]
    #[case::only_token_big_alpha(b'A', b'*', b']', true)]
    #[case::between_alphabetic(b']', b'*', b'B', false)]
    fn impl_wildcard_match_range_when_case_sensitive(
        #[case] token: u8,
        #[case] low: u8,
        #[case] high: u8,
        #[case] expected: bool,
    ) {
        assert_eq!(
            Wildcard::match_range_case_sensitive(token, low, high),
            expected
        );
        assert_eq!(
            Wildcard::match_range_case_sensitive(token as char, low as char, high as char),
            expected
        );
    }

    /// Inclusive range check with ASCII case folding. The arguments are `token`, `low`, `high`.
    #[rstest]
    #[case::all_the_same_small(b'j', b'j', b'j', true)]
    #[case::all_the_same_big(b'J', b'J', b'J', true)]
    // This token is one of the characters between `Z` and `a`
    #[case::no_alpha_low_is_big(b'[', b'A', b'z', true)]
    #[case::no_alpha_both_big(b'[', b'A', b'Z', false)]
    // Lowercase lower bound with an uppercase upper bound. This case used to repeat the inputs
    // of `no_alpha_both_small`, so the combination its name describes was never exercised.
    #[case::no_alpha_low_is_small(b'[', b'a', b'Z', false)]
    #[case::no_alpha_both_small(b'[', b'a', b'z', false)]
    #[case::all_small_middle(b'j', b'a', b'z', true)]
    #[case::all_small_low_is_higher(b'j', b'k', b'z', false)]
    #[case::all_small_high_is_lower(b'j', b'a', b'i', false)]
    #[case::all_big_middle(b'J', b'A', b'Z', true)]
    #[case::all_big_low_is_higher(b'J', b'K', b'Z', false)]
    #[case::all_big_high_is_lower(b'J', b'A', b'I', false)]
    // `z` itself is outside of `A-j` but its uppercase counterpart `Z` is inside
    #[case::big_a_to_j(b'z', b'A', b'j', true)]
    #[case::non_alpha_when_false(b'#', b'*', b']', false)]
    #[case::control_when_false(b'\x1f', b'*', b']', false)]
    #[case::non_alpha_when_true(b'+', b'*', b']', true)]
    #[case::only_token_alpha(b'a', b'*', b']', true)]
    #[case::only_token_big_alpha(b'A', b'*', b']', true)]
    #[case::between_alphabetic(b']', b'*', b'B', false)]
    fn impl_wildcard_match_range_when_case_insensitive(
        #[case] token: u8,
        #[case] low: u8,
        #[case] high: u8,
        #[case] expected: bool,
    ) {
        assert_eq!(
            Wildcard::match_range_case_insensitive(token, low, high),
            expected
        );
        assert_eq!(
            Wildcard::match_range_case_insensitive(token as char, low as char, high as char),
            expected
        );
    }
}
simplematch/lib.rs

simplematch/
lib.rs