simplematch/lib.rs
1//! # simplematch
2//!
3//! The `simplematch` library provides a fast and efficient way to match wildcard patterns on
4//! strings and bytes. It includes two primary functions, `dowild` and `dowild_with`, along
5//! with an `Options` struct to customize the behavior of the `dowild_with` function.
6//!
7//! ## Usage
8//!
9//! To use the `simplematch` library, include it in your `Cargo.toml`:
10//!
11//! ```toml
12//! [dependencies]
13//! simplematch = "0.3.1"
14//! ```
15//!
16//! ## Functions
17//!
18//! ### `dowild`
19//!
20//! This function is the most performant but has no customization options.
21//!
22//! ```rust, ignore
23//! pub fn dowild<T>(pattern: &[T], haystack: &[T]) -> bool
24//! where
25//! T: Wildcard
26//! ```
27//!
28//! Matches the given `haystack` against the specified `pattern` using simple wildcard rules.
29//! The `*` character matches any sequence of characters, while the `?` character matches
30//! a single character.
31//!
32//! `Wildcard` is natively implemented for `u8` and `char`.
33//!
34//! **Parameters:**
35//! - `pattern`: A bytes or char slice representing the wildcard pattern to match against.
36//! - `haystack`: A bytes or char slice representing the text to be matched.
37//!
38//! **Returns:**
39//! - `true` if the `pattern` matches the `haystack`, otherwise `false`.
40//!
41//! #### Examples
42//!
43//! ```rust
44//! use simplematch::dowild;
45//!
46//! assert_eq!(dowild("foo*".as_bytes(), "foobar".as_bytes()), true);
47//! assert_eq!(dowild("foo?".as_bytes(), "fooa".as_bytes()), true)
48//! ```
49//!
50//! Or, bringing the trait [`DoWild`] in scope allows for more convenient access to this
51//! function without performance loss:
52//!
53//! ```rust
54//! use simplematch::DoWild;
55//!
56//! assert_eq!("foo*".dowild("foobar"), true);
57//! ```
58//!
59//! A possible usage with `char`:
60//!
61//! ```rust
62//! use simplematch::DoWild;
63//!
64//! let pattern = "foo*".chars().collect::<Vec<char>>();
65//! let haystack = "foobar".chars().collect::<Vec<char>>();
66//!
67//! assert_eq!(pattern.dowild(haystack), true);
68//! ```
69//!
70//! ### `dowild_with`
71//!
72//! ```rust, ignore
73//! use simplematch::Options;
74//!
75//! pub fn dowild_with<T>(pattern: &[T], haystack: &[T], options: Options<T>) -> bool
76//! where
77//! T: Wildcard + Ord,
78//! ```
79//!
80//! Matches the given `haystack` against the specified `pattern` with customizable [`Options`].
//! This function supports case-insensitive matching, custom wildcard characters, escaping of
//! special characters and character classes including ranges.
83//!
84//! **Parameters:**
85//! - `pattern`: A bytes or char slice representing the wildcard pattern to match against.
86//! - `haystack`: A bytes or char slice representing the text to be matched.
87//! - `options`: An instance of the [`Options`] struct to customize the matching behavior.
88//!
89//! **Returns:**
90//! - `true` if the `pattern` matches the `haystack` according to the specified options,
91//! otherwise `false`.
92//!
93//! #### Examples
94//!
95//! ```rust
96//! use simplematch::{dowild_with, Options};
97//!
98//! let options = Options::default()
99//! .case_insensitive(true)
100//! .wildcard_any_with(b'%');
101//!
102//! assert_eq!(
103//! dowild_with("foo%".as_bytes(), "FOOBAR".as_bytes(), options),
104//! true
105//! );
106//! ```
107//!
//! Like [`dowild`], the [`dowild_with`] function can be called directly on a string, a `u8`
//! slice and the other types implementing [`DoWild`]:
110//!
111//! ```rust
112//! use simplematch::{DoWild, Options};
113//!
114//! assert_eq!(
115//! "foo*".dowild_with("FOObar", Options::default().case_insensitive(true)),
116//! true
117//! );
118//! ```
119//!
120//! ## Character classes
121//!
122//! An expression `[...]` matches a single character if the first character following the
123//! leading `[` is not an `!`. The contents of the brackets must not be empty otherwise the
124//! brackets are interpreted literally (the pattern `a[]c` matches `a[]c` exactly); however, a
125//! `]` can be included as the first character within the brackets. For example, `[][!]`
126//! matches the three characters `[`, `]`, and `!`.
127//!
128//! ## Ranges
129//!
130//! A special convention exists where two characters separated by `-` represent a range.
131//! For instance, `[A-Fa-f0-9]` is equivalent to `[ABCDEFabcdef0123456789]`.
132//! To include `-` as a literal character, it must be placed as the first or last character
133//! within the brackets. For example, `[]-]` matches the two characters `]` and `-`. As opposed
//! to regex, it is possible to reverse a range: `[F-A]` has the same meaning as `[A-F]`.
135//!
136//! ## Complementation
137//!
138//! An expression `[!...]` matches any single character that is not included in the expression
139//! formed by removing the first `!`. For example, `[!]a-]` matches any character except `]`,
140//! `a`, and `-`.
141//!
142//! To remove the special meanings of `?`, `*`, and `[`, you can precede them with the escape
143//! character (per default the backslash character `\`). Within brackets, these characters
144//! represent themselves. For instance, `[[?*\\]` matches the four characters `[`, `?`, `*`,
145//! and `\`.
146//!
147//! ## Credits
148//!
149//! This linear-time wildcard matching algorithm is derived from the one presented in Russ
150//! Cox's great article about simple and performant glob matching (<https://research.swtch.com/glob>).
151//! Furthermore, the optimizations for the `?` handling are based on the article [Matching
152//! Wildcards: An Improved Algorithm for Big
153//! Data](https://developforperformance.com/MatchingWildcards_AnImprovedAlgorithmForBigData.html)
154//! written by Kirk J. Krauss.
155//!
//! The `simplematch` algorithm is an improved version which generally uses about 2-6x fewer
//! instructions than the original algorithm; tested with random small and big data.
158
159// spell-checker: ignore aaabc fooa Krauss
160
161#![cfg_attr(not(feature = "std"), no_std)]
162#![cfg_attr(docsrs, feature(doc_auto_cfg))]
163
// Generates the `DoWild` implementation for a concrete type.
//
// * `impl_dowild!(Elem: Type)` forwards `self` and `haystack` unchanged to `dowild` and
//   `dowild_with`.
// * `impl_dowild!(Elem: Type => tokens)` appends `tokens` (for example `.as_bytes()`) to both
//   `self` and `haystack` before forwarding them.
macro_rules! impl_dowild {
    // No conversion given: the same as the conversion form with nothing to append.
    ( $elem:ty: $target:ty ) => {
        impl_dowild!($elem: $target =>);
    };
    ( $elem:ty: $target:ty => $( $convert:tt )* ) => {
        impl DoWild<$elem> for $target {
            fn dowild(&self, haystack: Self) -> bool {
                dowild(self $( $convert )*, haystack $( $convert )*)
            }

            fn dowild_with(&self, haystack: Self, options: Options<$elem>) -> bool {
                dowild_with(self $( $convert )*, haystack $( $convert )*, options)
            }
        }
    };
}
188
189#[cfg(not(feature = "std"))]
190extern crate alloc;
191
192#[cfg(not(feature = "std"))]
193use alloc::collections::VecDeque;
194#[cfg(not(feature = "std"))]
195use alloc::string::String;
196#[cfg(not(feature = "std"))]
197use alloc::vec::Vec;
198use core::cmp::Ordering;
199use core::fmt::Display;
200use core::ops::Deref;
201#[cfg(feature = "std")]
202use std::collections::VecDeque;
203#[cfg(feature = "std")]
204use std::error::Error;
205#[cfg(feature = "std")]
206use std::string::String;
207#[cfg(feature = "std")]
208use std::vec::Vec;
209
210/// A convenience trait to use [`dowild`] and [`dowild_with`] directly for this type
211///
212/// This trait is natively implemented for
213///
214/// * `&str`
215/// * `String`
216/// * `&[u8]`
217/// * `Vec<u8>`
218/// * `&[char]`
219/// * `Vec<char>`
220///
221/// # Examples
222///
223/// Use [`dowild`] directly on a `&str`
224///
225/// ```rust
226/// use simplematch::DoWild;
227///
228/// assert_eq!("foo*".dowild("foobar"), true);
229/// ```
230pub trait DoWild<T>
231where
232 T: Wildcard,
233{
234 /// Matches this `pattern` against the specified `haystack` using simple wildcard rules.
235 ///
236 /// See [`dowild`] for more details.
237 ///
238 /// # Examples
239 ///
240 /// ```rust
241 /// use simplematch::DoWild;
242 ///
243 /// assert_eq!("foo*".dowild("foobar"), true);
244 /// ```
245 #[must_use]
246 fn dowild(&self, haystack: Self) -> bool;
247
248 /// Matches this `pattern` against the specified `haystack` with customizable [`Options`].
249 ///
250 /// See [`dowild_with`] for more details.
251 ///
252 /// # Examples
253 ///
254 /// ```rust
255 /// use simplematch::{DoWild, Options};
256 ///
257 /// assert_eq!(
258 /// "foo*".dowild_with("foobar", Options::default().case_insensitive(true)),
259 /// true
260 /// );
261 /// ```
262 #[must_use]
263 fn dowild_with(&self, haystack: Self, options: Options<T>) -> bool;
264}
265
/// The trait for the element types of patterns and haystacks
///
/// An implementation provides the default special tokens and the comparison functions for
/// single characters and for character ranges.
pub trait Wildcard: Copy + Clone + Eq {
    /// The default token to match any number of characters, usually `*`.
    const DEFAULT_ANY: Self;
    /// The default token to match exactly one character, usually `?`.
    const DEFAULT_ONE: Self;
    /// The default token to escape special characters, usually `\`.
    const DEFAULT_ESCAPE: Self;

    /// The default token to open a character class pattern, usually `[`.
    const DEFAULT_CLASS_OPEN: Self;
    /// The default token to close a character class pattern, usually `]`.
    const DEFAULT_CLASS_CLOSE: Self;
    /// The default token to negate a character class, usually `!`.
    const DEFAULT_CLASS_NEGATE: Self;
    /// The default token to specify a range, usually `-`.
    const DEFAULT_CLASS_HYPHEN: Self;

    /// Returns `true` if the two characters match case-sensitive
    fn match_one_case_sensitive(first: Self, second: Self) -> bool;
    /// Returns `true` if the two characters match case-insensitive
    fn match_one_case_insensitive(first: Self, second: Self) -> bool;

    /// Returns `true` if the `token` matches the range from `low` to `high` case-sensitive
    fn match_range_case_sensitive(token: Self, low: Self, high: Self) -> bool;
    /// Returns `true` if the `token` matches the range from `low` to `high` case-insensitive
    fn match_range_case_insensitive(token: Self, low: Self, high: Self) -> bool;
}
293
/// A simple container which holds the value `T` either borrowed or owned
///
/// `Cow` would have been an alternative but it requires `std` and we don't need the actual
/// copy-on-write property, just a container for borrowed or owned data.
///
/// Both variants dereference to the contained `T`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum BorrowedOrOwned<'a, T> {
    /// A reference to a value stored elsewhere
    Borrowed(&'a T),
    /// A value stored directly in this container
    Owned(T),
}
303
/// The parsed contents of a character class `[...]`
///
/// A class is a list of [`ClassKind`]s and is either positive or negative (negated).
#[derive(Debug, Clone)]
enum Class<T> {
    /// Matches a token if at least one of the kinds contains it
    Positive(Vec<ClassKind<T>>),
    /// Matches a token if none of the kinds contains it
    Negative(Vec<ClassKind<T>>),
}
309
/// A single element of a character class
///
/// The `Range` and `RangeOne` kinds span three characters of the pattern, `One` spans a
/// single character.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum ClassKind<T> {
    /// A range like `a-z`
    ///
    /// When parsed, the smaller character is stored first, so `z-a` is stored like `a-z`.
    Range(T, T),
    /// A single character
    One(T),
    /// A range which has the same start and end character like `z-z`
    RangeOne(T),
}
319
/// The `Error` of the simplematch crate
///
/// Returned by [`Options::verify`] and [`Options::verified`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SimpleMatchError {
    /// A character in [`Options`] was assigned multiple times
    DuplicateCharacterAssignment,
}
326
/// Represents a character class `[...]` together with its position in the pattern
///
/// The positions are indices into the pattern the class was parsed from.
#[derive(Debug, Clone)]
struct CharacterClass<T> {
    /// If `None`, the character class is invalid.
    class: Option<Class<T>>,
    /// The end index in the pattern
    ///
    /// For a valid class this is the index of the closing bracket.
    end: usize,
    /// The start index in the pattern, the index of the opening bracket
    start: usize,
}
337
/// The collection of the [`CharacterClass`]es parsed so far
///
/// Newly parsed classes are appended at the back and pruned classes are removed from the
/// front.
#[derive(Debug, Clone)]
struct CharacterClasses<T>(VecDeque<CharacterClass<T>>);
340
/// Customize the matching behavior of the [`dowild_with`] function
///
/// The `Options` can be checked for duplicate character assignments with
/// [`Options::verify`] or [`Options::verified`].
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub struct Options<T>
where
    T: Wildcard,
{
    /// If `true` the patterns are matched case-sensitive
    ///
    /// The default is to match case-sensitive. Currently, only ascii characters are
    /// considered.
    pub case_sensitive: bool,

    /// The token to negate a character class.
    ///
    /// The default is `!`
    pub class_negate: T,

    /// Set to `true` to enable character classes `[...]`.
    ///
    /// The default is `false`.
    pub is_classes_enabled: bool,

    /// Set to `true` to enable escaping special characters in the pattern.
    ///
    /// The default is `false`.
    ///
    /// The wildcard characters that can be escaped per default are `*` and `?`. These
    /// characters are adjustable. If character classes are enabled, `[` can be escaped, too.
    ///
    /// If the escape character is not escaping a special character it is matched literally.
    /// For example `"\\a"` matches the escape character and `a` literally.
    pub is_escape_enabled: bool,

    /// The token in the pattern to match zero or more occurrences of any character.
    ///
    /// The default token is `*`.
    pub wildcard_any: T,

    /// The token in the pattern to escape special characters as defined by the other fields.
    ///
    /// The default is the backslash character `\`.
    pub wildcard_escape: T,

    /// The token in the pattern to match exactly one occurrence of any character.
    ///
    /// The default token is `?`.
    pub wildcard_one: T,
}
389
390impl<T> Deref for BorrowedOrOwned<'_, T> {
391 type Target = T;
392
393 #[inline]
394 fn deref(&self) -> &Self::Target {
395 match self {
396 BorrowedOrOwned::Borrowed(value) => value,
397 BorrowedOrOwned::Owned(value) => value,
398 }
399 }
400}
401
402impl<T> AsRef<T> for BorrowedOrOwned<'_, T> {
403 #[inline]
404 fn as_ref(&self) -> &T {
405 self
406 }
407}
408
409impl<T> CharacterClass<T>
410where
411 T: Wildcard + Ord,
412{
413 /// Create a new valid character class
414 #[inline]
415 const fn new(class: Option<Class<T>>, start: usize, end: usize) -> Self {
416 Self { class, end, start }
417 }
418
419 /// Create a new invalid character class
420 #[inline]
421 const fn new_invalid(start: usize, end: usize) -> Self {
422 Self::new(None, start, end)
423 }
424
425 /// Returns the length of this character class.
426 #[inline]
427 const fn len(&self) -> usize {
428 self.end - self.start + 1
429 }
430
431 /// Parse a `CharacterClass` with the opening bracket at the `start` index
432 ///
433 /// Beware, the starting condition is not verified in any way. A [`CharacterClass`] is
434 /// considered invalid, if there is no closing bracket found.
435 fn parse(start: usize, pattern: &[T], class_negate: T) -> Self {
436 // The first character of a range is always the opening bracket
437 let mut p_idx = start + 1;
438 if p_idx + 2 > pattern.len() {
439 // The pattern is too short to produce a valid range
440 return Self::new_invalid(start, p_idx + 1);
441 }
442
443 let mut class = if pattern[p_idx] == class_negate {
444 p_idx += 1;
445 Class::new_negative()
446 } else {
447 Class::new_positive()
448 };
449
450 // The `]` directly after the opening `[` (and possibly `!`) is special and matched literally
451 if pattern[p_idx] == T::DEFAULT_CLASS_CLOSE {
452 let kind = ClassKind::parse_first(p_idx, pattern);
453 p_idx += kind.len();
454 class.push(kind);
455 }
456
457 if p_idx < pattern.len() {
458 // Parse until we reach either the end of the string or find a `]`
459 while let Some(kind) = ClassKind::parse(p_idx, pattern) {
460 p_idx += kind.len();
461 if p_idx >= pattern.len() {
462 // The end of the string without a `]`
463 return Self::new_invalid(start, p_idx);
464 }
465 class.push(kind);
466 }
467
468 // The `None` case tells us we've found a `]` and a valid range
469 Self::new(Some(class), start, p_idx)
470 } else {
471 // We've reached the end of the string without a closing `]`
472 Self::new_invalid(start, p_idx)
473 }
474 }
475
476 /// If this `class` is valid, returns the result of [`Class::is_match`], otherwise `None`
477 #[inline]
478 fn try_match<F, G>(&self, token: T, match_one: F, match_range: G) -> Option<bool>
479 where
480 F: Fn(T, T) -> bool + Copy,
481 G: Fn(T, T, T) -> bool + Copy,
482 {
483 self.class
484 .as_ref()
485 .map(|class| class.is_match(token, match_one, match_range))
486 }
487}
488
489impl<T> CharacterClasses<T>
490where
491 T: Wildcard + Ord,
492{
493 /// Create a new `CharacterClass`
494 ///
495 /// This method does not allocate any memory.
496 #[inline]
497 fn new() -> Self {
498 Self(VecDeque::new())
499 }
500
501 /// Returns the `CharacterClass` with the given `index` as `start` index
502 #[inline]
503 fn get(&self, index: usize) -> Option<&CharacterClass<T>> {
504 self.0.iter().find(|r| r.start == index)
505 }
506
507 #[inline]
508 fn parse(start: usize, pattern: &[T], class_negate: T) -> CharacterClass<T> {
509 CharacterClass::parse(start, pattern, class_negate)
510 }
511
512 /// Parse a new class at this `index` or if already present return a reference to it.
513 ///
514 /// The character at the `index` has to be the opening bracket character. This implies that
515 /// `start < pattern.len()`. Note a [`CharacterClass`] can be invalid if there was no
516 /// closing bracket.
517 fn get_or_add(&mut self, start: usize, pattern: &[T], class_negate: T) -> &CharacterClass<T> {
518 if let Some(last) = self.0.back() {
519 #[allow(clippy::else_if_without_else)]
520 if last.start == start {
521 // SAFETY: The equivalent safe code is `return self.0.back().unwrap()`, but calling
522 // `back()` again and unwrap is unnecessary in this case. The reference `last` is
523 // guaranteed to be valid as it is just obtained from `self.0.back()`. The mutable
524 // reference to `self` prevents any concurrent modifications to `self.0` while this
525 // function is executing, ensuring that the data remains valid between the call to
526 // `back()` and the return here.
527 return unsafe { &*(last as *const CharacterClass<T>) };
528 // We already parsed this character class
529 } else if last.start > start {
530 return self.get(start).unwrap();
531 }
532 }
533
534 let class = Self::parse(start, pattern, class_negate);
535
536 // Stick to the default allocation strategy, doubling the buffer starting with a capacity of
537 // `1`. In case of an invalid class as first class, the maximum amount of classes is `1`, so
538 // `1` might be a good starting point in any case. The maximum amount of `(pattern.len() -
539 // start) / 3` valid classes is most likely too much in typical scenarios.
540 self.0.push_back(class);
541
542 // SAFETY: This unwrap is safe since we just added a class
543 unsafe { self.0.back().unwrap_unchecked() }
544 }
545
546 /// Remove classes that have a smaller starting index than the given `index`
547 #[inline]
548 fn prune(&mut self, index: usize) {
549 while let Some(first) = self.0.front() {
550 if first.start < index {
551 self.0.pop_front();
552 } else {
553 break;
554 }
555 }
556 }
557}
558
559impl<T> Class<T>
560where
561 T: Wildcard + Ord,
562{
563 /// Create a new positive `Class`.
564 #[inline]
565 const fn new_positive() -> Self {
566 Self::Positive(Vec::new())
567 }
568
569 /// Create a new negative `Class`.
570 #[inline]
571 const fn new_negative() -> Self {
572 Self::Negative(Vec::new())
573 }
574
575 /// Add a new [`ClassKind`] to this `Class`.
576 #[inline]
577 fn push(&mut self, kind: ClassKind<T>) {
578 match self {
579 Self::Positive(kinds) | Self::Negative(kinds) => {
580 if kinds.last() != Some(&kind) {
581 kinds.push(kind);
582 }
583 }
584 }
585 }
586
587 /// Returns `true` if a positive `Class` contains the given `token` or if negative doesn't
588 /// contain the `token`.
589 #[inline]
590 fn is_match<F, G>(&self, token: T, match_one: F, match_range: G) -> bool
591 where
592 F: Fn(T, T) -> bool + Copy,
593 G: Fn(T, T, T) -> bool + Copy,
594 {
595 match self {
596 Self::Positive(kinds) => kinds
597 .iter()
598 .any(|r| r.contains(&token, match_one, match_range)),
599 Self::Negative(kinds) => !kinds
600 .iter()
601 .any(|r| r.contains(&token, match_one, match_range)),
602 }
603 }
604}
605
606impl<T> ClassKind<T>
607where
608 T: Wildcard + Ord,
609{
610 #[inline]
611 fn contains<F, G>(&self, token: &T, match_one: F, match_range: G) -> bool
612 where
613 F: Fn(T, T) -> bool,
614 G: Fn(T, T, T) -> bool,
615 {
616 match self {
617 Self::Range(low, high) => match_range(*token, *low, *high),
618 Self::One(c) | Self::RangeOne(c) => match_one(*c, *token),
619 }
620 }
621
622 /// Does no out of bounds check for the first character
623 #[inline]
624 fn parse(index: usize, pattern: &[T]) -> Option<Self> {
625 if pattern[index] == T::DEFAULT_CLASS_CLOSE {
626 None
627 } else {
628 Some(Self::parse_first(index, pattern))
629 }
630 }
631
632 /// Does no out of bounds and `]` check for the first character
633 fn parse_first(index: usize, pattern: &[T]) -> Self {
634 let first = pattern[index];
635 if index + 2 < pattern.len() && pattern[index + 1] == T::DEFAULT_CLASS_HYPHEN {
636 let second = pattern[index + 2];
637 if second == T::DEFAULT_CLASS_CLOSE {
638 Self::One(first)
639 } else {
640 match first.cmp(&second) {
641 Ordering::Less => Self::Range(first, second),
642 Ordering::Equal => Self::RangeOne(first),
643 Ordering::Greater => Self::Range(second, first),
644 }
645 }
646 } else {
647 Self::One(first)
648 }
649 }
650
651 #[inline]
652 const fn len(&self) -> usize {
653 match self {
654 Self::Range(_, _) | Self::RangeOne(_) => 3,
655 Self::One(_) => 1,
656 }
657 }
658}
659
660impl<T> Default for Options<T>
661where
662 T: Wildcard,
663{
664 fn default() -> Self {
665 Self::new()
666 }
667}
668
669impl<T> Options<T>
670where
671 T: Wildcard,
672{
673 /// Create new `Options` for the [`dowild_with`] function.
674 #[must_use]
675 pub const fn new() -> Self {
676 Self {
677 case_sensitive: true,
678 wildcard_escape: T::DEFAULT_ESCAPE,
679 is_classes_enabled: false,
680 class_negate: T::DEFAULT_CLASS_NEGATE,
681 wildcard_any: T::DEFAULT_ANY,
682 wildcard_one: T::DEFAULT_ONE,
683 is_escape_enabled: false,
684 }
685 }
686
687 /// If `true` match the pattern case-insensitive.
688 ///
689 /// The default is to match case-sensitive.
690 ///
691 /// # Examples
692 ///
693 /// ```rust
694 /// use simplematch::Options;
695 ///
696 /// let options: Options<u8> = Options::default().case_insensitive(true);
697 /// ```
698 #[must_use]
699 pub const fn case_insensitive(mut self, yes: bool) -> Self {
700 self.case_sensitive = !yes;
701 self
702 }
703
704 /// If `true` enable escaping of special characters in the pattern.
705 ///
706 /// The default is `false` and the default escape character is backslash `\`.
707 ///
708 /// # Examples
709 ///
710 /// ```rust
711 /// use simplematch::Options;
712 ///
713 /// let options: Options<u8> = Options::default().enable_escape(true);
714 /// ```
715 #[must_use]
716 pub const fn enable_escape(mut self, yes: bool) -> Self {
717 self.is_escape_enabled = yes;
718 self
719 }
720
721 /// Enable escaping of special characters but use this `token` instead of the default.
722 ///
723 /// The default is `false` and the default escape character is backslash `\`.
724 ///
725 /// # Examples
726 ///
727 /// ```rust
728 /// use simplematch::Options;
729 ///
730 /// let options = Options::default().enable_escape_with(b'#');
731 /// ```
732 #[must_use]
733 pub const fn enable_escape_with(mut self, token: T) -> Self {
734 self.is_escape_enabled = true;
735 self.wildcard_escape = token;
736 self
737 }
738
739 /// If `true`, enable character classes `[...]`.
740 ///
741 /// The default is `false`.
742 ///
743 /// # Examples
744 ///
745 /// ```rust
746 /// use simplematch::Options;
747 ///
748 /// let options: Options<u8> = Options::default().enable_classes(true);
749 /// ```
750 #[must_use]
751 pub const fn enable_classes(mut self, yes: bool) -> Self {
752 self.is_classes_enabled = yes;
753 self
754 }
755
756 /// If `true`, enable character classes `[...]` but use this `token` for the negation.
757 ///
758 /// The default is `false` and the default negation character is exclamation mark `!`.
759 ///
760 /// # Examples
761 ///
762 /// Set the negation character to the same character as regex uses it.
763 ///
764 /// ```rust
765 /// use simplematch::Options;
766 ///
767 /// let options = Options::default().enable_classes_with(b'^');
768 /// ```
769 #[must_use]
770 pub const fn enable_classes_with(mut self, negation: T) -> Self {
771 self.is_classes_enabled = true;
772 self.class_negate = negation;
773 self
774 }
775
776 /// Use this `token` instead of the default `*` to match any occurrences of a characters.
777 ///
778 /// # Examples
779 ///
780 /// ```rust
781 /// use simplematch::Options;
782 ///
783 /// let options = Options::default().wildcard_any_with(b'%');
784 /// ```
785 #[must_use]
786 pub const fn wildcard_any_with(mut self, token: T) -> Self {
787 self.wildcard_any = token;
788 self
789 }
790
791 /// Use this `token` instead of the default `?` to match exactly one character.
792 ///
793 /// # Examples
794 ///
795 /// ```rust
796 /// use simplematch::Options;
797 ///
798 /// let options = Options::default().wildcard_any_with(b'_');
799 #[must_use]
800 pub const fn wildcard_one_with(mut self, token: T) -> Self {
801 self.wildcard_one = token;
802 self
803 }
804
805 /// Check `Options` for configuration errors
806 ///
807 /// An invalid configuration consists of duplicate character assignments. For example you
808 /// can't use `*` for the escape character and `wildcard_any` character simultaneously.
809 ///
810 /// # Errors
811 ///
812 /// Returns an error if these `Options` are invalid.
813 ///
814 /// # Examples
815 ///
816 /// Assigning `?` with [`wildcard_any_with`](Options::wildcard_any_with) fails this method.
817 ///
818 /// ```rust
819 /// use simplematch::{Options, SimpleMatchError};
820 ///
821 /// assert_eq!(
822 /// Options::default().wildcard_any_with(b'?').verified(),
823 /// Err(SimpleMatchError::DuplicateCharacterAssignment)
824 /// );
825 /// ```
826 pub fn verify(&self) -> Result<(), SimpleMatchError> {
827 if self.wildcard_any == self.wildcard_one
828 || self.wildcard_any == self.wildcard_escape
829 || self.wildcard_any == self.class_negate
830 || self.wildcard_one == self.wildcard_escape
831 || self.wildcard_one == self.class_negate
832 || self.wildcard_escape == self.class_negate
833 {
834 return Err(SimpleMatchError::DuplicateCharacterAssignment);
835 }
836
837 Ok(())
838 }
839
840 /// A convenience method that consumes and returns these `Options` if it succeeds.
841 ///
842 /// The only difference to [`verify`] is, that this method consumes the [`Options`]
843 /// returning it on success.
844 ///
845 /// # Errors
846 ///
847 /// Returns an error if `Options` are invalid.
848 ///
849 /// # Examples
850 ///
851 /// If the configuration is valid, this method returns these `Options`.
852 ///
853 /// ```rust
854 /// use simplematch::{Options, SimpleMatchError};
855 ///
856 /// let options = Options::default()
857 /// .wildcard_any_with(b'%')
858 /// .verified()
859 /// .unwrap();
860 /// ```
861 ///
862 /// Otherwise, for example assigning `?` with
863 /// [`wildcard_any_with`](Options::wildcard_any_with) fails.
864 ///
865 /// ```rust
866 /// use simplematch::{Options, SimpleMatchError};
867 ///
868 /// assert_eq!(
869 /// Options::default().wildcard_any_with(b'?').verified(),
870 /// Err(SimpleMatchError::DuplicateCharacterAssignment)
871 /// );
872 /// ```
873 ///
874 /// [`verify`]: Options::verify
875 pub fn verified(self) -> Result<Self, SimpleMatchError> {
876 self.verify().map(|()| self)
877 }
878}
879
// The `Error` trait is imported from `std`, so this implementation is only available if the
// `std` feature is enabled.
#[cfg(feature = "std")]
impl Error for SimpleMatchError {}
882
883impl Display for SimpleMatchError {
884 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
885 match self {
886 Self::DuplicateCharacterAssignment => {
887 write!(
888 f,
889 "Verifying options failed: The options contain a duplicate character \
890 assignment."
891 )
892 }
893 }
894 }
895}
896
897////////////////////////////////////////////////////////////////////////////////
898// Our trait implementations for the basic types
899////////////////////////////////////////////////////////////////////////////////
900
// Patterns and haystacks with `u8` as element type. The string types are matched on their
// bytes as returned by `as_bytes()`.
impl_dowild!(u8: &[u8]);
impl_dowild!(u8: &str => .as_bytes());
impl_dowild!(u8: String => .as_bytes());
impl_dowild!(u8: Vec<u8> => .as_slice());
// Patterns and haystacks with `char` as element type.
impl_dowild!(char: &[char]);
impl_dowild!(char: Vec<char> => .as_slice());
907
908impl Wildcard for u8 {
909 const DEFAULT_ANY: Self = b'*';
910 const DEFAULT_ESCAPE: Self = b'\\';
911 const DEFAULT_ONE: Self = b'?';
912 const DEFAULT_CLASS_CLOSE: Self = b']';
913 const DEFAULT_CLASS_HYPHEN: Self = b'-';
914 const DEFAULT_CLASS_NEGATE: Self = b'!';
915 const DEFAULT_CLASS_OPEN: Self = b'[';
916
917 #[inline]
918 fn match_one_case_sensitive(first: Self, second: Self) -> bool {
919 first == second
920 }
921
922 #[inline]
923 fn match_one_case_insensitive(first: Self, second: Self) -> bool {
924 first.eq_ignore_ascii_case(&second)
925 }
926
927 #[inline]
928 fn match_range_case_sensitive(token: Self, low: Self, high: Self) -> bool {
929 low <= token && token <= high
930 }
931
932 #[inline]
933 fn match_range_case_insensitive(token: Self, low: Self, high: Self) -> bool {
934 if low <= token && token <= high {
935 true
936 } else if !token.is_ascii_alphabetic() {
937 false
938 } else {
939 is_in_ascii_range_case_insensitive(token, low, high)
940 }
941 }
942}
943
944impl Wildcard for char {
945 const DEFAULT_ANY: Self = '*';
946 const DEFAULT_ESCAPE: Self = '\\';
947 const DEFAULT_ONE: Self = '?';
948 const DEFAULT_CLASS_CLOSE: Self = ']';
949 const DEFAULT_CLASS_HYPHEN: Self = '-';
950 const DEFAULT_CLASS_NEGATE: Self = '!';
951 const DEFAULT_CLASS_OPEN: Self = '[';
952
953 #[inline]
954 fn match_one_case_insensitive(first: Self, second: Self) -> bool {
955 first.eq_ignore_ascii_case(&second)
956 }
957
958 #[inline]
959 fn match_one_case_sensitive(first: Self, second: Self) -> bool {
960 first == second
961 }
962
963 #[inline]
964 fn match_range_case_sensitive(token: Self, low: Self, high: Self) -> bool {
965 low <= token && token <= high
966 }
967
968 #[inline]
969 fn match_range_case_insensitive(token: Self, low: Self, high: Self) -> bool {
970 if low <= token && token <= high {
971 true
972 } else if !(low.is_ascii() && high.is_ascii() && token.is_ascii_alphabetic()) {
973 false
974 } else {
975 is_in_ascii_range_case_insensitive(token as u8, low as u8, high as u8)
976 }
977 }
978}
979
980////////////////////////////////////////////////////////////////////////////////
981// The main dowild functions
982////////////////////////////////////////////////////////////////////////////////
983
984/// Returns `true` if the wildcard pattern matches the `haystack`.
985///
986/// Allowed wildcard characters are `*` to match any amount of characters and `?` to match
987/// exactly one character.
988///
989/// This is the basic algorithm without customization options to provide the best performance.
990/// If you need [`Options`] you can use [`dowild_with`].
991///
992/// Instead of using this function, match directly on strings, u8 slices, ... without
993/// performance loss, if you bring the [`DoWild`] trait in scope.
994///
995/// See also the [library documentation](crate) for more details.
996///
997/// # Examples
998///
999/// ```rust
1000/// use simplematch::dowild;
1001///
1002/// assert_eq!(dowild("*bc".as_bytes(), "aaabc".as_bytes()), true);
1003/// ```
1004///
1005/// or more conveniently directly on a string
1006///
1007/// ```rust
1008/// use simplematch::DoWild;
1009///
1010/// assert_eq!("*bc".dowild("aaabc"), true);
1011/// ```
1012#[must_use]
1013pub fn dowild<T>(pattern: &[T], haystack: &[T]) -> bool
1014where
1015 T: Wildcard,
1016{
1017 let mut p_idx = 0;
1018 let mut h_idx = 0;
1019
1020 let mut next_p_idx = 0;
1021 let mut next_h_idx = 0;
1022
1023 let wildcard_any = T::DEFAULT_ANY;
1024 let wildcard_one = T::DEFAULT_ONE;
1025
1026 let mut has_seen_wildcard_any = false;
1027 while p_idx < pattern.len() || h_idx < haystack.len() {
1028 if p_idx < pattern.len() {
1029 match pattern[p_idx] {
1030 // This (expensive) case is ensured to be entered only once per `wildcard_any` (or
1031 // multiple consecutive `wildcard_any`) character in the pattern. This allows us to
1032 // perform optimizations which would be otherwise not worth it. Note that every
1033 // increment of the indices in this match case also increments the respective
1034 // `next_*` index in the end.
1035 c if c == wildcard_any => {
1036 has_seen_wildcard_any = true;
1037 p_idx += 1;
1038
1039 while p_idx < pattern.len() && pattern[p_idx] == wildcard_any {
1040 p_idx += 1;
1041 }
1042 if p_idx >= pattern.len() {
1043 return true;
1044 }
1045
1046 let next_c = pattern[p_idx];
1047 if next_c == wildcard_one {
1048 // 1. This optimization prevents checking for the same `wildcard_one`
1049 // character in the big loop again.
1050 // 2. More importantly for the performance, we can advance the pattern and
1051 // haystack for all index counters including `next_h_idx` and
1052 // `next_p_idx`.
1053 while h_idx < haystack.len() {
1054 p_idx += 1;
1055 h_idx += 1;
1056 if !(p_idx < pattern.len() && pattern[p_idx] == next_c) {
1057 break;
1058 }
1059 }
1060 // The end of the haystack might not yet be reached but for example `*????`
1061 // matches anything.
1062 if p_idx >= pattern.len() {
1063 return true;
1064 }
1065 } else {
1066 // Advancing the haystack and indirectly the `next_h_idx` counter to the
1067 // first match significantly enhances the overall performance.
1068 while h_idx < haystack.len() && haystack[h_idx] != next_c {
1069 h_idx += 1;
1070 }
1071 if h_idx >= haystack.len() {
1072 return false;
1073 }
1074 }
1075
1076 // Instead of pinning `next_p_idx` to the `wildcard_any` index and entering this
1077 // match case in the big loop again after a reset to the `next` indices, it's
1078 // more efficient to pin it to the first character after `wildcard_any` (or
1079 // after `wildcard_one` if it is the character after `wildcard_any`). However, we
1080 // need to ensure in this match case that `next_p_idx` is not out of bounds.
1081 next_p_idx = p_idx;
1082 next_h_idx = h_idx;
1083 continue;
1084 }
1085 c if c == wildcard_one => {
1086 if h_idx < haystack.len() {
1087 p_idx += 1;
1088 h_idx += 1;
1089 continue;
1090 }
1091 }
1092 c => {
1093 if h_idx < haystack.len() && haystack[h_idx] == c {
1094 p_idx += 1;
1095 h_idx += 1;
1096 continue;
1097 }
1098 }
1099 }
1100 }
1101 // If `true`, we need to reset. Therefore, this statement can be entered multiple times per
1102 // `wildcard_any`, so we need to be more careful with optimizations here than in the
1103 // `wildcard_any` match case above.
1104 if has_seen_wildcard_any && next_h_idx < haystack.len() {
1105 p_idx = next_p_idx;
1106 next_h_idx += 1;
1107
1108 // We don't enter the `wildcard_any` match case in the big loop again, so we have to
1109 // apply this optimization from above here again, if applicable. This check let's the
1110 // compiler optimize the loop better than without the check although p_idx can't be
1111 // out of bounds here.
1112 if p_idx < pattern.len() {
1113 while next_h_idx < haystack.len() && haystack[next_h_idx] != pattern[p_idx] {
1114 next_h_idx += 1;
1115 }
1116 }
1117
1118 h_idx = next_h_idx;
1119 continue;
1120 }
1121
1122 return false;
1123 }
1124
1125 // The pattern and the haystack are both exhausted which means we have a match
1126 true
1127}
1128
1129/// Returns `true` if the wildcard pattern matches the `haystack`. This method can be
1130/// customized with [`Options`].
1131///
1132/// Don't use this method if you only need the default [`Options`]. The [`dowild`] function is
1133/// more performant in such cases.
1134///
1135/// Like with [`dowild`], allowed wildcard characters are `*` to match any amount of characters
1136/// and `?` to match exactly one character.
1137///
1138/// The [`Options`] structure allows for case-insensitive matching. You can customize the
1139/// `wildcard_any` character (`*`) and the `wildcard_one` character (`?`). Escaping can also
1140/// be enabled, allowing you to specify a custom escape character. Additionally, character
1141/// classes and ranges, such as `[a-z]`, are supported, and the negation character can be
1142/// customized to match all characters not included in a specified range, as in `[!a-z]`.
1143///
1144/// See also the [library documentation](crate) for more details.
1145///
1146/// # Examples
1147///
1148/// ```rust
1149/// use simplematch::{dowild_with, Options};
1150///
1151/// assert_eq!(
1152/// dowild_with(
1153/// "*bc".as_bytes(),
1154/// "AAabc".as_bytes(),
1155/// Options::default().case_insensitive(true)
1156/// ),
1157/// true
1158/// );
1159/// ```
1160///
1161/// or more conveniently match directly on a string bringing the [`DoWild`] trait in
1162/// scope.
1163///
1164/// ```rust
1165/// use simplematch::{DoWild, Options};
1166///
1167/// assert_eq!(
1168/// "%bc".dowild_with("aaabc", Options::default().wildcard_any_with(b'%')),
1169/// true
1170/// );
1171/// ```
1172#[must_use]
1173pub fn dowild_with<T>(pattern: &[T], haystack: &[T], options: Options<T>) -> bool
1174where
1175 T: Wildcard + Ord,
1176{
1177 if options.case_sensitive {
1178 dowild_with_worker(
1179 pattern,
1180 haystack,
1181 options,
1182 T::match_one_case_sensitive,
1183 T::match_range_case_sensitive,
1184 )
1185 } else {
1186 dowild_with_worker(
1187 pattern,
1188 haystack,
1189 options,
1190 T::match_one_case_insensitive,
1191 T::match_range_case_insensitive,
1192 )
1193 }
1194}
1195
1196/// This method has the same structure like [`dowild`] but can apply [`Options`]
1197///
1198/// Customizability has a price performance-wise, so this method is by nature slower than
1199/// [`dowild`].
1200#[inline]
1201#[allow(clippy::too_many_lines)]
1202fn dowild_with_worker<F, G, T>(
1203 pattern: &[T],
1204 haystack: &[T],
1205 options: Options<T>,
1206 match_one: F,
1207 match_range: G,
1208) -> bool
1209where
1210 T: Wildcard + Ord,
1211 F: Fn(T, T) -> bool + Copy,
1212 G: Fn(T, T, T) -> bool + Copy,
1213{
1214 let Options {
1215 class_negate,
1216 is_classes_enabled,
1217 is_escape_enabled,
1218 wildcard_any,
1219 wildcard_escape,
1220 wildcard_one,
1221 ..
1222 } = options;
1223
1224 let is_wildcard_any = |token: T| token == wildcard_any;
1225 let is_wildcard_one = |token: T| token == wildcard_one;
1226 let is_escape = |token: T| is_escape_enabled && token == wildcard_escape;
1227 let is_class_open = |token: T| is_classes_enabled && token == T::DEFAULT_CLASS_OPEN;
1228
1229 let is_special = |token: T| {
1230 token == wildcard_any
1231 || token == wildcard_one
1232 || token == wildcard_escape
1233 || (is_classes_enabled && token == T::DEFAULT_CLASS_OPEN)
1234 };
1235
1236 let is_valid_class_or_escape = |token: T, p_idx: usize, invalid_class_idx: usize| {
1237 (is_classes_enabled && token == T::DEFAULT_CLASS_OPEN && p_idx < invalid_class_idx)
1238 || (is_escape_enabled && token == wildcard_escape)
1239 };
1240
1241 let mut p_idx = 0;
1242 let mut h_idx = 0;
1243
1244 let mut next_p_idx = 0;
1245 let mut next_h_idx = 0;
1246
1247 // There are no allocations, yet. `CharacterClasses` allocate on first use.
1248 let mut classes = CharacterClasses::new();
1249
1250 let mut has_seen_wildcard_any = false;
1251 let mut invalid_class_idx = usize::MAX;
1252
1253 while p_idx < pattern.len() || h_idx < haystack.len() {
1254 if p_idx < pattern.len() {
1255 match pattern[p_idx] {
1256 c if is_wildcard_any(c) => {
1257 has_seen_wildcard_any = true;
1258 p_idx += 1;
1259
1260 while p_idx < pattern.len() && is_wildcard_any(pattern[p_idx]) {
1261 p_idx += 1;
1262 }
1263 if p_idx >= pattern.len() {
1264 return true;
1265 }
1266
1267 let next_c = pattern[p_idx];
1268 #[allow(clippy::else_if_without_else)]
1269 if is_wildcard_one(next_c) {
1270 while h_idx < haystack.len() {
1271 p_idx += 1;
1272 h_idx += 1;
1273 if !(p_idx < pattern.len() && is_wildcard_one(pattern[p_idx])) {
1274 break;
1275 }
1276 }
1277 if p_idx >= pattern.len() {
1278 return true;
1279 }
1280 } else if !is_valid_class_or_escape(next_c, p_idx, invalid_class_idx) {
1281 while h_idx < haystack.len() && !match_one(haystack[h_idx], next_c) {
1282 h_idx += 1;
1283 }
1284 if h_idx >= haystack.len() {
1285 return false;
1286 }
1287 }
1288
1289 next_p_idx = p_idx;
1290 next_h_idx = h_idx;
1291 continue;
1292 }
1293 c if is_wildcard_one(c) => {
1294 if h_idx < haystack.len() {
1295 p_idx += 1;
1296 h_idx += 1;
1297 continue;
1298 }
1299 }
1300 // Handling of the escape character. If it is the last character in the pattern, it
1301 // can only stand for itself.
1302 c if is_escape(c) && p_idx + 1 < pattern.len() => {
1303 if h_idx < haystack.len() {
1304 let next_c = pattern[p_idx + 1];
1305 let h = haystack[h_idx];
1306
1307 #[allow(clippy::else_if_without_else)]
1308 if is_special(next_c) && h == next_c {
1309 p_idx += 2;
1310 h_idx += 1;
1311 continue;
1312 } else if !is_special(next_c) && h == wildcard_escape {
1313 p_idx += 1;
1314 h_idx += 1;
1315 continue;
1316 }
1317 }
1318 }
1319 // Handle character classes. To avoid parsing the same classes multiple times on
1320 // reset, every class including the invalid ones are stored in a container. However,
1321 // classes that are outside of the possible index don't need to be considered anymore
1322 // and are pruned.
1323 c if is_class_open(c) && p_idx < invalid_class_idx && p_idx + 1 < pattern.len() => {
1324 if h_idx < haystack.len() {
1325 let class = if has_seen_wildcard_any {
1326 // Try to get rid of classes outside of the possible index
1327 classes.prune(next_p_idx);
1328 BorrowedOrOwned::Borrowed(classes.get_or_add(
1329 p_idx,
1330 pattern,
1331 class_negate,
1332 ))
1333 } else {
1334 // There's no need to store character classes as long as we don't require
1335 // to reset.
1336 BorrowedOrOwned::Owned(CharacterClasses::parse(
1337 p_idx,
1338 pattern,
1339 class_negate,
1340 ))
1341 };
1342
1343 // Try to match this class. If it is an invalid class, we can interpret the
1344 // opening bracket character literally and the rest of the pattern as if
1345 // there is no class. If the class is valid and matched, we can advance as
1346 // usual, otherwise we need to reset.
1347 #[allow(clippy::else_if_without_else)]
1348 if let Some(is_match) =
1349 class.try_match(haystack[h_idx], match_one, match_range)
1350 {
1351 p_idx += class.len();
1352 if is_match {
1353 h_idx += 1;
1354 continue;
1355 }
1356 } else {
1357 invalid_class_idx = class.as_ref().start;
1358 // A small shortcut to avoid the big loop and enter the generic
1359 // character case.
1360 if match_one(haystack[h_idx], T::DEFAULT_CLASS_OPEN) {
1361 p_idx += 1;
1362 h_idx += 1;
1363 continue;
1364 }
1365 }
1366 }
1367 }
1368 c => {
1369 if h_idx < haystack.len() && match_one(haystack[h_idx], c) {
1370 p_idx += 1;
1371 h_idx += 1;
1372 continue;
1373 }
1374 }
1375 }
1376 }
1377 if has_seen_wildcard_any && next_h_idx < haystack.len() {
1378 p_idx = next_p_idx;
1379 next_h_idx += 1;
1380
1381 if p_idx < pattern.len()
1382 && !is_valid_class_or_escape(pattern[p_idx], p_idx, invalid_class_idx)
1383 {
1384 while next_h_idx < haystack.len() && !match_one(haystack[next_h_idx], pattern[p_idx])
1385 {
1386 next_h_idx += 1;
1387 }
1388 }
1389
1390 h_idx = next_h_idx;
1391 continue;
1392 }
1393
1394 return false;
1395 }
1396 true
1397}
1398
/// Returns `true` if the `token` with its ASCII case swapped lies in the inclusive range from
/// `low` to `high`.
///
/// The `token` has to be an ASCII alphabetic character. The token itself is not tested
/// against the range, only its counterpart of the other case.
///
/// The result can be counter-intuitive: for the range `A-j` and the token `z`, this function
/// returns `true`. However, this is how regex engines (tested with python, go, the regex
/// crate, ...) usually evaluate it.
#[inline]
const fn is_in_ascii_range_case_insensitive(token: u8, low: u8, high: u8) -> bool {
    // The bit in which an ASCII letter differs from its counterpart of the other case.
    const ASCII_CASE_BIT: u8 = 0b0010_0000;

    let swapped = if token.is_ascii_lowercase() {
        // Lowercase letters have the case bit set, clearing it yields the uppercase letter.
        token & !ASCII_CASE_BIT
    } else {
        // Since the token is alphabetic, it is an uppercase letter here and setting the case
        // bit yields the lowercase letter.
        token | ASCII_CASE_BIT
    };

    low <= swapped && swapped <= high
}
1419
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    // Single token comparison. Every case is checked for the `u8` and for the `char`
    // implementation of `Wildcard`.
    #[rstest]
    #[case::same_case_sensitive(b'j', b'j', true, true)]
    #[case::different_case_case_sensitive(b'j', b'J', true, false)]
    #[case::different_char_case_sensitive(b'a', b'b', true, false)]
    #[case::same_case_insensitive(b'j', b'j', false, true)]
    #[case::different_case_insensitive(b'j', b'J', false, true)]
    #[case::different_char_case_insensitive(b'a', b'B', false, false)]
    fn impl_wildcard_match_one(
        #[case] first: u8,
        #[case] second: u8,
        #[case] case_sensitive: bool,
        #[case] expected: bool,
    ) {
        if case_sensitive {
            assert_eq!(Wildcard::match_one_case_sensitive(first, second), expected);
            assert_eq!(
                Wildcard::match_one_case_sensitive(first as char, second as char),
                expected
            );
        } else {
            assert_eq!(
                Wildcard::match_one_case_insensitive(first, second),
                expected
            );
            assert_eq!(
                Wildcard::match_one_case_insensitive(first as char, second as char),
                expected
            );
        }
    }

    // Case-sensitive range matching. The case arguments are `(token, low, high, expected)`.
    #[rstest]
    #[case::all_the_same(b'j', b'j', b'j', true)]
    #[case::low_is_higher_high_is_same(b'j', b'k', b'j', false)]
    #[case::low_is_lower_high_is_same(b'j', b'i', b'j', true)]
    // `low` equals the token here, so only the upper bound can reject it.
    #[case::high_is_lower_low_is_same(b'j', b'j', b'i', false)]
    #[case::high_is_higher_low_is_same(b'j', b'j', b'k', true)]
    #[case::non_alpha_when_false(b'#', b'*', b']', false)]
    #[case::non_alpha_when_true(b'+', b'*', b']', true)]
    #[case::only_token_alpha(b'a', b'*', b']', false)]
    #[case::only_token_big_alpha(b'A', b'*', b']', true)]
    #[case::between_alphabetic(b']', b'*', b'B', false)]
    fn impl_wildcard_match_range_when_case_sensitive(
        #[case] token: u8,
        #[case] low: u8,
        #[case] high: u8,
        #[case] expected: bool,
    ) {
        assert_eq!(
            Wildcard::match_range_case_sensitive(token, low, high),
            expected
        );
        assert_eq!(
            Wildcard::match_range_case_sensitive(token as char, low as char, high as char),
            expected
        );
    }

    // Case-insensitive range matching. The case arguments are `(token, low, high, expected)`.
    #[rstest]
    #[case::all_the_same_small(b'j', b'j', b'j', true)]
    #[case::all_the_same_big(b'J', b'J', b'J', true)]
    // This token is one of the characters between `Z` and `a`
    #[case::no_alpha_low_is_big(b'[', b'A', b'z', true)]
    #[case::no_alpha_both_big(b'[', b'A', b'Z', false)]
    // The counterpart of `no_alpha_low_is_big`: `a` is greater than `Z`, so the range is empty.
    #[case::no_alpha_low_is_small(b'[', b'a', b'Z', false)]
    #[case::no_alpha_both_small(b'[', b'a', b'z', false)]
    #[case::all_small_middle(b'j', b'a', b'z', true)]
    #[case::all_small_low_is_higher(b'j', b'k', b'z', false)]
    #[case::all_small_high_is_lower(b'j', b'a', b'i', false)]
    #[case::all_big_middle(b'J', b'A', b'Z', true)]
    #[case::all_big_low_is_higher(b'J', b'K', b'Z', false)]
    #[case::all_big_high_is_lower(b'J', b'A', b'I', false)]
    #[case::big_a_to_j(b'z', b'A', b'j', true)]
    #[case::non_alpha_when_false(b'#', b'*', b']', false)]
    #[case::control_when_false(b'\x1f', b'*', b']', false)]
    #[case::non_alpha_when_true(b'+', b'*', b']', true)]
    #[case::only_token_alpha(b'a', b'*', b']', true)]
    #[case::only_token_big_alpha(b'A', b'*', b']', true)]
    #[case::between_alphabetic(b']', b'*', b'B', false)]
    fn impl_wildcard_match_range_when_case_insensitive(
        #[case] token: u8,
        #[case] low: u8,
        #[case] high: u8,
        #[case] expected: bool,
    ) {
        assert_eq!(
            Wildcard::match_range_case_insensitive(token, low, high),
            expected
        );
        assert_eq!(
            Wildcard::match_range_case_insensitive(token as char, low as char, high as char),
            expected
        );
    }
}