1#![no_std]
16#![cfg_attr(feature = "fatal-warnings", deny(warnings))]
17extern crate alloc;
77
78use alloc::borrow::Cow;
79use alloc::vec::Vec;
80use core::fmt::{Debug, Display, Formatter};
81use core::ops::Range;
82use thiserror::Error;
83
84#[derive(Eq, PartialEq, Error, Debug)]
86pub enum WildcardError {
87 #[error("wildcard syntax error at position {position}: {message}")]
89 Syntax {
90 position: usize,
92 message: &'static str,
94 },
95 #[error("invalid configuration of special symbols")]
98 InvalidSpecialSymbolsConfiguration,
99}
100
101pub trait WildcardSymbol: Eq + Copy {
103 const DEFAULT_METASYMBOL_ANY: Self;
107 const DEFAULT_METASYMBOL_ONE: Self;
111 const DEFAULT_METASYMBOL_ESCAPE: Self;
115
116 fn eq_case_insensitive(a: Self, b: Self) -> bool;
118}
119
120impl WildcardSymbol for u8 {
121 const DEFAULT_METASYMBOL_ANY: u8 = b'*';
122 const DEFAULT_METASYMBOL_ONE: u8 = b'?';
123 const DEFAULT_METASYMBOL_ESCAPE: u8 = b'\\';
124
125 fn eq_case_insensitive(a: u8, b: u8) -> bool {
126 a.eq_ignore_ascii_case(&b)
127 }
128}
129
130impl WildcardSymbol for char {
131 const DEFAULT_METASYMBOL_ANY: char = '*';
132 const DEFAULT_METASYMBOL_ONE: char = '?';
133 const DEFAULT_METASYMBOL_ESCAPE: char = '\\';
134
135 fn eq_case_insensitive(a: char, b: char) -> bool {
136 a.to_lowercase().eq(b.to_lowercase())
137 }
138}
139
140#[derive(Clone)]
141struct WildcardMatchingConfig<S> {
142 metasymbol_any: S,
143 metasymbol_one: Option<S>,
144 symbol_escape: Option<S>,
145 case_insensitive: bool,
146}
147
148impl<S> WildcardMatchingConfig<S>
149where
150 S: WildcardSymbol,
151{
152 fn validate(&self) -> Result<(), WildcardError> {
153 let has_special_symbol_repetitions = is_symbol(self.metasymbol_any, self.metasymbol_one)
155 || is_symbol(self.metasymbol_any, self.symbol_escape)
156 || (self.metasymbol_one.is_some() && self.metasymbol_one == self.symbol_escape);
157
158 (!has_special_symbol_repetitions)
159 .then_some(())
160 .ok_or(WildcardError::InvalidSpecialSymbolsConfiguration)
161 }
162}
163
164impl<S> Default for WildcardMatchingConfig<S>
165where
166 S: WildcardSymbol,
167{
168 fn default() -> Self {
169 WildcardMatchingConfig {
170 metasymbol_any: S::DEFAULT_METASYMBOL_ANY,
171 metasymbol_one: Some(S::DEFAULT_METASYMBOL_ONE),
172 symbol_escape: Some(S::DEFAULT_METASYMBOL_ESCAPE),
173 case_insensitive: false,
174 }
175 }
176}
177
178pub struct WildcardBuilder<'a, S = u8>
181where
182 S: WildcardSymbol,
183{
184 pattern: Cow<'a, [S]>,
185 config: WildcardMatchingConfig<S>,
186}
187
188impl<'a, S> WildcardBuilder<'a, S>
189where
190 S: WildcardSymbol,
191{
192 #[must_use]
194 pub fn new(pattern: &'a [S]) -> WildcardBuilder<'a, S> {
195 WildcardBuilder::from_cow(Cow::Borrowed(pattern))
196 }
197
198 #[must_use]
200 pub fn from_owned(pattern: Vec<S>) -> WildcardBuilder<'static, S> {
201 WildcardBuilder::from_cow(Cow::Owned(pattern))
202 }
203
204 #[must_use]
206 pub fn from_cow(pattern: Cow<'a, [S]>) -> WildcardBuilder<'a, S> {
207 WildcardBuilder { pattern, config: WildcardMatchingConfig::default() }
208 }
209
210 #[must_use]
214 pub fn with_any_metasymbol(mut self, s: S) -> WildcardBuilder<'a, S> {
215 self.config.metasymbol_any = s;
216 self
217 }
218
219 #[must_use]
223 pub fn with_one_metasymbol(mut self, s: S) -> WildcardBuilder<'a, S> {
224 self.config.metasymbol_one = Some(s);
225 self
226 }
227
228 #[must_use]
230 pub fn without_one_metasymbol(mut self) -> WildcardBuilder<'a, S> {
231 self.config.metasymbol_one = None;
232 self
233 }
234
235 #[must_use]
239 pub fn with_escape_symbol(mut self, s: S) -> WildcardBuilder<'a, S> {
240 self.config.symbol_escape = Some(s);
241 self
242 }
243
244 #[must_use]
246 pub fn without_escape(mut self) -> WildcardBuilder<'a, S> {
247 self.config.symbol_escape = None;
248 self
249 }
250
251 #[must_use]
255 pub fn case_insensitive(mut self, on: bool) -> WildcardBuilder<'a, S> {
256 self.config.case_insensitive = on;
257 self
258 }
259
260 pub fn build(self) -> Result<Wildcard<'a, S>, WildcardError> {
262 Wildcard::new_with_config(self.pattern, self.config)
263 }
264}
265
266#[derive(Eq, PartialEq, Copy, Clone, Debug)]
268pub enum WildcardToken<S = u8> {
269 MetasymbolAny,
271 MetasymbolOne,
273 Symbol(S),
275}
276
277impl<S> WildcardToken<S> {
278 #[must_use]
280 pub fn is_metasymbol(self) -> bool {
281 match self {
282 WildcardToken::MetasymbolAny | WildcardToken::MetasymbolOne => true,
283 WildcardToken::Symbol(_) => false,
284 }
285 }
286}
287
288#[derive(Clone)]
298pub struct Wildcard<'a, S = u8>
299where
300 S: WildcardSymbol,
301{
302 pattern: Cow<'a, [S]>,
303 config: WildcardMatchingConfig<S>,
304 metasymbol_count: usize,
305}
306
307impl<'a, S> Wildcard<'a, S>
308where
309 S: WildcardSymbol,
310{
311 pub fn new(pattern: &'a [S]) -> Result<Wildcard<'a, S>, WildcardError> {
313 Wildcard::new_with_config(Cow::Borrowed(pattern), WildcardMatchingConfig::default())
314 }
315
316 pub fn from_owned(pattern: Vec<S>) -> Result<Wildcard<'static, S>, WildcardError> {
318 Wildcard::new_with_config(Cow::Owned(pattern), WildcardMatchingConfig::default())
319 }
320
321 pub fn from_cow(pattern: Cow<'a, [S]>) -> Result<Wildcard<'a, S>, WildcardError> {
323 Wildcard::new_with_config(pattern, WildcardMatchingConfig::default())
324 }
325
326 fn new_with_config(
327 pattern: Cow<'a, [S]>,
328 config: WildcardMatchingConfig<S>,
329 ) -> Result<Wildcard<'a, S>, WildcardError> {
330 config.validate()?;
331
332 let metasymbol_count = validate_syntax(&pattern, &config)?;
333 let wildcard = Wildcard { pattern, config, metasymbol_count };
334
335 Ok(wildcard)
336 }
337
338 #[inline]
340 #[must_use]
341 pub fn is_match(&self, input: &[S]) -> bool {
342 match self.config.case_insensitive {
345 true => matches(
346 &self.config,
347 &self.pattern,
348 input,
349 WildcardSymbol::eq_case_insensitive,
350 |_| (),
351 ),
352 false => matches(&self.config, &self.pattern, input, |a, b| a == b, |_| ()),
353 }
354 }
355
356 #[inline]
371 #[must_use]
372 pub fn captures<'b>(&self, input: &'b [S]) -> Option<Vec<&'b [S]>> {
373 let mut captures = Vec::with_capacity(self.metasymbol_count);
374
375 let is_match = {
376 let capture = |range| captures.push(&input[range]);
377
378 match self.config.case_insensitive {
381 true => matches(
382 &self.config,
383 &self.pattern,
384 input,
385 WildcardSymbol::eq_case_insensitive,
386 capture,
387 ),
388 false => matches(&self.config, &self.pattern, input, |a, b| a == b, capture),
389 }
390 };
391
392 match is_match {
393 true => {
394 if let Some(metasymbol_one) = self.config.metasymbol_one {
397 fill_in_metasymbol_one_captures(
398 self.config.metasymbol_any,
399 metasymbol_one,
400 self.config.symbol_escape,
401 &self.pattern,
402 input,
403 &mut captures,
404 );
405 }
406
407 debug_assert_eq!(captures.len(), self.metasymbol_count);
408
409 Some(captures)
410 }
411 false => None,
412 }
413 }
414
415 #[must_use]
417 pub fn pattern(&self) -> &[S] {
418 self.pattern.as_ref()
419 }
420
421 #[must_use]
424 pub fn metasymbol_count(&self) -> usize {
425 self.metasymbol_count
426 }
427
428 pub fn parsed(&self) -> impl Iterator<Item = WildcardToken<S>> + '_ {
430 let mut i = 0;
431
432 core::iter::from_fn(move || {
433 if i >= self.pattern.len() {
434 None
435 } else {
436 let symbol = self.pattern[i];
437
438 if is_symbol(symbol, self.config.symbol_escape) {
439 let s = self.pattern[i + 1];
441 i += 2;
442 Some(WildcardToken::Symbol(s))
443 } else if self.pattern[i] == self.config.metasymbol_any {
444 i += 1;
445 Some(WildcardToken::MetasymbolAny)
446 } else if is_symbol(symbol, self.config.metasymbol_one) {
447 i += 1;
448 Some(WildcardToken::MetasymbolOne)
449 } else {
450 i += 1;
451 Some(WildcardToken::Symbol(symbol))
452 }
453 }
454 })
455 }
456}
457
458fn validate_syntax<S>(
460 pattern: &[S],
461 config: &WildcardMatchingConfig<S>,
462) -> Result<usize, WildcardError>
463where
464 S: Eq + Copy,
465{
466 let symbol_escape = config.symbol_escape;
470 let metasymbol_any = config.metasymbol_any;
471 let metasymbol_one = config.metasymbol_one;
472
473 let pattern_len = pattern.len();
474
475 let mut metasymbols = 0;
476 let mut escape = false;
477 let mut i = 0;
478
479 while i < pattern_len {
481 let symbol = pattern[i];
482
483 if escape {
484 if symbol != metasymbol_any
485 && !is_symbol(symbol, metasymbol_one)
486 && !is_symbol(symbol, symbol_escape)
487 {
488 return Err(WildcardError::Syntax {
489 position: i - 1,
490 message: "invalid escape sequence",
491 });
492 }
493
494 escape = false;
495 } else if is_symbol(symbol, symbol_escape) {
496 escape = true;
497 } else if symbol == metasymbol_any || is_symbol(symbol, metasymbol_one) {
498 metasymbols += 1;
499 }
500
501 i += 1;
502 }
503
504 if escape {
505 return Err(WildcardError::Syntax {
506 position: pattern_len - 1,
507 message: "incomplete escape sequence at the end of the wildcard",
508 });
509 }
510
511 Ok(metasymbols)
512}
513
514fn fill_in_metasymbol_one_captures<'a, S>(
518 metasymbol_any: S,
519 metasymbol_one: S,
520 symbol_escape: Option<S>,
521 pattern: &[S],
522 input: &'a [S],
523 captures: &mut Vec<&'a [S]>,
524) where
525 S: Eq + Copy,
526{
527 let pattern_len = pattern.len();
531
532 let mut input_index = 0;
533 let mut captures_index = 0;
534 let mut escape = false;
535 let mut i = 0;
536
537 while i < pattern_len {
539 let symbol = pattern[i];
540
541 if escape {
542 escape = false;
543 } else if is_symbol(symbol, symbol_escape) {
544 escape = true;
545 input_index += 1;
546 } else if symbol == metasymbol_any {
547 input_index += captures[captures_index].len();
548 captures_index += 1;
549 } else if symbol == metasymbol_one {
550 captures.insert(captures_index, &input[input_index..=input_index]);
552 captures_index += 1;
553 input_index += 1;
554 } else {
555 input_index += 1;
556 }
557
558 i += 1;
559 }
560}
561
562#[inline(always)]
563fn is_symbol<S>(v: S, opt_symbol: Option<S>) -> bool
564where
565 S: Eq + Copy,
566{
567 match opt_symbol {
568 None => false,
569 Some(u) => u == v,
570 }
571}
572
573#[inline]
589fn matches<S>(
590 config: &WildcardMatchingConfig<S>,
591 pattern: &[S],
592 input: &[S],
593 symbol_eq: impl Fn(S, S) -> bool,
594 mut capture: impl FnMut(Range<usize>),
595) -> bool
596where
597 S: Eq + Copy,
598{
599 let symbol_escape = config.symbol_escape;
603 let metasymbol_any = config.metasymbol_any;
604 let metasymbol_one = config.metasymbol_one;
605
606 let pattern_len = pattern.len();
607 let input_len = input.len();
608 let mut pattern_index = 0;
609 let mut input_index = 0;
610
611 let mut revert_pattern_index = 0;
613 let mut revert_input_index = 0;
618 let mut last_star_input_index = 0;
621
622 while input_index < input_len {
623 let mut match_failed = false;
624
625 if pattern_index >= pattern_len {
626 match_failed = true;
627 } else {
628 let mut pattern_symbol = pattern[pattern_index];
629 let mut escape = false;
630
631 if is_symbol(pattern_symbol, symbol_escape) {
633 pattern_index += 1;
636 pattern_symbol = pattern[pattern_index];
637 escape = true;
638 }
639
640 if pattern_symbol == metasymbol_any && !escape {
641 if revert_pattern_index > 0 {
643 capture(last_star_input_index..revert_input_index);
644 }
645
646 pattern_index += 1;
647
648 while pattern_index < pattern_len && pattern[pattern_index] == metasymbol_any {
650 capture(input_index..input_index);
651 pattern_index += 1;
652 }
653
654 if pattern_index >= pattern_len {
655 capture(input_index..input_len);
658 return true;
659 }
660
661 debug_assert!(pattern_index < pattern_len);
662
663 let pattern_symbol = pattern[pattern_index];
664
665 debug_assert!(pattern_symbol != metasymbol_any);
666
667 last_star_input_index = input_index;
668
669 if !is_symbol(pattern_symbol, metasymbol_one)
672 && !is_symbol(pattern_symbol, symbol_escape)
673 {
674 while input_index < input_len && !symbol_eq(pattern_symbol, input[input_index])
675 {
676 input_index += 1;
677 }
678 }
679
680 revert_pattern_index = pattern_index;
682 revert_input_index = input_index;
683 } else if !symbol_eq(pattern_symbol, input[input_index])
684 && (!is_symbol(pattern_symbol, metasymbol_one) || escape)
685 {
686 match_failed = true;
687 } else {
688 pattern_index += 1;
689 input_index += 1;
690 }
691 }
692
693 if match_failed {
694 if revert_pattern_index == 0 {
701 return false;
702 }
703
704 revert_input_index += 1;
706
707 debug_assert!(revert_pattern_index < pattern_len);
708
709 let pattern_symbol = pattern[revert_pattern_index];
710
711 debug_assert!(pattern_symbol != metasymbol_any);
712
713 if !is_symbol(pattern_symbol, metasymbol_one)
715 && !is_symbol(pattern_symbol, symbol_escape)
716 {
717 while revert_input_index < input_len
718 && !symbol_eq(pattern_symbol, input[revert_input_index])
719 {
720 revert_input_index += 1;
721 }
722 }
723
724 pattern_index = revert_pattern_index;
725 input_index = revert_input_index;
726 }
727 }
728
729 if revert_pattern_index > 0 {
731 capture(last_star_input_index..revert_input_index);
732 }
733
734 while pattern_index < pattern_len && pattern[pattern_index] == metasymbol_any {
736 capture(input_index..input_index);
737 pattern_index += 1;
738 }
739
740 pattern_index >= pattern_len
742}
743
744impl<'a, S> Debug for Wildcard<'a, S>
745where
746 S: WildcardSymbol + Display,
747{
748 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
749 for s in self.pattern.iter() {
750 Display::fmt(s, f)?;
751 }
752
753 Ok(())
754 }
755}
756
757#[cfg(test)]
758mod tests {
759 use crate::{Wildcard, WildcardBuilder, WildcardError, WildcardToken};
760 use alloc::string::String;
761 use alloc::vec::Vec;
762 use alloc::{format, vec};
763 use quickcheck::{Arbitrary, Gen};
764 use quickcheck_macros::quickcheck;
765
766 fn vec_u8_to_vec_string(v: Vec<&[u8]>) -> Vec<String> {
767 v.into_iter().map(|c| String::from_utf8_lossy(c).into_owned()).collect()
768 }
769
770 pub mod engine_regex_bytes {
771 use crate::{Wildcard, WildcardToken};
772 use alloc::borrow::ToOwned;
773 use alloc::string::String;
774 use alloc::vec::Vec;
775 use regex::bytes::{Regex, RegexBuilder};
776
777 fn make_regex(pattern: &str, case_insensitive: bool) -> Regex {
778 let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
779 let mut regex_pattern = "^".to_owned();
780
781 for token in wildcard.parsed() {
782 match token {
783 WildcardToken::MetasymbolAny => {
784 regex_pattern.push_str("(.*?)");
785 }
786 WildcardToken::MetasymbolOne => {
787 regex_pattern.push_str("(.)");
788 }
789 WildcardToken::Symbol(s) => {
790 let slice = &[s];
791 let s = core::str::from_utf8(slice).expect("invalid utf-8 symbol");
792 regex_pattern.push_str(®ex::escape(s));
793 }
794 }
795 }
796
797 regex_pattern.push('$');
798
799 RegexBuilder::new(®ex_pattern)
800 .dot_matches_new_line(true)
801 .crlf(false)
802 .unicode(false)
803 .case_insensitive(case_insensitive)
804 .build()
805 .expect("invalid regex")
806 }
807
808 pub fn matches(pattern: &str, input: &[u8], case_insensitive: bool) -> bool {
809 make_regex(pattern, case_insensitive).is_match(input)
810 }
811
812 pub fn captures<'a>(
813 pattern: &str,
814 input: &'a [u8],
815 case_insensitive: bool,
816 ) -> Option<Vec<&'a [u8]>> {
817 make_regex(pattern, case_insensitive).captures(input).map(|c| {
818 c.iter().flat_map(IntoIterator::into_iter).skip(1).map(|m| m.as_bytes()).collect()
819 })
820 }
821
822 pub fn captures_as_string(
823 pattern: &str,
824 input: &[u8],
825 case_insensitive: bool,
826 ) -> Option<Vec<String>> {
827 captures(pattern, input, case_insensitive)
828 .map(|c| c.iter().map(|m| String::from_utf8_lossy(m).into_owned()).collect())
829 }
830 }
831
832 #[derive(serde::Deserialize, Debug)]
833 struct MatchTest {
834 pattern: String,
835 input: String,
836 matches: bool,
837 #[serde(default, rename = "case-insensitive")]
838 case_insensitive: bool,
839 }
840
841 #[derive(serde::Deserialize)]
842 struct MatchTests {
843 #[serde(rename = "test")]
844 tests: Vec<MatchTest>,
845 }
846
847 fn run_matching_tests(tests: MatchTests) -> Vec<String> {
848 let mut errors = Vec::new();
849
850 assert!(!tests.tests.is_empty());
851
852 for test in tests.tests {
853 let is_match = WildcardBuilder::new(test.pattern.as_bytes())
854 .case_insensitive(test.case_insensitive)
855 .build()
856 .expect("invalid wildcard")
857 .is_match(test.input.as_bytes());
858
859 assert_eq!(
860 engine_regex_bytes::matches(
861 &test.pattern,
862 test.input.as_bytes(),
863 test.case_insensitive
864 ),
865 test.matches,
866 "it seems the test itself is wrong: {test:?}",
867 );
868
869 match (test.matches, is_match) {
870 (true, false) => errors.push(format!(
871 r#"wildcard "{}" should have matched "{}""#,
872 test.pattern, test.input
873 )),
874 (false, true) => errors.push(format!(
875 r#"wildcard "{}" should not have matched "{}""#,
876 test.pattern, test.input
877 )),
878 (true, true) | (false, false) => (),
879 }
880 }
881
882 errors
883 }
884
885 #[derive(serde::Deserialize, Debug)]
886 struct CaptureTest {
887 pattern: String,
888 input: String,
889 captures: Vec<String>,
890 #[serde(default, rename = "case-insensitive")]
891 case_insensitive: bool,
892 }
893
894 impl CaptureTest {
895 fn from_matching_test(matching_test: MatchTest) -> Option<CaptureTest> {
896 if !matching_test.matches {
897 return None;
898 }
899
900 let captures = engine_regex_bytes::captures_as_string(
901 &matching_test.pattern,
902 matching_test.input.as_bytes(),
903 matching_test.case_insensitive,
904 )
905 .unwrap_or_else(|| {
906 panic!("the test should match but captures() returned None: {matching_test:?}")
907 });
908
909 Some(CaptureTest {
910 pattern: matching_test.pattern,
911 input: matching_test.input,
912 captures,
913 case_insensitive: matching_test.case_insensitive,
914 })
915 }
916 }
917
918 #[derive(serde::Deserialize)]
919 struct CaptureTests {
920 #[serde(rename = "test")]
921 tests: Vec<CaptureTest>,
922 }
923
924 fn run_capture_tests(tests: CaptureTests) -> Vec<String> {
925 let mut errors = Vec::new();
926
927 assert!(!tests.tests.is_empty());
928
929 for test in tests.tests {
930 let wildcard = WildcardBuilder::new(test.pattern.as_bytes())
931 .case_insensitive(test.case_insensitive)
932 .build()
933 .expect("invalid wildcard");
934
935 assert_eq!(
936 engine_regex_bytes::captures_as_string(
937 &test.pattern,
938 test.input.as_bytes(),
939 test.case_insensitive
940 )
941 .as_ref(),
942 Some(&test.captures),
943 "it seems the test itself is wrong: {test:?}",
944 );
945
946 let captures: Option<Vec<String>> =
947 wildcard.captures(test.input.as_bytes()).map(vec_u8_to_vec_string);
948
949 match captures {
950 Some(captures) if captures != test.captures => errors.push(format!(
951 r#"captures for wildcard "{}" with input "{}" should have been {:?} instead of {:?}"#,
952 test.pattern, test.input, test.captures, captures,
953 )),
954 None => errors.push(format!(
955 "we did not get a match on the capture test {test:?}"
956 )),
957 Some(_) => (),
958 }
959 }
960
961 errors
962 }
963
964 #[test]
965 fn test_matching() {
966 let tests = toml::from_str::<MatchTests>(include_str!("../testdata/matching.toml"))
967 .expect("failed to parse test file");
968
969 let errors = run_matching_tests(tests);
970
971 assert!(errors.is_empty(), "{} tests failed:\n{}", errors.len(), errors.join("\n"));
972 }
973
974 #[test]
975 fn test_capture() {
976 let tests = toml::from_str::<CaptureTests>(include_str!("../testdata/capture.toml"))
977 .expect("failed to parse test file");
978
979 let errors = run_capture_tests(tests);
980
981 assert!(errors.is_empty(), "{} tests failed:\n{}", errors.len(), errors.join("\n"));
982
983 let tests = {
985 let t = toml::from_str::<MatchTests>(include_str!("../testdata/matching.toml"))
986 .expect("failed to parse test file");
987
988 CaptureTests {
989 tests: t.tests.into_iter().filter_map(CaptureTest::from_matching_test).collect(),
990 }
991 };
992
993 let errors = run_capture_tests(tests);
994
995 assert!(errors.is_empty(), "{} tests failed:\n{}", errors.len(), errors.join("\n"));
996 }
997
998 #[test]
999 fn test_syntax_validation() {
1000 let pattern = r"\foo";
1001 let expected = WildcardError::Syntax { position: 0, message: r"invalid escape sequence" };
1002
1003 assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1004
1005 let pattern = r"f\oo";
1006 let expected = WildcardError::Syntax { position: 1, message: r"invalid escape sequence" };
1007
1008 assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1009
1010 let pattern = r"foo\";
1011 let expected = WildcardError::Syntax {
1012 position: 3,
1013 message: "incomplete escape sequence at the end of the wildcard",
1014 };
1015
1016 assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1017
1018 let pattern = r"foo\\\";
1019 let expected = WildcardError::Syntax {
1020 position: 5,
1021 message: "incomplete escape sequence at the end of the wildcard",
1022 };
1023
1024 assert_eq!(Wildcard::new(pattern.as_bytes()).err(), Some(expected));
1025
1026 let pattern = r"f\?oo";
1027 let expected = WildcardError::Syntax { position: 1, message: r"invalid escape sequence" };
1028 let wildcard = WildcardBuilder::new(pattern.as_bytes()).without_one_metasymbol().build();
1029
1030 assert_eq!(wildcard.err(), Some(expected));
1031
1032 let pattern = r"f\?oo";
1033 let expected = WildcardError::Syntax { position: 1, message: r"invalid escape sequence" };
1034 let wildcard = WildcardBuilder::new(pattern.as_bytes()).with_one_metasymbol(b'!').build();
1035
1036 assert_eq!(wildcard.err(), Some(expected));
1037
1038 let pattern = r"f\*\\o\?o";
1039
1040 assert!(Wildcard::new(pattern.as_bytes()).is_ok());
1041
1042 let pattern = r"f?oo";
1043 let wildcard = WildcardBuilder::new(pattern.as_bytes()).without_one_metasymbol().build();
1044
1045 assert!(wildcard.is_ok());
1046
1047 let pattern = r"f\!o!o";
1048 let wildcard = WildcardBuilder::new(pattern.as_bytes()).with_one_metasymbol(b'!').build();
1049
1050 assert!(wildcard.is_ok());
1051
1052 let pattern = r"f\o\\o\";
1053 let wildcard = WildcardBuilder::new(pattern.as_bytes()).without_escape().build();
1054
1055 assert!(wildcard.is_ok());
1056 }
1057
1058 #[test]
1059 fn test_configuration_repeated_special_symbols() {
1060 let wildcard = WildcardBuilder::new(&[]).with_escape_symbol(b'*').build();
1061
1062 assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1063
1064 let wildcard = WildcardBuilder::new(&[]).with_escape_symbol(b'?').build();
1065
1066 assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1067
1068 let wildcard = WildcardBuilder::new(&[]).with_one_metasymbol(b'*').build();
1069
1070 assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1071
1072 let wildcard = WildcardBuilder::new(&[]).with_one_metasymbol(b'\\').build();
1073
1074 assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1075
1076 let wildcard = WildcardBuilder::new(&[]).with_any_metasymbol(b'?').build();
1077
1078 assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1079
1080 let wildcard = WildcardBuilder::new(&[]).with_any_metasymbol(b'\\').build();
1081
1082 assert_eq!(wildcard.err(), Some(WildcardError::InvalidSpecialSymbolsConfiguration));
1083 }
1084
1085 #[test]
1086 fn test_wildcard_parsed() {
1087 let wildcard = Wildcard::new(r"a\\b?*c\?d\*".as_bytes()).unwrap();
1088 let expected = [
1089 WildcardToken::Symbol(b'a'),
1090 WildcardToken::Symbol(b'\\'),
1091 WildcardToken::Symbol(b'b'),
1092 WildcardToken::MetasymbolOne,
1093 WildcardToken::MetasymbolAny,
1094 WildcardToken::Symbol(b'c'),
1095 WildcardToken::Symbol(b'?'),
1096 WildcardToken::Symbol(b'd'),
1097 WildcardToken::Symbol(b'*'),
1098 ];
1099
1100 assert_eq!(wildcard.parsed().collect::<Vec<_>>(), expected);
1101
1102 let wildcard =
1103 WildcardBuilder::new(r"*?\\".as_bytes()).without_one_metasymbol().build().unwrap();
1104 let expected = [
1105 WildcardToken::MetasymbolAny,
1106 WildcardToken::Symbol(b'?'),
1107 WildcardToken::Symbol(b'\\'),
1108 ];
1109
1110 assert_eq!(wildcard.parsed().collect::<Vec<_>>(), expected);
1111
1112 let wildcard = WildcardBuilder::new(r"*?\".as_bytes()).without_escape().build().unwrap();
1113 let expected = [
1114 WildcardToken::MetasymbolAny,
1115 WildcardToken::MetasymbolOne,
1116 WildcardToken::Symbol(b'\\'),
1117 ];
1118
1119 assert_eq!(wildcard.parsed().collect::<Vec<_>>(), expected);
1120 }
1121
1122 #[test]
1123 fn test_matching_chars() {
1124 let pattern = "a*c".chars().collect::<Vec<char>>();
1125 let wildcard = Wildcard::new(&pattern).expect("invalid wildcard");
1126
1127 assert!(wildcard.is_match(&['a', 'c']));
1128 assert!(!wildcard.is_match(&['a', 'b']));
1129 assert!(wildcard.is_match(&['a', 'b', 'c']));
1130 assert!(wildcard.is_match(&['a', 'b', 'b', 'c']));
1131 }
1132
1133 #[test]
1134 fn test_matching_chars_case_insensitive() {
1135 let pattern = "ω*δ".chars().collect::<Vec<char>>();
1136 let wildcard = WildcardBuilder::new(&pattern)
1137 .case_insensitive(true)
1138 .build()
1139 .expect("invalid wildcard");
1140
1141 assert!(wildcard.is_match(&['ω', 'Δ']));
1142 assert!(!wildcard.is_match(&['ω', 'x']));
1143 assert!(wildcard.is_match(&['Ω', 'x', 'δ']));
1144 assert!(wildcard.is_match(&['ω', 'x', 'x', 'Δ']));
1145 }
1146
1147 #[test]
1148 fn test_capture_chars_case_insensitive() {
1149 let pattern = "ω*δ".chars().collect::<Vec<char>>();
1150 let wildcard = WildcardBuilder::new(&pattern)
1151 .case_insensitive(true)
1152 .build()
1153 .expect("invalid wildcard");
1154
1155 assert_eq!(wildcard.captures(&['ω', 'Δ']), Some(vec![&[] as &[char]]));
1156 assert_eq!(wildcard.captures(&['ω', 'x']), None);
1157 assert_eq!(wildcard.captures(&['Ω', 'x', 'X', 'Δ']), Some(vec![&['x', 'X'] as &[char]]));
1158 assert_eq!(wildcard.captures(&['ω', 'ω', 'Ω', 'Δ']), Some(vec![&['ω', 'Ω'] as &[char]]));
1159 }
1160
1161 #[test]
1162 fn test_debug_format() {
1163 let pattern = r"a*\*?\?".chars().collect::<Vec<char>>();
1164 let wildcard = WildcardBuilder::new(&pattern).build().expect("invalid wildcard");
1165
1166 assert_eq!(format!("{wildcard:?}"), r"a*\*?\?");
1167 }
1168
1169 #[derive(Clone, Debug)]
1170 struct WildcardAndInput {
1171 pattern: String,
1172 input: String,
1173 }
1174
1175 struct NormalCharGen {
1176 chars_range: usize,
1177 }
1178
1179 impl NormalCharGen {
1180 const ALPHA_CHARS: &'static str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
1181
1182 fn new(gen: &mut Gen) -> NormalCharGen {
1183 let chars_range = 1 + match u8::arbitrary(gen) % 100 {
1185 0..=10 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 2),
1186 11..=20 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 4),
1187 21..=30 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 6),
1188 31..=40 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 8),
1189 41..=50 => usize::arbitrary(gen) % (1 + usize::arbitrary(gen) % 10),
1190 _ => usize::arbitrary(gen) % NormalCharGen::ALPHA_CHARS.len(),
1191 };
1192
1193 NormalCharGen { chars_range }
1194 }
1195
1196 fn gen(&self, gen: &mut Gen) -> &'static str {
1197 let index = usize::arbitrary(gen) % self.chars_range;
1198 &NormalCharGen::ALPHA_CHARS[index..=index]
1199 }
1200
1201 fn gen_maybe_special(&self, gen: &mut Gen) -> &'static str {
1202 match u8::arbitrary(gen) % 100 {
1203 0..=1 => "*",
1204 2..=3 => "?",
1205 4..=5 => r"\",
1206 _ => self.gen(gen),
1207 }
1208 }
1209
1210 fn widen(&self, gen: &mut Gen) -> NormalCharGen {
1211 NormalCharGen {
1212 chars_range: 1
1213 + (self.chars_range + (usize::arbitrary(gen) % 5))
1214 % NormalCharGen::ALPHA_CHARS.len(),
1215 }
1216 }
1217 }
1218
1219 fn arbitrary_length(gen: &mut Gen) -> usize {
1220 match u8::arbitrary(gen) % 100 {
1221 0..=15 => 0,
1222 16..=30 => 1 + usize::arbitrary(gen) % 5,
1223 31..=50 => 1 + usize::arbitrary(gen) % 10,
1224 _ => 1 + usize::arbitrary(gen) % gen.size(),
1225 }
1226 }
1227
1228 fn wildcard_tokens_to_pattern(tokens: &[WildcardToken]) -> String {
1229 let mut pattern = String::with_capacity(2 * tokens.len());
1230
1231 for token in tokens {
1232 match token {
1233 WildcardToken::MetasymbolAny => pattern.push('*'),
1234 WildcardToken::MetasymbolOne => pattern.push('?'),
1235 WildcardToken::Symbol(b) => match b {
1236 b'*' => pattern.push_str(r"\*"),
1237 b'?' => pattern.push_str(r"\?"),
1238 b'\\' => pattern.push_str(r"\\"),
1239 b => pattern.push(char::from(*b)),
1240 },
1241 }
1242 }
1243
1244 pattern
1245 }
1246
1247 fn create_input_matching_pattern(
1248 gen: &mut Gen,
1249 normal_chars_gen: &NormalCharGen,
1250 pattern: &[WildcardToken],
1251 ) -> String {
1252 let normal_chars_gen = normal_chars_gen.widen(gen);
1254 let mut input = String::with_capacity(pattern.len());
1255
1256 for token in pattern {
1257 match token {
1258 WildcardToken::MetasymbolAny => {
1259 let len = arbitrary_length(gen);
1260
1261 (0..len).for_each(|_| input.push_str(normal_chars_gen.gen_maybe_special(gen)));
1262 }
1263 WildcardToken::MetasymbolOne => {
1264 input.push_str(normal_chars_gen.gen_maybe_special(gen));
1265 }
1266 WildcardToken::Symbol(b) => {
1267 input.push(char::from(*b));
1268 }
1269 }
1270 }
1271
1272 input
1273 }
1274
1275 fn arbitrary_wildcard_token(gen: &mut Gen, normal_chars_gen: &NormalCharGen) -> WildcardToken {
1276 match u8::arbitrary(gen) % 100 {
1277 0..=20 => WildcardToken::MetasymbolAny,
1278 21..=30 => WildcardToken::MetasymbolOne,
1279 _ => {
1280 match u8::arbitrary(gen) % 100 {
1283 0..=5 => WildcardToken::Symbol(b'*'),
1284 6..=10 => WildcardToken::Symbol(b'?'),
1285 11..=15 => WildcardToken::Symbol(b'\\'),
1286 _ => WildcardToken::Symbol(normal_chars_gen.gen(gen).as_bytes()[0]),
1287 }
1288 }
1289 }
1290 }
1291
1292 fn arbitrary_wildcard_tokens(
1293 gen: &mut Gen,
1294 normal_chars_gen: &NormalCharGen,
1295 ) -> Vec<WildcardToken> {
1296 let len = arbitrary_length(gen);
1297
1298 (0..len).map(|_| arbitrary_wildcard_token(gen, normal_chars_gen)).collect()
1299 }
1300
1301 fn arbitrary_input(gen: &mut Gen, normal_chars_gen: &NormalCharGen) -> String {
1302 let normal_chars_gen = normal_chars_gen.widen(gen);
1304 let len = arbitrary_length(gen);
1305
1306 (0..len).map(|_| normal_chars_gen.gen_maybe_special(gen)).collect()
1307 }
1308
1309 impl Arbitrary for WildcardAndInput {
1310 fn arbitrary(gen: &mut Gen) -> WildcardAndInput {
1311 let will_match = bool::arbitrary(gen);
1312
1313 match will_match {
1314 true => {
1315 let WildcardAndMatchingInput { pattern, input } =
1316 WildcardAndMatchingInput::arbitrary(gen);
1317
1318 WildcardAndInput { pattern, input }
1319 }
1320 false => {
1321 let normal_chars_gen = NormalCharGen::new(gen);
1322 let pattern_tokens = arbitrary_wildcard_tokens(gen, &normal_chars_gen);
1323 let pattern = wildcard_tokens_to_pattern(&pattern_tokens);
1324
1325 let input = arbitrary_input(gen, &normal_chars_gen);
1326
1327 WildcardAndInput { pattern, input }
1328 }
1329 }
1330 }
1331 }
1332
1333 #[derive(Clone, Debug)]
1335 struct WildcardAndMatchingInput {
1336 pattern: String,
1337 input: String,
1338 }
1339
1340 impl Arbitrary for WildcardAndMatchingInput {
1341 fn arbitrary(gen: &mut Gen) -> WildcardAndMatchingInput {
1342 let normal_chars_gen = NormalCharGen::new(gen);
1343 let pattern_tokens = arbitrary_wildcard_tokens(gen, &normal_chars_gen);
1344 let pattern = wildcard_tokens_to_pattern(&pattern_tokens);
1345
1346 let input = {
1347 let input = create_input_matching_pattern(gen, &normal_chars_gen, &pattern_tokens);
1348
1349 assert!(
1350 engine_regex_bytes::matches(&pattern, input.as_bytes(), false),
1351 "failed to create an input that matched the pattern\npattern: {pattern}\ninput : {input}",
1352 );
1353
1354 input
1355 };
1356
1357 WildcardAndMatchingInput { pattern, input }
1358 }
1359 }
1360
1361 #[quickcheck]
1362 fn property_matching_equivalent_to_regex_engine(
1363 WildcardAndInput { pattern, input }: WildcardAndInput,
1364 ) -> bool {
1365 let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1366
1367 wildcard.is_match(input.as_bytes())
1368 == engine_regex_bytes::matches(&pattern, input.as_bytes(), false)
1369 }
1370
1371 #[quickcheck]
1372 fn property_capture_equivalent_to_regex_engine(
1373 WildcardAndInput { pattern, input }: WildcardAndInput,
1374 ) -> bool {
1375 let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1376
1377 wildcard.captures(input.as_bytes())
1378 == engine_regex_bytes::captures(&pattern, input.as_bytes(), false)
1379 }
1380
1381 #[quickcheck]
1382 fn property_capture_length_equals_number_of_metacharacters(
1383 WildcardAndMatchingInput { pattern, input }: WildcardAndMatchingInput,
1384 ) -> bool {
1385 let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1386 let metacharacter_count = wildcard.parsed().filter(|token| token.is_metasymbol()).count();
1387
1388 let captures = wildcard
1389 .captures(input.as_bytes())
1390 .map(|c| c.len())
1391 .expect("this test should only get matching test cases");
1392
1393 captures == metacharacter_count
1394 }
1395
1396 #[quickcheck]
1397 fn property_captures_agree_with_is_match(
1398 WildcardAndInput { pattern, input }: WildcardAndInput,
1399 ) -> bool {
1400 let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1401
1402 let captures = wildcard.captures(input.as_bytes()).map(|c| c.len());
1403
1404 captures.is_some() == wildcard.is_match(input.as_bytes())
1405 }
1406
1407 #[quickcheck]
1408 fn property_captures_filled_in_metasymbols_matches_input(
1409 WildcardAndMatchingInput { pattern, input }: WildcardAndMatchingInput,
1410 ) -> bool {
1411 let wildcard = Wildcard::new(pattern.as_bytes()).expect("invalid wildcard");
1412
1413 let mut captures = wildcard
1414 .captures(input.as_bytes())
1415 .expect("this test should only get matching test cases")
1416 .into_iter();
1417
1418 let mut pattern_filled_in = String::with_capacity(input.len());
1419
1420 for token in wildcard.parsed() {
1421 match token {
1422 WildcardToken::MetasymbolAny | WildcardToken::MetasymbolOne => {
1423 let fill = captures.next().expect("must be present");
1424
1425 pattern_filled_in.push_str(core::str::from_utf8(fill).expect("invalid utf-8"));
1426 }
1427 WildcardToken::Symbol(b) => pattern_filled_in.push(char::from(b)),
1428 }
1429 }
1430
1431 pattern_filled_in == input
1432 }
1433}