1#![allow(
4 clippy::redundant_else,
5 clippy::missing_errors_doc,
6 clippy::match_same_arms,
7 clippy::missing_panics_doc,
8 clippy::too_many_lines,
9 clippy::items_after_statements,
10 clippy::float_cmp,
11 clippy::allow_attributes,
12 let_underscore_drop
13)]
14
15use crate::types::FuzzyLimits;
16
17use super::ast::{
18 Anchor, Ast, CharClass, CharClassItem, Fuzziness, MatchFlags, MrabFuzziness, NamedClass,
19 Quantifier,
20};
21use super::lexer::{Lexer, NamedClassToken, Token};
22use crate::error::{Error, Result};
23
24pub struct Parser<'a> {
26 lexer: Lexer<'a>,
27 capture_count: usize,
28 flags: MatchFlags,
30}
31
32impl<'a> Parser<'a> {
33 pub fn new(pattern: &'a str) -> Self {
35 Parser {
36 lexer: Lexer::new(pattern),
37 capture_count: 0,
38 flags: MatchFlags::default(),
39 }
40 }
41
42 pub fn new_with_flags(pattern: &'a str, verbose: bool, dot_all: bool, ungreedy: bool) -> Self {
44 Parser {
45 lexer: Lexer::new_with_flags(pattern, verbose),
46 capture_count: 0,
47 flags: MatchFlags {
48 verbose,
49 dot_all,
50 ungreedy,
51 ..Default::default()
52 },
53 }
54 }
55
56 pub fn parse(mut self) -> Result<Ast> {
58 let ast = self.parse_alternation()?;
59
60 if !matches!(self.lexer.peek_token()?, Token::Eof) {
61 return Err(Error::parse(
62 self.lexer.position(),
63 "unexpected token after pattern",
64 ));
65 }
66
67 Ok(ast)
68 }
69
70 #[cfg(test)]
72 #[allow(dead_code)]
73 pub fn capture_count(&self) -> usize {
74 self.capture_count
75 }
76
77 #[cfg(test)]
79 #[allow(dead_code)]
80 pub fn flags(&self) -> MatchFlags {
81 self.flags
82 }
83
84 fn parse_alternation(&mut self) -> Result<Ast> {
86 let mut alternatives = vec![self.parse_concat()?];
87
88 while matches!(self.lexer.peek_token()?, Token::Pipe) {
89 self.lexer.next_token()?; alternatives.push(self.parse_concat()?);
91 }
92
93 if alternatives.len() == 1 {
94 Ok(alternatives.pop().unwrap())
95 } else {
96 Ok(Ast::Alternation(alternatives))
97 }
98 }
99
100 fn parse_concat(&mut self) -> Result<Ast> {
102 let mut parts = Vec::new();
103
104 loop {
105 match self.lexer.peek_token()? {
106 Token::Eof | Token::Pipe | Token::CloseParen => break,
107 _ => parts.push(self.parse_quantified()?),
108 }
109 }
110
111 match parts.len() {
112 0 => Ok(Ast::Empty),
113 1 => Ok(parts.pop().unwrap()),
114 _ => {
115 let merged = Self::merge_literals(parts);
117 if merged.len() == 1 {
118 Ok(merged.into_iter().next().unwrap())
119 } else {
120 Ok(Ast::Concat(merged))
121 }
122 }
123 }
124 }
125
126 fn merge_literals(parts: Vec<Ast>) -> Vec<Ast> {
129 let mut result = Vec::new();
130 let mut current_literal = String::new();
131 let mut pending_fuzziness: Option<Fuzziness> = None;
132
133 for part in parts {
134 match part {
135 Ast::Char(ch) => {
136 if pending_fuzziness.is_some() {
138 let fuzz = pending_fuzziness.take().unwrap();
139 result.push(Ast::Literal {
140 text: std::mem::take(&mut current_literal),
141 fuzziness: fuzz,
142 });
143 }
144 current_literal.push(ch);
145 }
146 Ast::Literal { text, fuzziness } => {
147 if fuzziness == Fuzziness::Inherited {
148 if pending_fuzziness.is_some() {
150 let fuzz = pending_fuzziness.take().unwrap();
151 result.push(Ast::Literal {
152 text: std::mem::take(&mut current_literal),
153 fuzziness: fuzz,
154 });
155 }
156 current_literal.push_str(&text);
157 } else {
158 current_literal.push_str(&text);
161 pending_fuzziness = Some(fuzziness);
162 }
163 }
164 other => {
165 if !current_literal.is_empty() {
167 let fuzz = pending_fuzziness.take().unwrap_or(Fuzziness::Inherited);
168 result.push(Ast::Literal {
169 text: std::mem::take(&mut current_literal),
170 fuzziness: fuzz,
171 });
172 }
173 result.push(other);
174 }
175 }
176 }
177
178 if !current_literal.is_empty() {
180 let fuzz = pending_fuzziness.unwrap_or(Fuzziness::Inherited);
181 result.push(Ast::Literal {
182 text: current_literal,
183 fuzziness: fuzz,
184 });
185 }
186
187 result
188 }
189
190 fn parse_quantified(&mut self) -> Result<Ast> {
192 let mut expr = self.parse_atom()?;
193
194 if matches!(&expr, Ast::Literal { .. } | Ast::Char(_))
196 && matches!(self.lexer.peek_token()?, Token::Tilde)
197 {
198 self.lexer.next_token()?; let fuzziness = self.parse_fuzziness()?;
200
201 let text = match expr {
202 Ast::Literal { text, .. } => text,
203 Ast::Char(ch) => ch.to_string(),
204 _ => unreachable!(),
205 };
206
207 expr = Ast::Literal { text, fuzziness };
208 }
209
210 match self.lexer.peek_token()? {
212 Token::Star | Token::Plus | Token::Question | Token::OpenBrace => {
213 let (quantifier, greedy) = self.parse_quantifier()?;
214 Ok(Ast::quantified(expr, quantifier, greedy))
215 }
216 _ => Ok(expr),
217 }
218 }
219
220 fn parse_quantifier(&mut self) -> Result<(Quantifier, bool)> {
222 let token = self.lexer.next_token()?;
224
225 let is_possessive = match &token {
229 Token::Star | Token::Plus | Token::Question => {
230 if matches!(self.lexer.peek_token()?, Token::Plus) {
232 let pos = self.lexer.position();
235 self.lexer.next_token()?; let followed_by_question = matches!(self.lexer.peek_token()?, Token::Question);
237 self.lexer.set_position(pos);
239 if followed_by_question {
240 return Err(Error::parse(
241 self.lexer.position(),
242 "possessive quantifier cannot be followed by '?'",
243 ));
244 }
245 true } else {
247 false
248 }
249 }
250 _ => false,
251 };
252
253 let quantifier = match token {
254 Token::Star => {
255 if is_possessive {
256 self.lexer.next_token()?; Quantifier::ZeroOrMore
258 } else {
259 Quantifier::ZeroOrMore
260 }
261 }
262 Token::Plus => {
263 if is_possessive {
264 self.lexer.next_token()?; Quantifier::OneOrMore
266 } else {
267 Quantifier::OneOrMore
268 }
269 }
270 Token::Question => {
271 if is_possessive {
272 self.lexer.next_token()?; Quantifier::ZeroOrOne
274 } else {
275 Quantifier::ZeroOrOne
276 }
277 }
278 Token::OpenBrace => self.parse_brace_quantifier()?,
279 _ => unreachable!(),
280 };
281
282 let has_modifier = matches!(self.lexer.peek_token()?, Token::Question);
286 if has_modifier {
287 self.lexer.next_token()?;
288 }
289
290 let greedy = if self.flags.ungreedy {
294 has_modifier } else {
296 !has_modifier };
298
299 Ok((quantifier, greedy))
300 }
301
302 fn parse_brace_quantifier(&mut self) -> Result<Quantifier> {
304 let start_pos = self.lexer.position();
305
306 let min = self.parse_number_usize()?;
308
309 match self.lexer.peek_token()? {
310 Token::CloseBrace => {
311 self.lexer.next_token()?;
312 Ok(Quantifier::Exactly(min))
313 }
314 Token::Char(',') => {
315 self.lexer.next_token()?; if self.lexer.peek_token()? == Token::CloseBrace {
318 self.lexer.next_token()?;
319 Ok(Quantifier::AtLeast(min))
320 } else {
321 let max = self.parse_number_usize()?;
322 if max < min {
323 return Err(Error::invalid_quantifier(
324 start_pos,
325 format!("max ({max}) cannot be less than min ({min})"),
326 ));
327 }
328 match self.lexer.next_token()? {
329 Token::CloseBrace => Ok(Quantifier::Between(min, max)),
330 _ => Err(Error::unclosed("quantifier", start_pos)),
331 }
332 }
333 }
334 _ => Err(Error::invalid_quantifier(
335 start_pos,
336 "expected ',' or '}' in quantifier",
337 )),
338 }
339 }
340
341 fn parse_number_usize(&mut self) -> Result<usize> {
343 let start_pos = self.lexer.position();
344 let mut num = 0usize;
345 let mut found_digit = false;
346
347 while let Token::Char(ch) = self.lexer.peek_token()? {
348 let Some(digit) = ch.to_digit(10).and_then(|x| u8::try_from(x).ok()) else {
349 break;
350 };
351 found_digit = true;
352 num = num
353 .checked_mul(10)
354 .and_then(|n| n.checked_add(digit as usize))
355 .ok_or_else(|| Error::invalid_quantifier(start_pos, "number too large"))?;
356 self.lexer.next_token()?;
357 }
358
359 if !found_digit {
360 return Err(Error::invalid_quantifier(start_pos, "expected number"));
361 }
362
363 Ok(num)
364 }
365
366 fn parse_number_u8(&mut self) -> Result<u8> {
368 let start_pos = self.lexer.position();
369 let mut num = 0u8;
370 let mut found_digit = false;
371
372 while let Token::Char(ch) = self.lexer.peek_token()? {
373 let digit = ch.to_string().parse::<u8>().ok();
374 let Some(digit) = digit else {
375 break;
376 };
377 found_digit = true;
378 num = num
379 .checked_mul(10)
380 .and_then(|n| n.checked_add(digit))
381 .ok_or_else(|| Error::invalid_quantifier(start_pos, "number too large"))?;
382 self.lexer.next_token()?;
383 }
384
385 if !found_digit {
386 return Err(Error::invalid_quantifier(start_pos, "expected number"));
387 }
388
389 Ok(num)
390 }
391
392 fn parse_fuzziness(&mut self) -> Result<Fuzziness> {
394 let start_pos = self.lexer.position();
395
396 match self.lexer.peek_token()? {
397 Token::OpenBrace => {
398 self.lexer.next_token()?; let limits = self.parse_detailed_fuzziness()?;
400 Ok(Fuzziness::Detailed(limits))
401 }
402 Token::Char(ch) if ch.is_ascii_digit() => {
403 let num = self.parse_number_u8()?;
404 if num == 0 {
405 Ok(Fuzziness::Exact)
406 } else {
407 Ok(Fuzziness::Edits(num))
408 }
409 }
410 _ => Err(Error::invalid_fuzziness(
411 start_pos,
412 "expected number or '{' after '~'",
413 )),
414 }
415 }
416
417 fn parse_detailed_fuzziness(&mut self) -> Result<FuzzyLimits> {
419 let start_pos = self.lexer.position();
420 let mut limits = FuzzyLimits::new();
421
422 loop {
423 let key = match self.lexer.next_token()? {
425 Token::Char(ch) => ch,
426 Token::CloseBrace => break,
427 _ => {
428 return Err(Error::invalid_fuzziness(
429 start_pos,
430 "expected fuzziness key (i, d, s, w, e)",
431 ));
432 }
433 };
434
435 match self.lexer.next_token()? {
437 Token::Char('=') => {}
438 _ => {
439 return Err(Error::invalid_fuzziness(
440 start_pos,
441 "expected '=' after fuzziness key",
442 ));
443 }
444 }
445
446 let value = self.parse_number_u8()?;
448
449 limits = match key {
451 'i' => limits.insertions(value),
452 'd' => limits.deletions(value),
453 's' => limits.substitutions(value),
454 'w' => limits.swaps(value),
455 'e' => limits.edits(value),
456 _ => {
457 return Err(Error::invalid_fuzziness(
458 start_pos,
459 format!("unknown fuzziness key: '{key}'"),
460 ));
461 }
462 };
463
464 match self.lexer.peek_token()? {
466 Token::Char(',') => {
467 self.lexer.next_token()?;
468 }
469 Token::CloseBrace => {
470 self.lexer.next_token()?;
471 break;
472 }
473 _ => {
474 return Err(Error::invalid_fuzziness(
475 start_pos,
476 "expected ',' or '}' in fuzziness specification",
477 ));
478 }
479 }
480 }
481
482 Ok(limits)
483 }
484
485 fn parse_atom(&mut self) -> Result<Ast> {
487 match self.lexer.next_token()? {
488 Token::Char(ch) | Token::Escaped(ch) => Ok(Ast::Char(ch)),
489 Token::Dot => {
490 if self.flags.dot_all {
491 Ok(Ast::CharClass(CharClass::any_with_newlines()))
492 } else {
493 Ok(Ast::CharClass(CharClass::any()))
494 }
495 }
496 Token::Hyphen => Ok(Ast::Char('-')),
498
499 Token::NamedClass(class) => Ok(Self::parse_named_class(class)),
500
501 Token::OpenParen => self.parse_capture_group(),
502 Token::NonCapturing => self.parse_non_capturing_group(),
503 Token::PositiveLookahead => self.parse_lookahead(true),
504 Token::NegativeLookahead => self.parse_lookahead(false),
505 Token::PositiveLookbehind => self.parse_lookbehind(true),
506 Token::NegativeLookbehind => self.parse_lookbehind(false),
507 Token::NamedGroup(name) => self.parse_named_capture_group(name),
508
509 Token::BestMatch => {
511 self.flags.best_match = true;
512 Ok(Ast::Empty)
514 }
515 Token::EnhanceMatch => {
516 self.flags.enhance_match = true;
517 Ok(Ast::Empty)
519 }
520 Token::PosixMatch => {
521 self.flags.posix = true;
522 Ok(Ast::Empty)
524 }
525 Token::Verbose => {
526 self.flags.verbose = true;
527 Ok(Ast::Empty)
528 }
529 Token::DotAll => {
530 self.flags.dot_all = true;
531 Ok(Ast::Empty)
532 }
533 Token::MultiLine => {
534 self.flags.multi_line = true;
535 Ok(Ast::Empty)
536 }
537 Token::Ungreedy => {
538 self.flags.ungreedy = true;
539 Ok(Ast::Empty)
540 }
541 Token::CaseInsensitive => {
542 self.flags.case_insensitive = true;
543 Ok(Ast::Empty)
544 }
545 Token::Global => {
546 self.flags.global = true;
547 Ok(Ast::Empty)
548 }
549 Token::Unicode => {
550 self.flags.unicode = true;
551 Ok(Ast::Empty)
552 }
553
554 Token::OpenBracket => self.parse_char_class(),
555
556 Token::Caret => Ok(Ast::Anchor(Anchor::Start)),
557 Token::Dollar => Ok(Ast::Anchor(Anchor::End)),
558
559 Token::Backreference(n) => {
560 if n > self.capture_count {
561 Err(Error::invalid_backreference(
562 n,
563 self.lexer.position(),
564 format!("backreference to group {n} that doesn't exist yet"),
565 ))
566 } else {
567 let fuzziness = if matches!(self.lexer.peek_token()?, Token::OpenBrace) {
569 if self.peek_is_mrab_fuzziness() {
571 self.lexer.next_token()?; self.parse_mrab_fuzziness()?
573 } else {
574 Fuzziness::Inherited
575 }
576 } else {
577 Fuzziness::Inherited
578 };
579 Ok(Ast::Backreference {
580 group: n,
581 fuzziness,
582 })
583 }
584 }
585
586 Token::NamedList(name) => {
587 let fuzziness = if matches!(self.lexer.peek_token()?, Token::OpenBrace) {
589 if self.peek_is_mrab_fuzziness() {
591 self.lexer.next_token()?; self.parse_mrab_fuzziness()?
593 } else {
594 Fuzziness::Inherited
595 }
596 } else {
597 Fuzziness::Inherited
598 };
599
600 Ok(Ast::NonCapturingGroup {
602 expr: Box::new(Ast::NamedList { name: name.clone() }),
603 fuzziness,
604 })
605 }
606
607 Token::ResetMatchStart => {
608 Ok(Ast::ResetMatchStart)
610 }
611
612 Token::AtomicGroup => self.parse_atomic_group(),
613
614 Token::RecursivePattern => {
615 Ok(Ast::RecursivePattern)
617 }
618
619 Token::RecursiveGroup(group) => {
620 Ok(Ast::RecursiveGroup { group })
622 }
623
624 Token::RecursiveNamedGroup(name) => {
625 Ok(Ast::RecursiveNamedGroup { name })
627 }
628
629 token => Err(Error::parse(
630 self.lexer.position(),
631 format!("unexpected token: {token:?}"),
632 )),
633 }
634 }
635
636 fn parse_named_class(class: NamedClassToken) -> Ast {
638 match class {
639 NamedClassToken::Digit => Ast::CharClass(CharClass::digit()),
640 NamedClassToken::NotDigit => Ast::CharClass(CharClass::new(
641 true,
642 vec![CharClassItem::Named(NamedClass::Digit)],
643 )),
644 NamedClassToken::Word => Ast::CharClass(CharClass::word()),
645 NamedClassToken::NotWord => Ast::CharClass(CharClass::new(
646 true,
647 vec![CharClassItem::Named(NamedClass::Word)],
648 )),
649 NamedClassToken::Whitespace => Ast::CharClass(CharClass::whitespace()),
650 NamedClassToken::NotWhitespace => Ast::CharClass(CharClass::new(
651 true,
652 vec![CharClassItem::Named(NamedClass::Whitespace)],
653 )),
654 NamedClassToken::WordBoundary => Ast::Anchor(Anchor::WordBoundary),
655 NamedClassToken::NotWordBoundary => Ast::Anchor(Anchor::NotWordBoundary),
656 }
657 }
658
659 fn parse_capture_group(&mut self) -> Result<Ast> {
661 self.capture_count += 1;
662 let index = self.capture_count;
663
664 let expr = self.parse_alternation()?;
665
666 match self.lexer.next_token()? {
667 Token::CloseParen => {}
668 _ => return Err(Error::unclosed("group", self.lexer.position())),
669 }
670
671 let fuzziness = if matches!(self.lexer.peek_token()?, Token::Tilde) {
673 self.lexer.next_token()?;
674 self.parse_fuzziness()?
675 } else {
676 Fuzziness::Inherited
677 };
678
679 let expr = if matches!(fuzziness, Fuzziness::Inherited) {
681 expr
682 } else {
683 Self::apply_group_fuzziness(expr, fuzziness)
684 };
685
686 Ok(Ast::Group {
687 index,
688 name: None,
689 expr: Box::new(expr),
690 })
691 }
692
693 fn parse_named_capture_group(&mut self, name: String) -> Result<Ast> {
696 self.capture_count += 1;
697 let index = self.capture_count;
698
699 let expr = self.parse_alternation()?;
700
701 match self.lexer.next_token()? {
702 Token::CloseParen => {}
703 _ => return Err(Error::unclosed("named group", self.lexer.position())),
704 }
705
706 let fuzziness = match self.lexer.peek_token()? {
708 Token::Tilde => {
709 self.lexer.next_token()?;
710 self.parse_fuzziness()?
711 }
712 Token::OpenBrace => {
713 if self.peek_is_mrab_fuzziness() {
715 self.lexer.next_token()?; self.parse_mrab_fuzziness()?
717 } else {
718 Fuzziness::Inherited
719 }
720 }
721 _ => Fuzziness::Inherited,
722 };
723
724 let expr = if matches!(fuzziness, Fuzziness::Inherited) {
725 expr
726 } else {
727 Self::apply_group_fuzziness(expr, fuzziness)
728 };
729
730 Ok(Ast::Group {
731 index,
732 name: Some(name),
733 expr: Box::new(expr),
734 })
735 }
736
737 fn parse_non_capturing_group(&mut self) -> Result<Ast> {
740 let expr = self.parse_alternation()?;
741
742 match self.lexer.next_token()? {
743 Token::CloseParen => {}
744 _ => {
745 return Err(Error::unclosed(
746 "non-capturing group",
747 self.lexer.position(),
748 ));
749 }
750 }
751
752 let fuzziness = match self.lexer.peek_token()? {
754 Token::Tilde => {
755 self.lexer.next_token()?;
756 self.parse_fuzziness()?
757 }
758 Token::OpenBrace => {
759 if self.peek_is_mrab_fuzziness() {
761 self.lexer.next_token()?; self.parse_mrab_fuzziness()?
763 } else {
764 Fuzziness::Inherited
765 }
766 }
767 _ => Fuzziness::Inherited,
768 };
769
770 Ok(Ast::NonCapturingGroup {
771 expr: Box::new(expr),
772 fuzziness,
773 })
774 }
775
776 fn parse_atomic_group(&mut self) -> Result<Ast> {
779 let expr = self.parse_alternation()?;
780
781 match self.lexer.next_token()? {
782 Token::CloseParen => {}
783 _ => return Err(Error::unclosed("atomic group", self.lexer.position())),
784 }
785
786 Ok(Ast::AtomicGroup {
787 expr: Box::new(expr),
788 })
789 }
790
791 fn peek_is_mrab_fuzziness(&self) -> bool {
794 let saved_chars = self.lexer.remaining();
796
797 if let Some(first_char) = saved_chars.chars().nth(1) {
799 match first_char {
802 'i' | 'd' | 's' | 'e' | 't' | 'c' => return true,
803 c if c.is_ascii_digit() => {
804 let rest = &saved_chars[1..];
807 for ch in rest.chars() {
808 if ch == ',' || ch == '}' {
809 return false; }
811 if ch == '<' || ch == '=' {
812 return true; }
814 if ch.is_alphabetic() {
815 return true; }
817 if !ch.is_ascii_digit() {
818 break;
819 }
820 }
821 }
822 _ => return false,
823 }
824 }
825 false
826 }
827
828 fn parse_mrab_fuzziness(&mut self) -> Result<Fuzziness> {
830 let start_pos = self.lexer.position();
831 let mut mrab = MrabFuzziness::new();
832
833 loop {
834 match self.lexer.peek_token()? {
835 Token::CloseBrace => {
836 self.lexer.next_token()?;
837 break;
838 }
839 Token::Char(c) if c.is_ascii_digit() => {
840 let num = self.parse_number_u8()?;
842
843 match self.lexer.peek_token()? {
844 Token::Char('<') => {
845 self.lexer.next_token()?; let lower_inclusive =
848 matches!(self.lexer.peek_token()?, Token::Char('='));
849 if lower_inclusive {
850 self.lexer.next_token()?; }
852
853 let Token::Char(key) = self.lexer.next_token()? else {
855 return Err(Error::invalid_fuzziness(
856 start_pos,
857 "expected error type",
858 ));
859 };
860
861 if key != 'e' {
862 return Err(Error::invalid_fuzziness(
863 start_pos,
864 "range constraint only valid for 'e'",
865 ));
866 }
867
868 let min_val = if lower_inclusive { num } else { num + 1 };
870 mrab.min_errors = Some(min_val);
871
872 if matches!(self.lexer.peek_token()?, Token::Char('<')) {
874 self.lexer.next_token()?;
875 let upper_inclusive =
876 matches!(self.lexer.peek_token()?, Token::Char('='));
877 if upper_inclusive {
878 self.lexer.next_token()?;
879 }
880 let mut max = self.parse_number_u8()?;
881 if !upper_inclusive && max > 0 {
883 max -= 1;
884 }
885 mrab.max_errors = Some(max);
886 }
887 }
888 Token::Char(k) if "idst".contains(k) => {
889 self.lexer.next_token()?;
891 match k {
892 'i' => mrab.insertion_cost = Some(num),
893 'd' => mrab.deletion_cost = Some(num),
894 's' => mrab.substitution_cost = Some(num),
895 't' => mrab.transposition_cost = Some(num),
896 _ => {}
897 }
898 self.parse_cost_continuation(&mut mrab, start_pos)?;
900 }
901 _ => {
902 return Err(Error::invalid_fuzziness(
903 start_pos,
904 "expected error type or '<'",
905 ));
906 }
907 }
908 }
909 Token::Char(key) if "idsetc".contains(key) => {
910 self.lexer.next_token()?;
911
912 match self.lexer.peek_token()? {
914 Token::Char('<') => {
915 self.lexer.next_token()?;
916 let inclusive = matches!(self.lexer.peek_token()?, Token::Char('='));
918 if inclusive {
919 self.lexer.next_token()?;
920 }
921 let mut value = self.parse_number_u8()?;
922
923 if !inclusive && value > 0 {
925 value -= 1;
926 }
927
928 match key {
929 'i' => mrab.max_insertions = Some(value),
930 'd' => mrab.max_deletions = Some(value),
931 's' => mrab.max_substitutions = Some(value),
932 't' => mrab.max_transpositions = Some(value),
933 'e' => mrab.max_errors = Some(value),
934 'c' => {
935 let adjusted = if inclusive { value + 1 } else { value };
938 mrab.insertion_cost = Some(1);
939 mrab.deletion_cost = Some(1);
940 mrab.substitution_cost = Some(1);
941 mrab.transposition_cost = Some(1);
942 mrab.max_cost = Some(adjusted);
943 }
944 _ => {}
945 }
946 }
947 Token::Char(',') | Token::CloseBrace => {
948 match key {
951 'i' => mrab.unlimited_insertions = true,
952 'd' => mrab.unlimited_deletions = true,
953 's' => mrab.unlimited_substitutions = true,
954 't' => mrab.unlimited_transpositions = true,
955 'e' => mrab.unlimited_errors = true,
956 'c' => {
957 mrab.unlimited_errors = true;
959 }
960 _ => {}
961 }
962 }
963 _ => {
964 return Err(Error::invalid_fuzziness(
965 start_pos,
966 "expected '<', '<=' or ','",
967 ));
968 }
969 }
970 }
971 _ => {
972 return Err(Error::invalid_fuzziness(
973 start_pos,
974 "invalid fuzziness specification",
975 ));
976 }
977 }
978
979 match self.lexer.peek_token()? {
981 Token::Char(',') => {
982 self.lexer.next_token()?;
983 }
984 Token::Char(':') => {
985 self.lexer.next_token()?; let class = self.parse_char_class_restriction()?;
988 mrab.substitution_chars = Some(class.clone());
991 mrab.insertion_chars = Some(class.clone());
992 mrab.deletion_chars = Some(class);
993 }
994 _ => {}
995 }
996 }
997
998 Ok(Fuzziness::MrabStyle(mrab))
999 }
1000
1001 fn parse_char_class_restriction(&mut self) -> Result<CharClass> {
1003 match self.lexer.peek_token()? {
1004 Token::OpenBracket => {
1005 self.lexer.next_token()?; self.parse_char_class_inner()
1007 }
1008 Token::NamedClass(class) => {
1009 self.lexer.next_token()?;
1010 let items = match class {
1012 NamedClassToken::Digit => vec![CharClassItem::Named(NamedClass::Digit)],
1013 NamedClassToken::NotDigit => vec![CharClassItem::Named(NamedClass::NotDigit)],
1014 NamedClassToken::Word => vec![CharClassItem::Named(NamedClass::Word)],
1015 NamedClassToken::NotWord => vec![CharClassItem::Named(NamedClass::NotWord)],
1016 NamedClassToken::Whitespace => {
1017 vec![CharClassItem::Named(NamedClass::Whitespace)]
1018 }
1019 NamedClassToken::NotWhitespace => {
1020 vec![CharClassItem::Named(NamedClass::NotWhitespace)]
1021 }
1022 _ => {
1023 return Err(Error::invalid_fuzziness(
1024 self.lexer.position(),
1025 "invalid character class in fuzziness restriction",
1026 ));
1027 }
1028 };
1029 Ok(CharClass::new(false, items))
1030 }
1031 _ => Err(Error::invalid_fuzziness(
1032 self.lexer.position(),
1033 "expected character class after ':'",
1034 )),
1035 }
1036 }
1037
1038 fn parse_char_class_inner(&mut self) -> Result<CharClass> {
1040 let start_pos = self.lexer.position();
1041 let mut items = Vec::new();
1042
1043 let negated = matches!(self.lexer.peek_token()?, Token::Caret);
1045 if negated {
1046 self.lexer.next_token()?;
1047 }
1048
1049 loop {
1050 match self.lexer.peek_token()? {
1051 Token::CloseBracket => {
1052 self.lexer.next_token()?;
1053 break;
1054 }
1055 Token::Eof => return Err(Error::unclosed("character class", start_pos)),
1056 _ => {
1057 let new_items = self.parse_char_class_item()?;
1058 items.extend(new_items);
1059 }
1060 }
1061 }
1062
1063 Ok(CharClass::new(negated, items))
1064 }
1065
1066 fn parse_cost_continuation(
1068 &mut self,
1069 mrab: &mut MrabFuzziness,
1070 start_pos: usize,
1071 ) -> Result<()> {
1072 loop {
1073 match self.lexer.peek_token()? {
1074 Token::Plus | Token::Char('+') => {
1075 self.lexer.next_token()?;
1076 let cost = self.parse_number_u8()?;
1077 let key = match self.lexer.next_token()? {
1078 Token::Char(k) if "idst".contains(k) => k,
1079 _ => {
1080 return Err(Error::invalid_fuzziness(
1081 start_pos,
1082 "expected error type after cost",
1083 ));
1084 }
1085 };
1086 match key {
1087 'i' => mrab.insertion_cost = Some(cost),
1088 'd' => mrab.deletion_cost = Some(cost),
1089 's' => mrab.substitution_cost = Some(cost),
1090 't' => mrab.transposition_cost = Some(cost),
1091 _ => {}
1092 }
1093 }
1094 Token::Char('<') => {
1095 self.lexer.next_token()?;
1096 let inclusive = matches!(self.lexer.peek_token()?, Token::Char('='));
1098 if inclusive {
1099 self.lexer.next_token()?;
1100 }
1101 let max_cost = self.parse_number_u8()?;
1102 let adjusted_cost = if inclusive { max_cost + 1 } else { max_cost };
1108 mrab.max_cost = Some(adjusted_cost);
1109 break;
1110 }
1111 _ => break,
1112 }
1113 }
1114 Ok(())
1115 }
1116
1117 fn apply_group_fuzziness(ast: Ast, fuzziness: Fuzziness) -> Ast {
1119 match ast {
1120 Ast::Literal { text, .. } => Ast::Literal {
1121 text,
1122 fuzziness: fuzziness.clone(),
1123 },
1124 Ast::Char(ch) => Ast::Literal {
1125 text: ch.to_string(),
1126 fuzziness,
1127 },
1128 Ast::Concat(parts) => Ast::Concat(
1129 parts
1130 .into_iter()
1131 .map(|p| Self::apply_group_fuzziness(p, fuzziness.clone()))
1132 .collect(),
1133 ),
1134 Ast::Alternation(alts) => Ast::Alternation(
1135 alts.into_iter()
1136 .map(|a| Self::apply_group_fuzziness(a, fuzziness.clone()))
1137 .collect(),
1138 ),
1139 Ast::Group { index, name, expr } => Ast::Group {
1140 index,
1141 name,
1142 expr: Box::new(Self::apply_group_fuzziness(*expr, fuzziness)),
1143 },
1144 Ast::NonCapturingGroup { expr, .. } => Ast::NonCapturingGroup {
1145 expr: Box::new(Self::apply_group_fuzziness(*expr, fuzziness.clone())),
1146 fuzziness,
1147 },
1148 Ast::Quantified {
1149 expr,
1150 quantifier,
1151 greedy,
1152 } => Ast::Quantified {
1153 expr: Box::new(Self::apply_group_fuzziness(*expr, fuzziness)),
1154 quantifier,
1155 greedy,
1156 },
1157 other => other,
1159 }
1160 }
1161
1162 fn parse_lookahead(&mut self, positive: bool) -> Result<Ast> {
1164 let expr = self.parse_alternation()?;
1165
1166 match self.lexer.next_token()? {
1167 Token::CloseParen => {}
1168 _ => return Err(Error::unclosed("lookahead", self.lexer.position())),
1169 }
1170
1171 Ok(Ast::Lookahead {
1172 positive,
1173 expr: Box::new(expr),
1174 })
1175 }
1176
1177 fn parse_lookbehind(&mut self, positive: bool) -> Result<Ast> {
1179 let expr = self.parse_alternation()?;
1180
1181 match self.lexer.next_token()? {
1182 Token::CloseParen => {}
1183 _ => return Err(Error::unclosed("lookbehind", self.lexer.position())),
1184 }
1185
1186 Ok(Ast::Lookbehind {
1187 positive,
1188 expr: Box::new(expr),
1189 })
1190 }
1191
1192 fn parse_char_class(&mut self) -> Result<Ast> {
1194 let start_pos = self.lexer.position();
1195 let mut items = Vec::new();
1196
1197 let negated = matches!(self.lexer.peek_token()?, Token::Caret);
1199 if negated {
1200 self.lexer.next_token()?;
1201 }
1202
1203 if matches!(self.lexer.peek_token()?, Token::CloseBracket) {
1205 self.lexer.next_token()?;
1206 items.push(CharClassItem::Single(']'));
1207 }
1208
1209 loop {
1210 match self.lexer.peek_token()? {
1211 Token::CloseBracket => {
1212 self.lexer.next_token()?;
1213 break;
1214 }
1215 Token::Eof => return Err(Error::unclosed("character class", start_pos)),
1216 _ => {
1217 let new_items = self.parse_char_class_item()?;
1218 items.extend(new_items);
1219 }
1220 }
1221 }
1222
1223 if items.is_empty() {
1224 return Err(Error::invalid_char_class(
1225 start_pos,
1226 "empty character class",
1227 ));
1228 }
1229
1230 Ok(Ast::CharClass(CharClass::new(negated, items)))
1231 }
1232
1233 fn parse_char_class_item(&mut self) -> Result<Vec<CharClassItem>> {
1237 if let Token::NamedClass(class) = self.lexer.peek_token()? {
1239 self.lexer.next_token()?; let named = match class {
1241 NamedClassToken::Digit => NamedClass::Digit,
1242 NamedClassToken::NotDigit => NamedClass::NotDigit,
1243 NamedClassToken::Word => NamedClass::Word,
1244 NamedClassToken::NotWord => NamedClass::NotWord,
1245 NamedClassToken::Whitespace => NamedClass::Whitespace,
1246 NamedClassToken::NotWhitespace => NamedClass::NotWhitespace,
1247 NamedClassToken::WordBoundary | NamedClassToken::NotWordBoundary => {
1249 return Err(Error::invalid_char_class(
1250 self.lexer.position(),
1251 "word boundary \\b/\\B not valid inside character class".to_string(),
1252 ));
1253 }
1254 };
1255 return Ok(vec![CharClassItem::Named(named)]);
1256 }
1257
1258 let ch = self.parse_char_class_char()?;
1259
1260 if matches!(self.lexer.peek_token()?, Token::Hyphen) {
1262 let saved_pos = self.lexer.position();
1264 self.lexer.next_token()?; if self.lexer.peek_token()? == Token::CloseBracket {
1267 return Ok(vec![CharClassItem::Single(ch), CharClassItem::Single('-')]);
1269 } else {
1270 let end_ch = self.parse_char_class_char()?;
1271 if end_ch < ch {
1272 return Err(Error::invalid_char_class(
1273 saved_pos,
1274 format!("invalid range: '{ch}'-'{end_ch}' (end before start)"),
1275 ));
1276 }
1277 return Ok(vec![CharClassItem::Range(ch, end_ch)]);
1278 }
1279 }
1280
1281 Ok(vec![CharClassItem::Single(ch)])
1282 }
1283
1284 fn parse_char_class_char(&mut self) -> Result<char> {
1287 match self.lexer.next_token()? {
1288 Token::Char(ch) | Token::Escaped(ch) => Ok(ch),
1289 Token::Hyphen => Ok('-'),
1290 Token::Dot => Ok('.'),
1292 Token::Star => Ok('*'),
1293 Token::Plus => Ok('+'),
1294 Token::Question => Ok('?'),
1295 Token::Pipe => Ok('|'),
1296 Token::Caret => Ok('^'), Token::Dollar => Ok('$'),
1298 Token::OpenParen => Ok('('),
1299 Token::CloseParen => Ok(')'),
1300 Token::OpenBrace => Ok('{'),
1301 Token::CloseBrace => Ok('}'),
1302 Token::Tilde => Ok('~'),
1303 Token::NamedClass(class) => {
1304 Err(Error::invalid_char_class(
1307 self.lexer.position(),
1308 format!(
1309 "named class {class:?} not supported inside character class (use separately)"
1310 ),
1311 ))
1312 }
1313 token => Err(Error::invalid_char_class(
1314 self.lexer.position(),
1315 format!("unexpected token in character class: {token:?}"),
1316 )),
1317 }
1318 }
1319}
1320
1321#[derive(Debug, Clone)]
1323pub struct ParseResult {
1324 pub ast: Ast,
1326 pub flags: MatchFlags,
1328 pub capture_count: usize,
1330}
1331
1332pub fn parse(pattern: &str) -> Result<Ast> {
1334 let parser = Parser::new(pattern);
1335 parser.parse()
1336}
1337
1338pub fn parse_with_flags(
1340 pattern: &str,
1341 verbose: bool,
1342 dot_all: bool,
1343 ungreedy: bool,
1344) -> Result<ParseResult> {
1345 let mut parser = Parser::new_with_flags(pattern, verbose, dot_all, ungreedy);
1346 let ast = parser.parse_alternation()?;
1348
1349 if !matches!(parser.lexer.peek_token()?, Token::Eof) {
1350 return Err(Error::parse(
1351 parser.lexer.position(),
1352 "unexpected token after pattern",
1353 ));
1354 }
1355
1356 Ok(ParseResult {
1357 ast,
1358 flags: parser.flags,
1359 capture_count: parser.capture_count,
1360 })
1361}
1362
1363#[cfg(test)]
1364mod tests {
1365 use super::*;
1366
1367 #[test]
1368 fn test_simple_literal() {
1369 let ast = parse("hello").unwrap();
1370 assert!(matches!(ast, Ast::Literal { text, .. } if text == "hello"));
1371 }
1372
1373 #[test]
1374 fn test_alternation() {
1375 let ast = parse("a|b|c").unwrap();
1376 assert!(matches!(ast, Ast::Alternation(_)));
1377 }
1378
1379 #[test]
1380 fn test_quantifiers() {
1381 let ast = parse("a*b+c?d{2}e{1,3}").unwrap();
1382 assert!(matches!(ast, Ast::Concat(_)));
1383 }
1384
1385 #[test]
1386 fn test_capture_group() {
1387 let ast = parse("(abc)").unwrap();
1388 assert!(matches!(ast, Ast::Group { index: 1, .. }));
1389 }
1390
1391 #[test]
1392 fn test_char_class() {
1393 let ast = parse("[a-z]").unwrap();
1394 assert!(matches!(ast, Ast::CharClass(_)));
1395 }
1396
1397 #[test]
1398 fn test_fuzziness_simple() {
1399 let ast = parse("hello~2").unwrap();
1400 match ast {
1401 Ast::Literal { text, fuzziness } => {
1402 assert_eq!(text, "hello");
1403 assert!(matches!(fuzziness, Fuzziness::Edits(2)));
1404 }
1405 _ => panic!("expected literal with fuzziness"),
1406 }
1407 }
1408
1409 #[test]
1410 fn test_fuzziness_detailed() {
1411 let ast = parse("hello~{i=1,d=2}").unwrap();
1412 match ast {
1413 Ast::Literal { text, fuzziness } => {
1414 assert_eq!(text, "hello");
1415 assert!(matches!(fuzziness, Fuzziness::Detailed(_)));
1416 }
1417 _ => panic!("expected literal with detailed fuzziness"),
1418 }
1419 }
1420
1421 #[test]
1422 fn test_group_fuzziness() {
1423 let ast = parse("(hello world)~2").unwrap();
1424 assert!(matches!(ast, Ast::Group { .. }));
1425 }
1426
1427 #[test]
1428 fn test_lookahead() {
1429 let ast = parse("(?=abc)").unwrap();
1430 assert!(matches!(ast, Ast::Lookahead { positive: true, .. }));
1431
1432 let ast = parse("(?!abc)").unwrap();
1433 assert!(matches!(
1434 ast,
1435 Ast::Lookahead {
1436 positive: false,
1437 ..
1438 }
1439 ));
1440 }
1441
1442 #[test]
1443 fn test_anchors() {
1444 let ast = parse("^hello$").unwrap();
1445 match ast {
1446 Ast::Concat(parts) => {
1447 assert!(matches!(parts[0], Ast::Anchor(Anchor::Start)));
1448 assert!(matches!(parts[2], Ast::Anchor(Anchor::End)));
1449 }
1450 _ => panic!("expected concat"),
1451 }
1452 }
1453
1454 #[test]
1455 fn test_named_group() {
1456 let ast = parse("(?<name>abc)").unwrap();
1457 match ast {
1458 Ast::Group { name, .. } => {
1459 assert_eq!(name, Some("name".to_string()));
1460 }
1461 _ => panic!("expected named group"),
1462 }
1463 }
1464
1465 #[test]
1466 fn test_complex_pattern() {
1467 let ast = parse(r"^(\d{3})-(\w+)$").unwrap();
1469 assert!(matches!(ast, Ast::Concat(_)));
1470
1471 let ast = parse(r"^(\d{3})-(test~2)$").unwrap();
1473 assert!(matches!(ast, Ast::Concat(_)));
1474 }
1475
1476 #[test]
1479 fn test_mrab_basic_limits() {
1480 let ast = parse("(?:hello){i<=1}").unwrap();
1482 match ast {
1483 Ast::NonCapturingGroup { fuzziness, .. } => {
1484 assert!(matches!(fuzziness, Fuzziness::MrabStyle(_)));
1485 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1486 assert_eq!(mrab.max_insertions, Some(1));
1487 assert_eq!(mrab.max_deletions, None);
1488 }
1489 }
1490 _ => panic!("expected non-capturing group"),
1491 }
1492 }
1493
1494 #[test]
1495 fn test_mrab_combined_limits() {
1496 let ast = parse("(?:hello){i<=1,d<=2,s<=3}").unwrap();
1498 match ast {
1499 Ast::NonCapturingGroup { fuzziness, .. } => {
1500 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1501 assert_eq!(mrab.max_insertions, Some(1));
1502 assert_eq!(mrab.max_deletions, Some(2));
1503 assert_eq!(mrab.max_substitutions, Some(3));
1504 } else {
1505 panic!("expected MrabStyle fuzziness");
1506 }
1507 }
1508 _ => panic!("expected non-capturing group"),
1509 }
1510 }
1511
1512 #[test]
1513 fn test_mrab_total_errors() {
1514 let ast = parse("(?:hello){e<=5}").unwrap();
1516 match ast {
1517 Ast::NonCapturingGroup { fuzziness, .. } => {
1518 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1519 assert_eq!(mrab.max_errors, Some(5));
1520 } else {
1521 panic!("expected MrabStyle fuzziness");
1522 }
1523 }
1524 _ => panic!("expected non-capturing group"),
1525 }
1526 }
1527
1528 #[test]
1529 fn test_mrab_error_range() {
1530 let ast = parse("(?:hello){1<=e<=3}").unwrap();
1532 match ast {
1533 Ast::NonCapturingGroup { fuzziness, .. } => {
1534 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1535 assert_eq!(mrab.min_errors, Some(1));
1536 assert_eq!(mrab.max_errors, Some(3));
1537 } else {
1538 panic!("expected MrabStyle fuzziness");
1539 }
1540 }
1541 _ => panic!("expected non-capturing group"),
1542 }
1543 }
1544
1545 #[test]
1546 fn test_mrab_exclusive_bounds() {
1547 let ast = parse("(?:hello){i<3}").unwrap();
1549 match ast {
1550 Ast::NonCapturingGroup { fuzziness, .. } => {
1551 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1552 assert_eq!(mrab.max_insertions, Some(2)); } else {
1554 panic!("expected MrabStyle fuzziness");
1555 }
1556 }
1557 _ => panic!("expected non-capturing group"),
1558 }
1559 }
1560
1561 #[test]
1562 fn test_mrab_exclusive_range() {
1563 let ast = parse("(?:hello){1<e<5}").unwrap();
1565 match ast {
1566 Ast::NonCapturingGroup { fuzziness, .. } => {
1567 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1568 assert_eq!(mrab.min_errors, Some(2)); assert_eq!(mrab.max_errors, Some(4)); } else {
1571 panic!("expected MrabStyle fuzziness");
1572 }
1573 }
1574 _ => panic!("expected non-capturing group"),
1575 }
1576 }
1577
1578 #[test]
1579 fn test_mrab_cost_constraint() {
1580 let ast = parse("(?:hello){2i+2d+1s<=4}").unwrap();
1583 match ast {
1584 Ast::NonCapturingGroup { fuzziness, .. } => {
1585 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1586 assert_eq!(mrab.insertion_cost, Some(2));
1587 assert_eq!(mrab.deletion_cost, Some(2));
1588 assert_eq!(mrab.substitution_cost, Some(1));
1589 assert_eq!(mrab.max_cost, Some(5));
1591 } else {
1592 panic!("expected MrabStyle fuzziness");
1593 }
1594 }
1595 _ => panic!("expected non-capturing group"),
1596 }
1597 }
1598
1599 #[test]
1600 fn test_mrab_char_class_restriction() {
1601 let ast = parse("(?:hello){s<=2:[a-z]}").unwrap();
1603 match ast {
1604 Ast::NonCapturingGroup { fuzziness, .. } => {
1605 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1606 assert_eq!(mrab.max_substitutions, Some(2));
1607 assert!(mrab.substitution_chars.is_some());
1608 } else {
1609 panic!("expected MrabStyle fuzziness");
1610 }
1611 }
1612 _ => panic!("expected non-capturing group"),
1613 }
1614 }
1615
1616 #[test]
1617 fn test_mrab_char_class_restriction_escape() {
1618 let ast = parse(r"(?:hello){i<=3:\d}").unwrap();
1620 match ast {
1621 Ast::NonCapturingGroup { fuzziness, .. } => {
1622 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1623 assert_eq!(mrab.max_insertions, Some(3));
1624 assert!(mrab.insertion_chars.is_some());
1625 } else {
1626 panic!("expected MrabStyle fuzziness");
1627 }
1628 }
1629 _ => panic!("expected non-capturing group"),
1630 }
1631 }
1632
1633 #[test]
1634 fn test_bestmatch_flag() {
1635 let result = parse_with_flags("(?b)hello", false, false, false).unwrap();
1637 assert!(result.flags.best_match);
1638 assert!(!result.flags.enhance_match);
1639 }
1640
1641 #[test]
1642 fn test_enhancematch_flag() {
1643 let result = parse_with_flags("(?e)hello", false, false, false).unwrap();
1645 assert!(!result.flags.best_match);
1646 assert!(result.flags.enhance_match);
1647 }
1648
1649 #[test]
1650 fn test_combined_flags() {
1651 let result = parse_with_flags("(?b)(?e)hello", false, false, false).unwrap();
1653 assert!(result.flags.best_match);
1654 assert!(result.flags.enhance_match);
1655 }
1656
1657 #[test]
1658 fn test_mrab_just_type_allowed() {
1659 let ast = parse("(?:hello){i}").unwrap();
1661 match ast {
1662 Ast::NonCapturingGroup { fuzziness, .. } => {
1663 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1664 assert_eq!(mrab.max_insertions, None);
1666 } else {
1667 panic!("expected MrabStyle fuzziness");
1668 }
1669 }
1670 _ => panic!("expected non-capturing group"),
1671 }
1672 }
1673
1674 #[test]
1675 fn test_mrab_mixed_constraints() {
1676 let ast = parse("(?:hello){i<=2,d<=2,e<=3}").unwrap();
1678 match ast {
1679 Ast::NonCapturingGroup { fuzziness, .. } => {
1680 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1681 assert_eq!(mrab.max_insertions, Some(2));
1682 assert_eq!(mrab.max_deletions, Some(2));
1683 assert_eq!(mrab.max_errors, Some(3));
1684 } else {
1685 panic!("expected MrabStyle fuzziness");
1686 }
1687 }
1688 _ => panic!("expected non-capturing group"),
1689 }
1690 }
1691
1692 #[test]
1695 fn test_mrab_transposition_limit() {
1696 let ast = parse("(?:hello){t<=2}").unwrap();
1698 match ast {
1699 Ast::NonCapturingGroup { fuzziness, .. } => {
1700 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1701 assert_eq!(mrab.max_transpositions, Some(2));
1702 } else {
1703 panic!("expected MrabStyle fuzziness");
1704 }
1705 }
1706 _ => panic!("expected non-capturing group"),
1707 }
1708 }
1709
1710 #[test]
1711 fn test_mrab_transposition_exclusive() {
1712 let ast = parse("(?:hello){t<3}").unwrap();
1714 match ast {
1715 Ast::NonCapturingGroup { fuzziness, .. } => {
1716 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1717 assert_eq!(mrab.max_transpositions, Some(2)); } else {
1719 panic!("expected MrabStyle fuzziness");
1720 }
1721 }
1722 _ => panic!("expected non-capturing group"),
1723 }
1724 }
1725
1726 #[test]
1727 fn test_mrab_transposition_combined() {
1728 let ast = parse("(?:hello){i<=1,d<=1,s<=1,t<=1}").unwrap();
1730 match ast {
1731 Ast::NonCapturingGroup { fuzziness, .. } => {
1732 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1733 assert_eq!(mrab.max_insertions, Some(1));
1734 assert_eq!(mrab.max_deletions, Some(1));
1735 assert_eq!(mrab.max_substitutions, Some(1));
1736 assert_eq!(mrab.max_transpositions, Some(1));
1737 } else {
1738 panic!("expected MrabStyle fuzziness");
1739 }
1740 }
1741 _ => panic!("expected non-capturing group"),
1742 }
1743 }
1744
1745 #[test]
1746 fn test_mrab_transposition_unlimited() {
1747 let ast = parse("(?:hello){t}").unwrap();
1749 match ast {
1750 Ast::NonCapturingGroup { fuzziness, .. } => {
1751 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1752 assert!(mrab.unlimited_transpositions);
1753 assert_eq!(mrab.max_transpositions, None);
1754 } else {
1755 panic!("expected MrabStyle fuzziness");
1756 }
1757 }
1758 _ => panic!("expected non-capturing group"),
1759 }
1760 }
1761
1762 #[test]
1763 fn test_mrab_transposition_cost() {
1764 let ast = parse("(?:hello){2i+2d+1s+1t<=4}").unwrap();
1766 match ast {
1767 Ast::NonCapturingGroup { fuzziness, .. } => {
1768 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1769 assert_eq!(mrab.insertion_cost, Some(2));
1770 assert_eq!(mrab.deletion_cost, Some(2));
1771 assert_eq!(mrab.substitution_cost, Some(1));
1772 assert_eq!(mrab.transposition_cost, Some(1));
1773 assert_eq!(mrab.max_cost, Some(5));
1775 } else {
1776 panic!("expected MrabStyle fuzziness");
1777 }
1778 }
1779 _ => panic!("expected non-capturing group"),
1780 }
1781 }
1782
1783 #[test]
1786 fn test_mrab_simple_cost() {
1787 let ast = parse("(?:hello){c<=4}").unwrap();
1789 match ast {
1790 Ast::NonCapturingGroup { fuzziness, .. } => {
1791 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1792 assert_eq!(mrab.insertion_cost, Some(1));
1793 assert_eq!(mrab.deletion_cost, Some(1));
1794 assert_eq!(mrab.substitution_cost, Some(1));
1795 assert_eq!(mrab.transposition_cost, Some(1));
1796 assert_eq!(mrab.max_cost, Some(5));
1798 } else {
1799 panic!("expected MrabStyle fuzziness");
1800 }
1801 }
1802 _ => panic!("expected non-capturing group"),
1803 }
1804 }
1805
1806 #[test]
1807 fn test_mrab_simple_cost_exclusive() {
1808 let ast = parse("(?:hello){c<3}").unwrap();
1810 match ast {
1811 Ast::NonCapturingGroup { fuzziness, .. } => {
1812 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1813 assert_eq!(mrab.insertion_cost, Some(1));
1814 assert_eq!(mrab.deletion_cost, Some(1));
1815 assert_eq!(mrab.substitution_cost, Some(1));
1816 assert_eq!(mrab.transposition_cost, Some(1));
1817 assert_eq!(mrab.max_cost, Some(2));
1819 } else {
1820 panic!("expected MrabStyle fuzziness");
1821 }
1822 }
1823 _ => panic!("expected non-capturing group"),
1824 }
1825 }
1826
1827 #[test]
1828 fn test_mrab_to_limits_transposition() {
1829 let ast = parse("(?:hello){t<=2}").unwrap();
1831 match ast {
1832 Ast::NonCapturingGroup { fuzziness, .. } => {
1833 if let Fuzziness::MrabStyle(mrab) = fuzziness {
1834 let limits = mrab.to_limits();
1835 assert_eq!(limits.get_swaps(), Some(2));
1836 } else {
1837 panic!("expected MrabStyle fuzziness");
1838 }
1839 }
1840 _ => panic!("expected non-capturing group"),
1841 }
1842 }
1843}