1use crate::types::FuzzyLimits;
10
11use crate::ir::EditCharRestriction;
12use crate::parser::{Anchor, Ast, CharClass, CharClassItem, Fuzziness, MrabFuzziness, NamedClass};
13
14#[derive(Debug, Clone, Default)]
16pub struct CostInfo {
17 pub insertion_cost: Option<u8>,
19 pub deletion_cost: Option<u8>,
21 pub substitution_cost: Option<u8>,
23 pub transposition_cost: Option<u8>,
25 pub max_cost: Option<u8>,
27}
28
29impl CostInfo {
30 #[must_use]
32 pub fn has_constraints(&self) -> bool {
33 self.max_cost.is_some()
34 }
35}
36
37#[derive(Debug, Clone)]
39pub enum Hir {
40 Empty,
42
43 Literal {
45 text: String,
47 limits: Option<FuzzyLimits>,
49 min_edits: Option<u8>,
51 cost_info: Option<CostInfo>,
53 edit_chars: Option<EditCharRestriction>,
55 },
56
57 Char(char),
59
60 Class(HirClass),
62
63 FuzzyClass {
66 class: HirClass,
68 limits: Option<FuzzyLimits>,
70 min_edits: Option<u8>,
72 cost_info: Option<CostInfo>,
74 },
75
76 Concat(Vec<Hir>),
78
79 Alt(Vec<Hir>),
81
82 Repeat {
84 expr: Box<Hir>,
86 min: usize,
88 max: Option<usize>,
90 greedy: bool,
92 },
93
94 Capture {
96 index: usize,
98 name: Option<String>,
100 expr: Box<Hir>,
102 },
103
104 Anchor(Anchor),
106
107 Lookahead {
109 positive: bool,
111 expr: Box<Hir>,
113 },
114
115 Lookbehind {
117 positive: bool,
119 expr: Box<Hir>,
121 },
122
123 Backreference {
125 group: usize,
127 limits: Option<FuzzyLimits>,
129 },
130
131 NamedList {
134 name: String,
136 },
137
138 ResetMatchStart,
142
143 AtomicGroup {
146 expr: Box<Hir>,
148 },
149
150 RecursivePattern,
153
154 RecursiveGroup {
156 group: usize,
158 },
159
160 RecursiveNamedGroup {
162 name: String,
164 },
165}
166
167impl Hir {
168 #[must_use]
170 pub fn is_empty(&self) -> bool {
171 matches!(self, Hir::Empty)
172 }
173
174 pub fn literal(
176 text: impl Into<String>,
177 limits: Option<FuzzyLimits>,
178 min_edits: Option<u8>,
179 cost_info: Option<CostInfo>,
180 ) -> Self {
181 Hir::Literal {
182 text: text.into(),
183 limits,
184 min_edits,
185 cost_info,
186 edit_chars: None,
187 }
188 }
189
190 pub fn literal_with_edit_chars(
192 text: impl Into<String>,
193 limits: Option<FuzzyLimits>,
194 min_edits: Option<u8>,
195 cost_info: Option<CostInfo>,
196 edit_chars: Option<EditCharRestriction>,
197 ) -> Self {
198 Hir::Literal {
199 text: text.into(),
200 limits,
201 min_edits,
202 cost_info,
203 edit_chars,
204 }
205 }
206
207 #[must_use]
209 pub fn class(hir_class: HirClass) -> Self {
210 Hir::Class(hir_class)
211 }
212
213 #[must_use]
215 pub fn repeat(expr: Hir, min: usize, max: Option<usize>, greedy: bool) -> Self {
216 Hir::Repeat {
217 expr: Box::new(expr),
218 min,
219 max,
220 greedy,
221 }
222 }
223
224 #[must_use]
226 pub fn capture(index: usize, name: Option<String>, expr: Hir) -> Self {
227 Hir::Capture {
228 index,
229 name,
230 expr: Box::new(expr),
231 }
232 }
233}
234
235#[derive(Debug, Clone)]
237pub struct HirClass {
238 pub negated: bool,
240 pub chars: Vec<char>,
242 pub ranges: Vec<(char, char)>,
244 pub named: Vec<NamedClass>,
246}
247
248impl HirClass {
249 #[must_use]
251 pub fn new(negated: bool) -> Self {
252 HirClass {
253 negated,
254 chars: Vec::new(),
255 ranges: Vec::new(),
256 named: Vec::new(),
257 }
258 }
259
260 #[must_use]
262 pub fn any() -> Self {
263 HirClass {
264 negated: false,
265 chars: Vec::new(),
266 ranges: Vec::new(),
267 named: vec![NamedClass::AnyExceptNewline],
268 }
269 }
270
271 #[must_use]
273 pub fn any_with_newlines() -> Self {
274 HirClass {
275 negated: false,
276 chars: Vec::new(),
277 ranges: Vec::new(),
278 named: vec![NamedClass::Any],
279 }
280 }
281
282 pub fn add_char(&mut self, ch: char) {
284 self.chars.push(ch);
285 }
286
287 pub fn add_range(&mut self, start: char, end: char) {
289 self.ranges.push((start, end));
290 }
291
292 pub fn add_named(&mut self, class: NamedClass) {
294 self.named.push(class);
295 }
296
297 #[must_use]
299 pub fn matches(&self, ch: char) -> bool {
300 self.matches_with_unicode(ch, false)
301 }
302
303 #[must_use]
305 pub fn matches_with_unicode(&self, ch: char, unicode: bool) -> bool {
306 let in_class = self.chars.contains(&ch)
307 || self.ranges.iter().any(|&(s, e)| ch >= s && ch <= e)
308 || self
309 .named
310 .iter()
311 .any(|n| n.matches_with_unicode(ch, unicode));
312
313 if self.negated { !in_class } else { in_class }
314 }
315}
316
317impl From<&CharClass> for HirClass {
318 fn from(class: &CharClass) -> Self {
319 let mut hir = HirClass::new(class.negated);
320
321 for item in &class.items {
322 match item {
323 CharClassItem::Single(ch) => hir.add_char(*ch),
324 CharClassItem::Range(start, end) => hir.add_range(*start, *end),
325 CharClassItem::Named(named) => hir.add_named(*named),
326 }
327 }
328
329 hir
330 }
331}
332
333pub struct HirLowering {
335 default_edits: u8,
337 unicode: bool,
339}
340
341impl HirLowering {
342 #[must_use]
344 pub fn new(default_edits: u8) -> Self {
345 HirLowering {
346 default_edits,
347 unicode: false,
348 }
349 }
350
351 #[must_use]
353 pub fn new_with_unicode(default_edits: u8, unicode: bool) -> Self {
354 HirLowering {
355 default_edits,
356 unicode,
357 }
358 }
359
360 #[must_use]
362 pub fn lower(&self, ast: &Ast) -> Hir {
363 self.lower_ast(ast)
364 }
365
366 fn char_class_to_hir(&self, class: &CharClass) -> HirClass {
368 let mut hir = HirClass::new(class.negated);
369
370 for item in &class.items {
371 match item {
372 CharClassItem::Single(ch) => hir.add_char(*ch),
373 CharClassItem::Range(start, end) => hir.add_range(*start, *end),
374 CharClassItem::Named(named) => {
375 if self.unicode {
377 Self::add_unicode_ranges(*named, &mut hir);
378 } else {
379 hir.add_named(*named);
380 }
381 }
382 }
383 }
384
385 hir
386 }
387
388 fn add_unicode_ranges(named: NamedClass, hir: &mut HirClass) {
390 match named {
391 NamedClass::Word => {
392 hir.add_range('a', 'z');
394 hir.add_range('A', 'Z');
395 hir.add_range('0', '9');
396 hir.add_char('_');
397 hir.add_range('\u{00C0}', '\u{024F}'); hir.add_range('\u{0400}', '\u{04FF}'); hir.add_range('\u{0900}', '\u{097F}'); hir.add_range('\u{4E00}', '\u{9FFF}'); }
403 NamedClass::Digit => {
404 hir.add_range('0', '9');
405 hir.add_range('\u{0660}', '\u{0669}'); hir.add_range('\u{0966}', '\u{096F}'); }
409 NamedClass::Whitespace => {
410 hir.add_char(' ');
411 hir.add_char('\t');
412 hir.add_char('\n');
413 hir.add_char('\r');
414 hir.add_char('\u{0085}'); hir.add_char('\u{00A0}'); hir.add_range('\u{2000}', '\u{200A}'); }
419 NamedClass::NotWord => {
420 hir.add_named(named);
423 }
424 NamedClass::NotDigit
425 | NamedClass::NotWhitespace
426 | NamedClass::Any
427 | NamedClass::AnyExceptNewline => {
428 hir.add_named(named);
429 }
430 }
431 }
432
433 fn lower_ast(&self, ast: &Ast) -> Hir {
434 match ast {
435 Ast::Empty => Hir::Empty,
436
437 Ast::Literal { text, fuzziness } => {
438 let limits = fuzziness.to_limits(self.default_edits);
439 let min_edits = fuzziness.min_edits();
440 let cost_info = extract_cost_info(fuzziness);
441 let edit_chars = extract_edit_chars(fuzziness);
442 Hir::Literal {
443 text: text.clone(),
444 limits,
445 min_edits,
446 cost_info,
447 edit_chars,
448 }
449 }
450
451 Ast::Char(ch) => Hir::Char(*ch),
452
453 Ast::CharClass(class) => Hir::Class(self.char_class_to_hir(class)),
454
455 Ast::Concat(parts) => {
456 let lowered: Vec<_> = parts.iter().map(|p| self.lower_ast(p)).collect();
457 Self::flatten_concat(lowered)
458 }
459
460 Ast::Alternation(alts) => {
461 let lowered: Vec<_> = alts.iter().map(|a| self.lower_ast(a)).collect();
462 if lowered.len() == 1 {
463 lowered.into_iter().next().unwrap()
464 } else {
465 Hir::Alt(lowered)
466 }
467 }
468
469 Ast::Quantified {
470 expr,
471 quantifier,
472 greedy,
473 } => {
474 let inner = self.lower_ast(expr);
475 let (min, max) = (quantifier.min(), quantifier.max());
476 Hir::Repeat {
477 expr: Box::new(inner),
478 min,
479 max,
480 greedy: *greedy,
481 }
482 }
483
484 Ast::Group { index, name, expr } => Hir::Capture {
485 index: *index,
486 name: name.clone(),
487 expr: Box::new(self.lower_ast(expr)),
488 },
489
490 Ast::NonCapturingGroup { expr, fuzziness } => {
491 if matches!(fuzziness, Fuzziness::Inherited) {
493 self.lower_ast(expr)
495 } else {
496 self.lower_with_fuzziness(expr, fuzziness)
497 }
498 }
499
500 Ast::Anchor(anchor) => Hir::Anchor(*anchor),
501
502 Ast::Lookahead { positive, expr } => Hir::Lookahead {
503 positive: *positive,
504 expr: Box::new(self.lower_ast(expr)),
505 },
506
507 Ast::Lookbehind { positive, expr } => Hir::Lookbehind {
508 positive: *positive,
509 expr: Box::new(self.lower_ast(expr)),
510 },
511
512 Ast::Backreference { group, fuzziness } => {
513 let limits = fuzziness.to_limits(self.default_edits);
514 Hir::Backreference {
515 group: *group,
516 limits,
517 }
518 }
519
520 Ast::NamedList { name } => {
521 Hir::NamedList { name: name.clone() }
524 }
525
526 Ast::ResetMatchStart => {
527 Hir::ResetMatchStart
529 }
530
531 Ast::AtomicGroup { expr } => {
532 Hir::AtomicGroup {
534 expr: Box::new(self.lower_ast(expr)),
535 }
536 }
537
538 Ast::RecursivePattern => {
539 Hir::RecursivePattern
542 }
543
544 Ast::RecursiveGroup { group } => {
545 Hir::RecursiveGroup { group: *group }
547 }
548
549 Ast::RecursiveNamedGroup { name } => {
550 Hir::RecursiveNamedGroup { name: name.clone() }
552 }
553 }
554 }
555
556 fn lower_with_fuzziness(&self, ast: &Ast, fuzziness: &Fuzziness) -> Hir {
558 match fuzziness {
560 Fuzziness::Exact => {
561 let lowering = HirLowering::new(0);
563 lowering.lower_ast(ast)
564 }
565 Fuzziness::Edits(n) => {
566 let limits = FuzzyLimits::new().edits(*n);
569 self.lower_with_detailed_limits(ast, &limits, None, None, None)
570 }
571 Fuzziness::Detailed(limits) => {
572 self.lower_with_detailed_limits(ast, limits, None, None, None)
573 }
574 Fuzziness::MrabStyle(mrab) => {
575 let limits = mrab.to_limits();
576 let min_edits = mrab.min_errors;
577 let cost_info = extract_cost_info_from_mrab(mrab);
578 let edit_chars = extract_edit_chars_from_mrab(mrab);
579 self.lower_with_detailed_limits(ast, &limits, min_edits, cost_info, edit_chars)
580 }
581 Fuzziness::Inherited => {
582 let lowering = HirLowering::new(self.default_edits);
584 lowering.lower_ast(ast)
585 }
586 }
587 }
588
589 fn lower_with_detailed_limits(
591 &self,
592 ast: &Ast,
593 limits: &FuzzyLimits,
594 min_edits: Option<u8>,
595 cost_info: Option<CostInfo>,
596 edit_chars: Option<EditCharRestriction>,
597 ) -> Hir {
598 match ast {
599 Ast::Literal { text, .. } => Hir::Literal {
600 text: text.clone(),
601 limits: Some(limits.clone()),
602 min_edits,
603 cost_info: cost_info.clone(),
604 edit_chars: edit_chars.clone(),
605 },
606
607 Ast::Char(ch) => Hir::Literal {
608 text: ch.to_string(),
609 limits: Some(limits.clone()),
610 min_edits,
611 cost_info: cost_info.clone(),
612 edit_chars: edit_chars.clone(),
613 },
614
615 Ast::Concat(parts) => {
616 let lowered: Vec<_> = parts
617 .iter()
618 .map(|p| {
619 self.lower_with_detailed_limits(
620 p,
621 limits,
622 min_edits,
623 cost_info.clone(),
624 edit_chars.clone(),
625 )
626 })
627 .collect();
628 Self::flatten_concat(lowered)
629 }
630
631 Ast::Alternation(alts) => {
632 let lowered: Vec<_> = alts
633 .iter()
634 .map(|a| {
635 self.lower_with_detailed_limits(
636 a,
637 limits,
638 min_edits,
639 cost_info.clone(),
640 edit_chars.clone(),
641 )
642 })
643 .collect();
644 Hir::Alt(lowered)
645 }
646
647 Ast::Quantified {
648 expr,
649 quantifier,
650 greedy,
651 } => {
652 let inner =
653 self.lower_with_detailed_limits(expr, limits, min_edits, cost_info, edit_chars);
654 Hir::Repeat {
655 expr: Box::new(inner),
656 min: quantifier.min(),
657 max: quantifier.max(),
658 greedy: *greedy,
659 }
660 }
661
662 Ast::Group { index, name, expr } => Hir::Capture {
663 index: *index,
664 name: name.clone(),
665 expr: Box::new(
666 self.lower_with_detailed_limits(expr, limits, min_edits, cost_info, edit_chars),
667 ),
668 },
669
670 Ast::NonCapturingGroup { expr, .. } => {
671 self.lower_with_detailed_limits(expr, limits, min_edits, cost_info, edit_chars)
672 }
673
674 Ast::CharClass(class) => Hir::FuzzyClass {
676 class: self.char_class_to_hir(class),
677 limits: Some(limits.clone()),
678 min_edits,
679 cost_info,
680 },
681
682 Ast::Backreference { group, fuzziness } => {
684 let backref_limits = fuzziness
686 .to_limits(self.default_edits)
687 .or_else(|| Some(limits.clone()));
688 Hir::Backreference {
689 group: *group,
690 limits: backref_limits,
691 }
692 }
693
694 other => self.lower_ast(other),
696 }
697 }
698
699 fn flatten_concat(parts: Vec<Hir>) -> Hir {
701 let mut result = Vec::new();
702
703 for part in parts {
704 match part {
705 Hir::Empty => {}
706 Hir::Concat(inner) => result.extend(inner),
707 other => result.push(other),
708 }
709 }
710
711 match result.len() {
712 0 => Hir::Empty,
713 1 => result.pop().unwrap(),
714 _ => Hir::Concat(result),
715 }
716 }
717}
718
719fn extract_cost_info(fuzziness: &Fuzziness) -> Option<CostInfo> {
721 match fuzziness {
722 Fuzziness::MrabStyle(mrab) => {
723 if mrab.max_cost.is_some() {
724 Some(CostInfo {
725 insertion_cost: mrab.insertion_cost,
726 deletion_cost: mrab.deletion_cost,
727 substitution_cost: mrab.substitution_cost,
728 transposition_cost: mrab.transposition_cost,
729 max_cost: mrab.max_cost,
730 })
731 } else {
732 None
733 }
734 }
735 _ => None,
736 }
737}
738
739fn extract_cost_info_from_mrab(mrab: &MrabFuzziness) -> Option<CostInfo> {
741 if mrab.max_cost.is_some() {
742 Some(CostInfo {
743 insertion_cost: mrab.insertion_cost,
744 deletion_cost: mrab.deletion_cost,
745 substitution_cost: mrab.substitution_cost,
746 transposition_cost: mrab.transposition_cost,
747 max_cost: mrab.max_cost,
748 })
749 } else {
750 None
751 }
752}
753
754fn extract_edit_chars(fuzziness: &Fuzziness) -> Option<EditCharRestriction> {
756 match fuzziness {
757 Fuzziness::MrabStyle(mrab) => extract_edit_chars_from_mrab(mrab),
758 _ => None,
759 }
760}
761
762fn extract_edit_chars_from_mrab(mrab: &MrabFuzziness) -> Option<EditCharRestriction> {
766 let char_class = mrab
769 .substitution_chars
770 .as_ref()
771 .or(mrab.insertion_chars.as_ref())
772 .or(mrab.deletion_chars.as_ref())?;
773
774 let mut chars = Vec::new();
775 let mut ranges = Vec::new();
776
777 for item in &char_class.items {
778 match item {
779 CharClassItem::Single(ch) => chars.push(*ch),
780 CharClassItem::Range(start, end) => ranges.push((*start, *end)),
781 CharClassItem::Named(named) => {
782 match named {
784 NamedClass::Digit => ranges.push(('0', '9')),
785 NamedClass::Word => {
786 ranges.push(('a', 'z'));
787 ranges.push(('A', 'Z'));
788 ranges.push(('0', '9'));
789 chars.push('_');
790 }
791 NamedClass::Whitespace => {
792 chars.extend([' ', '\t', '\n', '\r']);
793 }
794 _ => {} }
796 }
797 }
798 }
799
800 if chars.is_empty() && ranges.is_empty() {
801 None
802 } else {
803 Some(EditCharRestriction::new(chars, ranges))
804 }
805}
806
807#[must_use]
809pub fn lower(ast: &Ast, default_edits: u8) -> Hir {
810 lower_with_unicode(ast, default_edits, false)
811}
812
813#[must_use]
815pub fn lower_with_unicode(ast: &Ast, default_edits: u8, unicode: bool) -> Hir {
816 HirLowering::new_with_unicode(default_edits, unicode).lower(ast)
817}
818
819#[cfg(test)]
820mod tests {
821 use super::*;
822 use crate::parser::parse;
823
824 #[test]
825 fn test_lower_literal() {
826 let ast = parse("hello").unwrap();
827 let hir = lower(&ast, 2);
828 match hir {
829 Hir::Literal { text, limits, .. } => {
830 assert_eq!(text, "hello");
831 assert!(limits.is_some());
832 }
833 _ => panic!("expected literal"),
834 }
835 }
836
837 #[test]
838 fn test_lower_literal_exact() {
839 let ast = parse("hello~0").unwrap();
840 let hir = lower(&ast, 2);
841 match hir {
842 Hir::Literal { text, limits, .. } => {
843 assert_eq!(text, "hello");
844 assert!(limits.is_some());
846 }
847 _ => panic!("expected literal"),
848 }
849 }
850
851 #[test]
852 fn test_lower_concat() {
853 let ast = parse("ab").unwrap();
854 let hir = lower(&ast, 0);
855 assert!(matches!(hir, Hir::Literal { .. }));
856 }
857
858 #[test]
859 fn test_lower_alternation() {
860 let ast = parse("a|b").unwrap();
861 let hir = lower(&ast, 0);
862 assert!(matches!(hir, Hir::Alt(_)));
863 }
864
865 #[test]
866 fn test_lower_quantifier() {
867 let ast = parse("a+").unwrap();
868 let hir = lower(&ast, 0);
869 match hir {
870 Hir::Repeat { min, max, .. } => {
871 assert_eq!(min, 1);
872 assert_eq!(max, None);
873 }
874 _ => panic!("expected repeat"),
875 }
876 }
877
878 #[test]
879 fn test_lower_group() {
880 let ast = parse("(abc)").unwrap();
881 let hir = lower(&ast, 0);
882 assert!(matches!(hir, Hir::Capture { index: 1, .. }));
883 }
884
885 #[test]
886 fn test_lower_non_capturing_inlined() {
887 let ast = parse("(?:abc)").unwrap();
888 let hir = lower(&ast, 0);
889 assert!(matches!(hir, Hir::Literal { .. }));
891 }
892}