Skip to main content

fuzzy_regex/ir/
hir.rs

1//! High-level Intermediate Representation (HIR) for fuzzy regex.
2//!
3//! HIR is a simplified and normalized form of the AST that:
4//! - Inlines non-capturing groups
5//! - Normalizes character classes
6//! - Resolves fuzziness inheritance
7//! - Prepares for NFA construction
8
9use crate::types::FuzzyLimits;
10
11use crate::ir::EditCharRestriction;
12use crate::parser::{Anchor, Ast, CharClass, CharClassItem, Fuzziness, MrabFuzziness, NamedClass};
13
14/// Cost information for fuzzy matching.
15#[derive(Debug, Clone, Default)]
16pub struct CostInfo {
17    /// Cost per insertion.
18    pub insertion_cost: Option<u8>,
19    /// Cost per deletion.
20    pub deletion_cost: Option<u8>,
21    /// Cost per substitution.
22    pub substitution_cost: Option<u8>,
23    /// Cost per transposition.
24    pub transposition_cost: Option<u8>,
25    /// Maximum total cost allowed.
26    pub max_cost: Option<u8>,
27}
28
29impl CostInfo {
30    /// Check if this cost info has any constraints.
31    #[must_use]
32    pub fn has_constraints(&self) -> bool {
33        self.max_cost.is_some()
34    }
35}
36
37/// HIR node representing a simplified regex pattern.
38#[derive(Debug, Clone)]
39pub enum Hir {
40    /// Empty expression.
41    Empty,
42
43    /// A literal segment that can use fuzzy matching.
44    Literal {
45        /// The literal text to match.
46        text: String,
47        /// Fuzzy matching limits (insertions, deletions, substitutions).
48        limits: Option<FuzzyLimits>,
49        /// Minimum edits required (for exclusive lower bounds like `{0<e<5}`).
50        min_edits: Option<u8>,
51        /// Cost constraint info.
52        cost_info: Option<CostInfo>,
53        /// Character class restriction for edits (e.g., `{e<=1:[a-z]}`).
54        edit_chars: Option<EditCharRestriction>,
55    },
56
57    /// A single character (exact match).
58    Char(char),
59
60    /// Character class (cannot use fuzzy matching directly).
61    Class(HirClass),
62
63    /// Character class with fuzzy matching support.
64    /// Used when a character class is inside a fuzzy group like `(?:[a-z])~1`.
65    FuzzyClass {
66        /// The character class to match.
67        class: HirClass,
68        /// Fuzzy matching limits (insertions, deletions, substitutions).
69        limits: Option<FuzzyLimits>,
70        /// Minimum edits required (for exclusive lower bounds).
71        min_edits: Option<u8>,
72        /// Cost constraint info.
73        cost_info: Option<CostInfo>,
74    },
75
76    /// Concatenation of expressions.
77    Concat(Vec<Hir>),
78
79    /// Alternation of expressions.
80    Alt(Vec<Hir>),
81
82    /// Repetition.
83    Repeat {
84        /// The expression to repeat.
85        expr: Box<Hir>,
86        /// Minimum number of repetitions.
87        min: usize,
88        /// Maximum number of repetitions (None means unbounded).
89        max: Option<usize>,
90        /// Whether the repetition is greedy.
91        greedy: bool,
92    },
93
94    /// Capture group.
95    Capture {
96        /// The capture group index (1-based).
97        index: usize,
98        /// Optional name for named capture groups.
99        name: Option<String>,
100        /// The expression inside the capture group.
101        expr: Box<Hir>,
102    },
103
104    /// Anchor.
105    Anchor(Anchor),
106
107    /// Lookahead assertion.
108    Lookahead {
109        /// Whether this is a positive lookahead (true) or negative lookahead (false).
110        positive: bool,
111        /// The expression to match in the lookahead.
112        expr: Box<Hir>,
113    },
114
115    /// Lookbehind assertion.
116    Lookbehind {
117        /// Whether this is a positive lookbehind (true) or negative lookbehind (false).
118        positive: bool,
119        /// The expression to match in the lookbehind.
120        expr: Box<Hir>,
121    },
122
123    /// Backreference to a capture group, optionally with fuzzy matching.
124    Backreference {
125        /// The capture group index to reference (1-based).
126        group: usize,
127        /// Fuzzy matching limits for the backreference.
128        limits: Option<FuzzyLimits>,
129    },
130
131    /// Named list reference: \L<name>.
132    /// This will be expanded to an alternation of the word list at match time.
133    NamedList {
134        /// The name of the word list.
135        name: String,
136    },
137
138    /// Reset match start: \K
139    /// Resets the starting point of the match. Everything before \K is matched
140    /// but not included in the final match result.
141    ResetMatchStart,
142
143    /// Atomic group: (?>expr)
144    /// Once the group matches, backtracking is disabled within the group.
145    AtomicGroup {
146        /// The expression contained in the atomic group.
147        expr: Box<Hir>,
148    },
149
150    /// Recursive entire pattern: (?R)
151    /// Recursively matches the entire pattern.
152    RecursivePattern,
153
154    /// Recursive numbered group: (?1), (?2), etc.
155    RecursiveGroup {
156        /// The capture group number to recurse into.
157        group: usize,
158    },
159
160    /// Recursive named group: (?&name) or (?P>name)
161    RecursiveNamedGroup {
162        /// The name of the capture group to recurse into.
163        name: String,
164    },
165}
166
167impl Hir {
168    /// Check if this HIR is empty.
169    #[must_use]
170    pub fn is_empty(&self) -> bool {
171        matches!(self, Hir::Empty)
172    }
173
174    /// Create a literal HIR node.
175    pub fn literal(
176        text: impl Into<String>,
177        limits: Option<FuzzyLimits>,
178        min_edits: Option<u8>,
179        cost_info: Option<CostInfo>,
180    ) -> Self {
181        Hir::Literal {
182            text: text.into(),
183            limits,
184            min_edits,
185            cost_info,
186            edit_chars: None,
187        }
188    }
189
190    /// Create a literal HIR node with character class restriction.
191    pub fn literal_with_edit_chars(
192        text: impl Into<String>,
193        limits: Option<FuzzyLimits>,
194        min_edits: Option<u8>,
195        cost_info: Option<CostInfo>,
196        edit_chars: Option<EditCharRestriction>,
197    ) -> Self {
198        Hir::Literal {
199            text: text.into(),
200            limits,
201            min_edits,
202            cost_info,
203            edit_chars,
204        }
205    }
206
207    /// Create a character class HIR node.
208    #[must_use]
209    pub fn class(hir_class: HirClass) -> Self {
210        Hir::Class(hir_class)
211    }
212
213    /// Create a repetition HIR node.
214    #[must_use]
215    pub fn repeat(expr: Hir, min: usize, max: Option<usize>, greedy: bool) -> Self {
216        Hir::Repeat {
217            expr: Box::new(expr),
218            min,
219            max,
220            greedy,
221        }
222    }
223
224    /// Create a capture group HIR node.
225    #[must_use]
226    pub fn capture(index: usize, name: Option<String>, expr: Hir) -> Self {
227        Hir::Capture {
228            index,
229            name,
230            expr: Box::new(expr),
231        }
232    }
233}
234
235/// Normalized character class for HIR.
236#[derive(Debug, Clone)]
237pub struct HirClass {
238    /// Whether this is a negated class.
239    pub negated: bool,
240    /// Enumerated single characters.
241    pub chars: Vec<char>,
242    /// Character ranges (inclusive).
243    pub ranges: Vec<(char, char)>,
244    /// Named classes.
245    pub named: Vec<NamedClass>,
246}
247
248impl HirClass {
249    /// Create a new empty character class.
250    #[must_use]
251    pub fn new(negated: bool) -> Self {
252        HirClass {
253            negated,
254            chars: Vec::new(),
255            ranges: Vec::new(),
256            named: Vec::new(),
257        }
258    }
259
260    /// Create a class for any character except newlines (default `.`).
261    #[must_use]
262    pub fn any() -> Self {
263        HirClass {
264            negated: false,
265            chars: Vec::new(),
266            ranges: Vec::new(),
267            named: vec![NamedClass::AnyExceptNewline],
268        }
269    }
270
271    /// Create a class for any character including newlines (`dot_all` `.`).
272    #[must_use]
273    pub fn any_with_newlines() -> Self {
274        HirClass {
275            negated: false,
276            chars: Vec::new(),
277            ranges: Vec::new(),
278            named: vec![NamedClass::Any],
279        }
280    }
281
282    /// Add a single character.
283    pub fn add_char(&mut self, ch: char) {
284        self.chars.push(ch);
285    }
286
287    /// Add a character range.
288    pub fn add_range(&mut self, start: char, end: char) {
289        self.ranges.push((start, end));
290    }
291
292    /// Add a named class.
293    pub fn add_named(&mut self, class: NamedClass) {
294        self.named.push(class);
295    }
296
297    /// Check if a character matches this class.
298    #[must_use]
299    pub fn matches(&self, ch: char) -> bool {
300        self.matches_with_unicode(ch, false)
301    }
302
303    /// Check if a character matches this class with unicode support.
304    #[must_use]
305    pub fn matches_with_unicode(&self, ch: char, unicode: bool) -> bool {
306        let in_class = self.chars.contains(&ch)
307            || self.ranges.iter().any(|&(s, e)| ch >= s && ch <= e)
308            || self
309                .named
310                .iter()
311                .any(|n| n.matches_with_unicode(ch, unicode));
312
313        if self.negated { !in_class } else { in_class }
314    }
315}
316
317impl From<&CharClass> for HirClass {
318    fn from(class: &CharClass) -> Self {
319        let mut hir = HirClass::new(class.negated);
320
321        for item in &class.items {
322            match item {
323                CharClassItem::Single(ch) => hir.add_char(*ch),
324                CharClassItem::Range(start, end) => hir.add_range(*start, *end),
325                CharClassItem::Named(named) => hir.add_named(*named),
326            }
327        }
328
329        hir
330    }
331}
332
333/// Lower an AST to HIR.
334pub struct HirLowering {
335    /// Default edit count for inherited fuzziness.
336    default_edits: u8,
337    /// Unicode mode - enable Unicode character classes.
338    unicode: bool,
339}
340
341impl HirLowering {
342    /// Create a new HIR lowering pass with default fuzziness.
343    #[must_use]
344    pub fn new(default_edits: u8) -> Self {
345        HirLowering {
346            default_edits,
347            unicode: false,
348        }
349    }
350
351    /// Create a new HIR lowering pass with default fuzziness and unicode mode.
352    #[must_use]
353    pub fn new_with_unicode(default_edits: u8, unicode: bool) -> Self {
354        HirLowering {
355            default_edits,
356            unicode,
357        }
358    }
359
360    /// Lower an AST to HIR.
361    #[must_use]
362    pub fn lower(&self, ast: &Ast) -> Hir {
363        self.lower_ast(ast)
364    }
365
366    /// Convert `CharClass` to `HirClass` with unicode mode.
367    fn char_class_to_hir(&self, class: &CharClass) -> HirClass {
368        let mut hir = HirClass::new(class.negated);
369
370        for item in &class.items {
371            match item {
372                CharClassItem::Single(ch) => hir.add_char(*ch),
373                CharClassItem::Range(start, end) => hir.add_range(*start, *end),
374                CharClassItem::Named(named) => {
375                    // In unicode mode, add unicode-aware ranges for named classes
376                    if self.unicode {
377                        Self::add_unicode_ranges(*named, &mut hir);
378                    } else {
379                        hir.add_named(*named);
380                    }
381                }
382            }
383        }
384
385        hir
386    }
387
388    /// Add unicode-aware ranges for named character classes.
389    fn add_unicode_ranges(named: NamedClass, hir: &mut HirClass) {
390        match named {
391            NamedClass::Word => {
392                // ASCII word chars
393                hir.add_range('a', 'z');
394                hir.add_range('A', 'Z');
395                hir.add_range('0', '9');
396                hir.add_char('_');
397                // Add common unicode word char ranges
398                hir.add_range('\u{00C0}', '\u{024F}'); // Latin Extended
399                hir.add_range('\u{0400}', '\u{04FF}'); // Cyrillic
400                hir.add_range('\u{0900}', '\u{097F}'); // Devanagari
401                hir.add_range('\u{4E00}', '\u{9FFF}'); // CJK
402            }
403            NamedClass::Digit => {
404                hir.add_range('0', '9');
405                // Add common unicode digit ranges
406                hir.add_range('\u{0660}', '\u{0669}'); // Arabic-Indic
407                hir.add_range('\u{0966}', '\u{096F}'); // Devanagari
408            }
409            NamedClass::Whitespace => {
410                hir.add_char(' ');
411                hir.add_char('\t');
412                hir.add_char('\n');
413                hir.add_char('\r');
414                // Add unicode whitespace
415                hir.add_char('\u{0085}'); // NEL
416                hir.add_char('\u{00A0}'); // No-Break Space
417                hir.add_range('\u{2000}', '\u{200A}'); // Spaces
418            }
419            NamedClass::NotWord => {
420                // Negated: add all but word chars - add unicode ranges as negated
421                // This is complex, so fall back to named class
422                hir.add_named(named);
423            }
424            NamedClass::NotDigit
425            | NamedClass::NotWhitespace
426            | NamedClass::Any
427            | NamedClass::AnyExceptNewline => {
428                hir.add_named(named);
429            }
430        }
431    }
432
433    fn lower_ast(&self, ast: &Ast) -> Hir {
434        match ast {
435            Ast::Empty => Hir::Empty,
436
437            Ast::Literal { text, fuzziness } => {
438                let limits = fuzziness.to_limits(self.default_edits);
439                let min_edits = fuzziness.min_edits();
440                let cost_info = extract_cost_info(fuzziness);
441                let edit_chars = extract_edit_chars(fuzziness);
442                Hir::Literal {
443                    text: text.clone(),
444                    limits,
445                    min_edits,
446                    cost_info,
447                    edit_chars,
448                }
449            }
450
451            Ast::Char(ch) => Hir::Char(*ch),
452
453            Ast::CharClass(class) => Hir::Class(self.char_class_to_hir(class)),
454
455            Ast::Concat(parts) => {
456                let lowered: Vec<_> = parts.iter().map(|p| self.lower_ast(p)).collect();
457                Self::flatten_concat(lowered)
458            }
459
460            Ast::Alternation(alts) => {
461                let lowered: Vec<_> = alts.iter().map(|a| self.lower_ast(a)).collect();
462                if lowered.len() == 1 {
463                    lowered.into_iter().next().unwrap()
464                } else {
465                    Hir::Alt(lowered)
466                }
467            }
468
469            Ast::Quantified {
470                expr,
471                quantifier,
472                greedy,
473            } => {
474                let inner = self.lower_ast(expr);
475                let (min, max) = (quantifier.min(), quantifier.max());
476                Hir::Repeat {
477                    expr: Box::new(inner),
478                    min,
479                    max,
480                    greedy: *greedy,
481                }
482            }
483
484            Ast::Group { index, name, expr } => Hir::Capture {
485                index: *index,
486                name: name.clone(),
487                expr: Box::new(self.lower_ast(expr)),
488            },
489
490            Ast::NonCapturingGroup { expr, fuzziness } => {
491                // If the non-capturing group has its own fuzziness, apply it
492                if matches!(fuzziness, Fuzziness::Inherited) {
493                    // Just inline the contents
494                    self.lower_ast(expr)
495                } else {
496                    self.lower_with_fuzziness(expr, fuzziness)
497                }
498            }
499
500            Ast::Anchor(anchor) => Hir::Anchor(*anchor),
501
502            Ast::Lookahead { positive, expr } => Hir::Lookahead {
503                positive: *positive,
504                expr: Box::new(self.lower_ast(expr)),
505            },
506
507            Ast::Lookbehind { positive, expr } => Hir::Lookbehind {
508                positive: *positive,
509                expr: Box::new(self.lower_ast(expr)),
510            },
511
512            Ast::Backreference { group, fuzziness } => {
513                let limits = fuzziness.to_limits(self.default_edits);
514                Hir::Backreference {
515                    group: *group,
516                    limits,
517                }
518            }
519
520            Ast::NamedList { name } => {
521                // NamedList will be expanded later when the word list is provided
522                // For now, create a placeholder that will be resolved at match time
523                Hir::NamedList { name: name.clone() }
524            }
525
526            Ast::ResetMatchStart => {
527                // \K resets the match start position
528                Hir::ResetMatchStart
529            }
530
531            Ast::AtomicGroup { expr } => {
532                // Atomic group - lower the inner expression
533                Hir::AtomicGroup {
534                    expr: Box::new(self.lower_ast(expr)),
535                }
536            }
537
538            Ast::RecursivePattern => {
539                // (?R) - recursively match the entire pattern
540                // This will be resolved during NFA construction
541                Hir::RecursivePattern
542            }
543
544            Ast::RecursiveGroup { group } => {
545                // (?1), (?2), etc. - recursively match a capture group
546                Hir::RecursiveGroup { group: *group }
547            }
548
549            Ast::RecursiveNamedGroup { name } => {
550                // (?&name) or (?P>name) - recursively match a named capture group
551                Hir::RecursiveNamedGroup { name: name.clone() }
552            }
553        }
554    }
555
556    /// Lower an expression with specific fuzziness override.
557    fn lower_with_fuzziness(&self, ast: &Ast, fuzziness: &Fuzziness) -> Hir {
558        // Lower with the new default, but for detailed/mrab limits, inject them directly
559        match fuzziness {
560            Fuzziness::Exact => {
561                // Exact match - no edits allowed
562                let lowering = HirLowering::new(0);
563                lowering.lower_ast(ast)
564            }
565            Fuzziness::Edits(n) => {
566                // Simple edit count - convert to FuzzyLimits and use detailed path
567                // This ensures character classes get FuzzyClass treatment
568                let limits = FuzzyLimits::new().edits(*n);
569                self.lower_with_detailed_limits(ast, &limits, None, None, None)
570            }
571            Fuzziness::Detailed(limits) => {
572                self.lower_with_detailed_limits(ast, limits, None, None, None)
573            }
574            Fuzziness::MrabStyle(mrab) => {
575                let limits = mrab.to_limits();
576                let min_edits = mrab.min_errors;
577                let cost_info = extract_cost_info_from_mrab(mrab);
578                let edit_chars = extract_edit_chars_from_mrab(mrab);
579                self.lower_with_detailed_limits(ast, &limits, min_edits, cost_info, edit_chars)
580            }
581            Fuzziness::Inherited => {
582                // Use the default edits from self
583                let lowering = HirLowering::new(self.default_edits);
584                lowering.lower_ast(ast)
585            }
586        }
587    }
588
589    /// Lower an expression with detailed fuzzy limits applied to all literals.
590    fn lower_with_detailed_limits(
591        &self,
592        ast: &Ast,
593        limits: &FuzzyLimits,
594        min_edits: Option<u8>,
595        cost_info: Option<CostInfo>,
596        edit_chars: Option<EditCharRestriction>,
597    ) -> Hir {
598        match ast {
599            Ast::Literal { text, .. } => Hir::Literal {
600                text: text.clone(),
601                limits: Some(limits.clone()),
602                min_edits,
603                cost_info: cost_info.clone(),
604                edit_chars: edit_chars.clone(),
605            },
606
607            Ast::Char(ch) => Hir::Literal {
608                text: ch.to_string(),
609                limits: Some(limits.clone()),
610                min_edits,
611                cost_info: cost_info.clone(),
612                edit_chars: edit_chars.clone(),
613            },
614
615            Ast::Concat(parts) => {
616                let lowered: Vec<_> = parts
617                    .iter()
618                    .map(|p| {
619                        self.lower_with_detailed_limits(
620                            p,
621                            limits,
622                            min_edits,
623                            cost_info.clone(),
624                            edit_chars.clone(),
625                        )
626                    })
627                    .collect();
628                Self::flatten_concat(lowered)
629            }
630
631            Ast::Alternation(alts) => {
632                let lowered: Vec<_> = alts
633                    .iter()
634                    .map(|a| {
635                        self.lower_with_detailed_limits(
636                            a,
637                            limits,
638                            min_edits,
639                            cost_info.clone(),
640                            edit_chars.clone(),
641                        )
642                    })
643                    .collect();
644                Hir::Alt(lowered)
645            }
646
647            Ast::Quantified {
648                expr,
649                quantifier,
650                greedy,
651            } => {
652                let inner =
653                    self.lower_with_detailed_limits(expr, limits, min_edits, cost_info, edit_chars);
654                Hir::Repeat {
655                    expr: Box::new(inner),
656                    min: quantifier.min(),
657                    max: quantifier.max(),
658                    greedy: *greedy,
659                }
660            }
661
662            Ast::Group { index, name, expr } => Hir::Capture {
663                index: *index,
664                name: name.clone(),
665                expr: Box::new(
666                    self.lower_with_detailed_limits(expr, limits, min_edits, cost_info, edit_chars),
667                ),
668            },
669
670            Ast::NonCapturingGroup { expr, .. } => {
671                self.lower_with_detailed_limits(expr, limits, min_edits, cost_info, edit_chars)
672            }
673
674            // Character class with fuzzy limits becomes FuzzyClass
675            Ast::CharClass(class) => Hir::FuzzyClass {
676                class: self.char_class_to_hir(class),
677                limits: Some(limits.clone()),
678                min_edits,
679                cost_info,
680            },
681
682            // Backreference inside fuzzy group gets the fuzzy limits
683            Ast::Backreference { group, fuzziness } => {
684                // If the backref already has its own fuzziness, use that; otherwise inherit from group
685                let backref_limits = fuzziness
686                    .to_limits(self.default_edits)
687                    .or_else(|| Some(limits.clone()));
688                Hir::Backreference {
689                    group: *group,
690                    limits: backref_limits,
691                }
692            }
693
694            // Other nodes don't need fuzziness applied
695            other => self.lower_ast(other),
696        }
697    }
698
699    /// Flatten nested concats and filter empties.
700    fn flatten_concat(parts: Vec<Hir>) -> Hir {
701        let mut result = Vec::new();
702
703        for part in parts {
704            match part {
705                Hir::Empty => {}
706                Hir::Concat(inner) => result.extend(inner),
707                other => result.push(other),
708            }
709        }
710
711        match result.len() {
712            0 => Hir::Empty,
713            1 => result.pop().unwrap(),
714            _ => Hir::Concat(result),
715        }
716    }
717}
718
719/// Extract cost info from fuzziness specification.
720fn extract_cost_info(fuzziness: &Fuzziness) -> Option<CostInfo> {
721    match fuzziness {
722        Fuzziness::MrabStyle(mrab) => {
723            if mrab.max_cost.is_some() {
724                Some(CostInfo {
725                    insertion_cost: mrab.insertion_cost,
726                    deletion_cost: mrab.deletion_cost,
727                    substitution_cost: mrab.substitution_cost,
728                    transposition_cost: mrab.transposition_cost,
729                    max_cost: mrab.max_cost,
730                })
731            } else {
732                None
733            }
734        }
735        _ => None,
736    }
737}
738
739/// Extract cost info from `MrabFuzziness`.
740fn extract_cost_info_from_mrab(mrab: &MrabFuzziness) -> Option<CostInfo> {
741    if mrab.max_cost.is_some() {
742        Some(CostInfo {
743            insertion_cost: mrab.insertion_cost,
744            deletion_cost: mrab.deletion_cost,
745            substitution_cost: mrab.substitution_cost,
746            transposition_cost: mrab.transposition_cost,
747            max_cost: mrab.max_cost,
748        })
749    } else {
750        None
751    }
752}
753
754/// Extract edit character restriction from fuzziness specification.
755fn extract_edit_chars(fuzziness: &Fuzziness) -> Option<EditCharRestriction> {
756    match fuzziness {
757        Fuzziness::MrabStyle(mrab) => extract_edit_chars_from_mrab(mrab),
758        _ => None,
759    }
760}
761
762/// Extract edit character restriction from `MrabFuzziness`.
763/// Combines all character class restrictions (substitution, insertion, deletion)
764/// into a single restriction since mrab-regex syntax like `{e<=1:[a-z]}` applies to all edit types.
765fn extract_edit_chars_from_mrab(mrab: &MrabFuzziness) -> Option<EditCharRestriction> {
766    // Check for any character class restriction
767    // In mrab-regex, `{e<=1:[a-z]}` applies the class to all edit types
768    let char_class = mrab
769        .substitution_chars
770        .as_ref()
771        .or(mrab.insertion_chars.as_ref())
772        .or(mrab.deletion_chars.as_ref())?;
773
774    let mut chars = Vec::new();
775    let mut ranges = Vec::new();
776
777    for item in &char_class.items {
778        match item {
779            CharClassItem::Single(ch) => chars.push(*ch),
780            CharClassItem::Range(start, end) => ranges.push((*start, *end)),
781            CharClassItem::Named(named) => {
782                // Expand named classes to ranges
783                match named {
784                    NamedClass::Digit => ranges.push(('0', '9')),
785                    NamedClass::Word => {
786                        ranges.push(('a', 'z'));
787                        ranges.push(('A', 'Z'));
788                        ranges.push(('0', '9'));
789                        chars.push('_');
790                    }
791                    NamedClass::Whitespace => {
792                        chars.extend([' ', '\t', '\n', '\r']);
793                    }
794                    _ => {} // Skip negated and Any classes
795                }
796            }
797        }
798    }
799
800    if chars.is_empty() && ranges.is_empty() {
801        None
802    } else {
803        Some(EditCharRestriction::new(chars, ranges))
804    }
805}
806
807/// Lower an AST to HIR with default fuzziness.
808#[must_use]
809pub fn lower(ast: &Ast, default_edits: u8) -> Hir {
810    lower_with_unicode(ast, default_edits, false)
811}
812
813/// Lower an AST to HIR with unicode mode.
814#[must_use]
815pub fn lower_with_unicode(ast: &Ast, default_edits: u8, unicode: bool) -> Hir {
816    HirLowering::new_with_unicode(default_edits, unicode).lower(ast)
817}
818
819#[cfg(test)]
820mod tests {
821    use super::*;
822    use crate::parser::parse;
823
824    #[test]
825    fn test_lower_literal() {
826        let ast = parse("hello").unwrap();
827        let hir = lower(&ast, 2);
828        match hir {
829            Hir::Literal { text, limits, .. } => {
830                assert_eq!(text, "hello");
831                assert!(limits.is_some());
832            }
833            _ => panic!("expected literal"),
834        }
835    }
836
837    #[test]
838    fn test_lower_literal_exact() {
839        let ast = parse("hello~0").unwrap();
840        let hir = lower(&ast, 2);
841        match hir {
842            Hir::Literal { text, limits, .. } => {
843                assert_eq!(text, "hello");
844                // limits should allow 0 edits
845                assert!(limits.is_some());
846            }
847            _ => panic!("expected literal"),
848        }
849    }
850
851    #[test]
852    fn test_lower_concat() {
853        let ast = parse("ab").unwrap();
854        let hir = lower(&ast, 0);
855        assert!(matches!(hir, Hir::Literal { .. }));
856    }
857
858    #[test]
859    fn test_lower_alternation() {
860        let ast = parse("a|b").unwrap();
861        let hir = lower(&ast, 0);
862        assert!(matches!(hir, Hir::Alt(_)));
863    }
864
865    #[test]
866    fn test_lower_quantifier() {
867        let ast = parse("a+").unwrap();
868        let hir = lower(&ast, 0);
869        match hir {
870            Hir::Repeat { min, max, .. } => {
871                assert_eq!(min, 1);
872                assert_eq!(max, None);
873            }
874            _ => panic!("expected repeat"),
875        }
876    }
877
878    #[test]
879    fn test_lower_group() {
880        let ast = parse("(abc)").unwrap();
881        let hir = lower(&ast, 0);
882        assert!(matches!(hir, Hir::Capture { index: 1, .. }));
883    }
884
885    #[test]
886    fn test_lower_non_capturing_inlined() {
887        let ast = parse("(?:abc)").unwrap();
888        let hir = lower(&ast, 0);
889        // Non-capturing group should be inlined
890        assert!(matches!(hir, Hir::Literal { .. }));
891    }
892}