fancy_regex/
parse.rs

1// Copyright 2016 The Fancy Regex Authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! A regex parser yielding an AST.
22
23use crate::RegexOptions;
24use alloc::boxed::Box;
25use alloc::string::{String, ToString};
26use alloc::vec::Vec;
27use alloc::{format, vec};
28
29use bit_set::BitSet;
30use regex_syntax::escape_into;
31
32use crate::parse_flags::*;
33use crate::{codepoint_len, CompileError, Error, Expr, ParseError, Result, MAX_RECURSION};
34use crate::{Assertion, LookAround::*};
35
36#[cfg(not(feature = "std"))]
37pub(crate) type NamedGroups = alloc::collections::BTreeMap<String, usize>;
38#[cfg(feature = "std")]
39pub(crate) type NamedGroups = std::collections::HashMap<String, usize>;
40
41#[derive(Debug, Clone)]
42pub struct ExprTree {
43    pub expr: Expr,
44    pub backrefs: BitSet,
45    pub named_groups: NamedGroups,
46    #[allow(dead_code)]
47    pub(crate) contains_subroutines: bool,
48    pub(crate) self_recursive: bool,
49}
50
51#[derive(Debug)]
52pub(crate) struct Parser<'a> {
53    re: &'a str, // source
54    backrefs: BitSet,
55    flags: u32,
56    named_groups: NamedGroups,
57    numeric_backrefs: bool,
58    curr_group: usize, // need to keep track of which group number we're parsing
59    contains_subroutines: bool,
60    has_unresolved_subroutines: bool,
61    self_recursive: bool,
62}
63
64struct NamedBackrefOrSubroutine<'a> {
65    ix: usize,
66    group_ix: Option<usize>,
67    group_name: Option<&'a str>,
68    recursion_level: Option<isize>,
69}
70
71impl<'a> Parser<'a> {
72    pub(crate) fn parse_with_flags(re: &str, flags: u32) -> Result<ExprTree> {
73        let mut p = Parser::new(re, flags);
74        let (ix, mut expr) = p.parse_re(0, 0)?;
75        if ix < re.len() {
76            return Err(Error::ParseError(
77                ix,
78                ParseError::GeneralParseError("end of string not reached".to_string()),
79            ));
80        }
81
82        if p.has_unresolved_subroutines {
83            p.has_unresolved_subroutines = false;
84            p.resolve_named_subroutine_calls(&mut expr);
85        }
86
87        Ok(ExprTree {
88            expr,
89            backrefs: p.backrefs,
90            named_groups: p.named_groups,
91            contains_subroutines: p.contains_subroutines,
92            self_recursive: p.self_recursive,
93        })
94    }
95
96    pub(crate) fn parse(re: &str) -> Result<ExprTree> {
97        Self::parse_with_flags(re, RegexOptions::default().compute_flags())
98    }
99
100    fn new(re: &str, flags: u32) -> Parser<'_> {
101        let flags = flags | FLAG_UNICODE;
102
103        Parser {
104            re,
105            backrefs: Default::default(),
106            named_groups: Default::default(),
107            numeric_backrefs: false,
108            flags,
109            curr_group: 0,
110            contains_subroutines: false,
111            has_unresolved_subroutines: false,
112            self_recursive: false,
113        }
114    }
115
116    fn parse_re(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
117        let (ix, child) = self.parse_branch(ix, depth)?;
118        let mut ix = self.optional_whitespace(ix)?;
119        if self.re[ix..].starts_with('|') {
120            let mut children = vec![child];
121            while self.re[ix..].starts_with('|') {
122                ix += 1;
123                let (next, child) = self.parse_branch(ix, depth)?;
124                children.push(child);
125                ix = self.optional_whitespace(next)?;
126            }
127            return Ok((ix, Expr::Alt(children)));
128        }
129        // can't have numeric backrefs and named backrefs
130        if self.numeric_backrefs && !self.named_groups.is_empty() {
131            return Err(Error::CompileError(Box::new(
132                CompileError::NamedBackrefOnly,
133            )));
134        }
135        Ok((ix, child))
136    }
137
138    fn parse_branch(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
139        let mut children = Vec::new();
140        let mut ix = ix;
141        while ix < self.re.len() {
142            let (next, child) = self.parse_piece(ix, depth)?;
143            if next == ix {
144                break;
145            }
146            if child != Expr::Empty {
147                children.push(child);
148            }
149            ix = next;
150        }
151        match children.len() {
152            0 => Ok((ix, Expr::Empty)),
153            1 => Ok((ix, children.pop().unwrap())),
154            _ => Ok((ix, Expr::Concat(children))),
155        }
156    }
157
158    fn parse_piece(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
159        let (ix, child) = self.parse_atom(ix, depth)?;
160        let mut ix = self.optional_whitespace(ix)?;
161        if ix < self.re.len() {
162            // fail when child is empty?
163            let (lo, hi) = match self.re.as_bytes()[ix] {
164                b'?' => (0, 1),
165                b'*' => (0, usize::MAX),
166                b'+' => (1, usize::MAX),
167                b'{' => {
168                    match self.parse_repeat(ix) {
169                        Ok((next, lo, hi)) => {
170                            ix = next - 1;
171                            (lo, hi)
172                        }
173                        Err(_) => {
174                            // Invalid repeat syntax, which results in `{` being treated as a literal
175                            return Ok((ix, child));
176                        }
177                    }
178                }
179                _ => return Ok((ix, child)),
180            };
181            if !self.is_repeatable(&child) {
182                return Err(Error::ParseError(ix, ParseError::TargetNotRepeatable));
183            }
184            ix += 1;
185            ix = self.optional_whitespace(ix)?;
186            let mut greedy = true;
187            if ix < self.re.len() && self.re.as_bytes()[ix] == b'?' {
188                greedy = false;
189                ix += 1;
190            }
191            greedy ^= self.flag(FLAG_SWAP_GREED);
192            let mut node = Expr::Repeat {
193                child: Box::new(child),
194                lo,
195                hi,
196                greedy,
197            };
198            if ix < self.re.len() && self.re.as_bytes()[ix] == b'+' {
199                ix += 1;
200                node = Expr::AtomicGroup(Box::new(node));
201            }
202            return Ok((ix, node));
203        }
204        Ok((ix, child))
205    }
206
207    fn is_repeatable(&self, child: &Expr) -> bool {
208        match child {
209            Expr::LookAround(_, _) => false,
210            Expr::Empty => false,
211            // In Oniguruma mode, repetition after assertions is not allowed
212            Expr::Assertion(_) => !self.flag(FLAG_ONIGURUMA_MODE),
213            Expr::KeepOut => false,
214            Expr::ContinueFromPreviousMatchEnd => false,
215            Expr::BackrefExistsCondition(_) => false,
216            _ => true,
217        }
218    }
219
220    // ix, lo, hi
221    fn parse_repeat(&self, ix: usize) -> Result<(usize, usize, usize)> {
222        let ix = self.optional_whitespace(ix + 1)?; // skip opening '{'
223        let bytes = self.re.as_bytes();
224        if ix == self.re.len() {
225            return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
226        }
227        let mut end = ix;
228        let lo = if bytes[ix] == b',' {
229            0
230        } else if let Some((next, lo)) = parse_decimal(self.re, ix) {
231            end = next;
232            lo
233        } else {
234            return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
235        };
236        let ix = self.optional_whitespace(end)?; // past lo number
237        if ix == self.re.len() {
238            return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
239        }
240        end = ix;
241        let hi = match bytes[ix] {
242            b'}' => lo,
243            b',' => {
244                end = self.optional_whitespace(ix + 1)?; // past ','
245                if let Some((next, hi)) = parse_decimal(self.re, end) {
246                    end = next;
247                    hi
248                } else {
249                    usize::MAX
250                }
251            }
252            _ => return Err(Error::ParseError(ix, ParseError::InvalidRepeat)),
253        };
254        let ix = self.optional_whitespace(end)?; // past hi number
255        if ix == self.re.len() || bytes[ix] != b'}' {
256            return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
257        }
258        Ok((ix + 1, lo, hi))
259    }
260
261    fn parse_atom(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
262        let ix = self.optional_whitespace(ix)?;
263        if ix == self.re.len() {
264            return Ok((ix, Expr::Empty));
265        }
266        match self.re.as_bytes()[ix] {
267            b'.' => Ok((
268                ix + 1,
269                Expr::Any {
270                    newline: self.flag(FLAG_DOTNL),
271                },
272            )),
273            b'^' => Ok((
274                ix + 1,
275                if self.flag(FLAG_MULTI) {
276                    // TODO: support crlf flag
277                    Expr::Assertion(Assertion::StartLine { crlf: false })
278                } else {
279                    Expr::Assertion(Assertion::StartText)
280                },
281            )),
282            b'$' => Ok((
283                ix + 1,
284                if self.flag(FLAG_MULTI) {
285                    // TODO: support crlf flag
286                    Expr::Assertion(Assertion::EndLine { crlf: false })
287                } else {
288                    Expr::Assertion(Assertion::EndText)
289                },
290            )),
291            b'(' => self.parse_group(ix, depth),
292            b'\\' => self.parse_escape(ix, false),
293            b'+' | b'*' | b'?' | b'|' | b')' => Ok((ix, Expr::Empty)),
294            b'[' => self.parse_class(ix),
295            b => {
296                // TODO: maybe want to match multiple codepoints?
297                let next = ix + codepoint_len(b);
298                Ok((
299                    next,
300                    Expr::Literal {
301                        val: String::from(&self.re[ix..next]),
302                        casei: self.flag(FLAG_CASEI),
303                    },
304                ))
305            }
306        }
307    }
308
309    fn parse_named_backref(
310        &mut self,
311        ix: usize,
312        open: &str,
313        close: &str,
314        allow_relative: bool,
315    ) -> Result<(usize, Expr)> {
316        let NamedBackrefOrSubroutine {
317            ix: end,
318            group_ix,
319            group_name,
320            recursion_level,
321        } = self.parse_named_backref_or_subroutine(ix, open, close, allow_relative)?;
322        if let Some(group) = group_ix {
323            self.backrefs.insert(group);
324            return Ok((
325                end,
326                if let Some(recursion_level) = recursion_level {
327                    Expr::BackrefWithRelativeRecursionLevel {
328                        group,
329                        relative_level: recursion_level,
330                        casei: self.flag(FLAG_CASEI),
331                    }
332                } else {
333                    Expr::Backref {
334                        group,
335                        casei: self.flag(FLAG_CASEI),
336                    }
337                },
338            ));
339        }
340        if let Some(group_name) = group_name {
341            // here the name was parsed but doesn't match a capture group we have already parsed
342            return Err(Error::ParseError(
343                ix,
344                ParseError::InvalidGroupNameBackref(group_name.to_string()),
345            ));
346        }
347        unreachable!()
348    }
349
350    fn parse_named_subroutine_call(
351        &mut self,
352        ix: usize,
353        open: &str,
354        close: &str,
355        allow_relative: bool,
356    ) -> Result<(usize, Expr)> {
357        let NamedBackrefOrSubroutine {
358            ix: end,
359            group_ix,
360            group_name,
361            recursion_level,
362        } = self.parse_named_backref_or_subroutine(ix, open, close, allow_relative)?;
363        if recursion_level.is_some() {
364            return Err(Error::ParseError(ix, ParseError::InvalidGroupName));
365        }
366        if let Some(group) = group_ix {
367            self.contains_subroutines = true;
368            if group == 0 {
369                self.self_recursive = true;
370            }
371            return Ok((end, Expr::SubroutineCall(group)));
372        }
373        if let Some(group_name) = group_name {
374            // here the name was parsed but doesn't match a capture group we have already parsed
375            let expr = Expr::UnresolvedNamedSubroutineCall {
376                name: group_name.to_string(),
377                ix,
378            };
379            self.has_unresolved_subroutines = true;
380            self.contains_subroutines = true;
381            return Ok((end, expr));
382        }
383        unreachable!()
384    }
385
386    fn parse_named_backref_or_subroutine(
387        &self,
388        ix: usize,
389        open: &str,
390        close: &str,
391        allow_relative: bool,
392    ) -> Result<NamedBackrefOrSubroutine<'_>> {
393        if let Some(ParsedId {
394            id,
395            mut relative,
396            skip,
397        }) = parse_id(&self.re[ix..], open, close, allow_relative)
398        {
399            let group = if let Some(group) = self.named_groups.get(id) {
400                Some(*group)
401            } else if let Ok(group) = id.parse::<usize>() {
402                Some(group)
403            } else if let Some(relative_group) = relative {
404                if id.is_empty() {
405                    relative = None;
406                    self.curr_group.checked_add_signed(if relative_group < 0 {
407                        relative_group + 1
408                    } else {
409                        relative_group
410                    })
411                } else {
412                    None
413                }
414            } else {
415                None
416            };
417            if let Some(group) = group {
418                Ok(NamedBackrefOrSubroutine {
419                    ix: ix + skip,
420                    group_ix: Some(group),
421                    group_name: None,
422                    recursion_level: relative,
423                })
424            } else {
425                // here the name was parsed but doesn't match a capture group we have already parsed
426                Ok(NamedBackrefOrSubroutine {
427                    ix: ix + skip,
428                    group_ix: None,
429                    group_name: Some(id),
430                    recursion_level: relative,
431                })
432            }
433        } else {
434            // in this case the name can't be parsed
435            Err(Error::ParseError(ix, ParseError::InvalidGroupName))
436        }
437    }
438
439    fn parse_numbered_backref(&mut self, ix: usize) -> Result<(usize, Expr)> {
440        let (end, group) = self.parse_numbered_backref_or_subroutine_call(ix)?;
441        self.numeric_backrefs = true;
442        self.backrefs.insert(group);
443        Ok((
444            end,
445            Expr::Backref {
446                group,
447                casei: self.flag(FLAG_CASEI),
448            },
449        ))
450    }
451
452    fn parse_numbered_subroutine_call(&mut self, ix: usize) -> Result<(usize, Expr)> {
453        let (end, group) = self.parse_numbered_backref_or_subroutine_call(ix)?;
454        self.numeric_backrefs = true;
455        self.contains_subroutines = true;
456        if group == 0 {
457            self.self_recursive = true;
458        }
459        Ok((end, Expr::SubroutineCall(group)))
460    }
461
462    fn parse_numbered_backref_or_subroutine_call(&self, ix: usize) -> Result<(usize, usize)> {
463        if let Some((end, group)) = parse_decimal(self.re, ix) {
464            // protect BitSet against unreasonably large value
465            if group < self.re.len() / 2 {
466                return Ok((end, group));
467            }
468        }
469        Err(Error::ParseError(ix, ParseError::InvalidBackref))
470    }
471
472    // ix points to \ character
473    fn parse_escape(&mut self, ix: usize, in_class: bool) -> Result<(usize, Expr)> {
474        let bytes = self.re.as_bytes();
475        let Some(b) = bytes.get(ix + 1).copied() else {
476            return Err(Error::ParseError(ix, ParseError::TrailingBackslash));
477        };
478        let end = ix + 1 + codepoint_len(b);
479        Ok(if b.is_ascii_digit() {
480            return self.parse_numbered_backref(ix + 1);
481        } else if matches!(b, b'k') && !in_class {
482            // Named backref: \k<name>
483            if bytes.get(end) == Some(&b'\'') {
484                return self.parse_named_backref(end, "'", "'", true);
485            } else {
486                return self.parse_named_backref(end, "<", ">", true);
487            }
488        } else if b == b'A' && !in_class {
489            (end, Expr::Assertion(Assertion::StartText))
490        } else if b == b'z' && !in_class {
491            (end, Expr::Assertion(Assertion::EndText))
492        } else if b == b'Z' && !in_class {
493            (
494                end,
495                Expr::LookAround(
496                    Box::new(Expr::Delegate {
497                        inner: "\\n*$".to_string(),
498                        size: 0,
499                        casei: false,
500                    }),
501                    LookAhead,
502                ),
503            )
504        } else if (b == b'b' || b == b'B') && !in_class {
505            let check_pos = self.optional_whitespace(end)?;
506            if bytes.get(check_pos) == Some(&b'{') {
507                let next_open_brace_pos = self.optional_whitespace(check_pos + 1)?;
508                let is_repetition = matches!(
509                    bytes.get(next_open_brace_pos),
510                    Some(&ch) if ch.is_ascii_digit() || ch == b','
511                );
512                if !is_repetition {
513                    return self.parse_word_boundary_brace(ix);
514                }
515            }
516            let expr = if b == b'b' {
517                Expr::Assertion(Assertion::WordBoundary)
518            } else {
519                Expr::Assertion(Assertion::NotWordBoundary)
520            };
521            (end, expr)
522        } else if b == b'<' && !in_class {
523            let expr = if self.flag(FLAG_ONIGURUMA_MODE) {
524                make_literal("<")
525            } else {
526                Expr::Assertion(Assertion::LeftWordBoundary)
527            };
528            (end, expr)
529        } else if b == b'>' && !in_class {
530            let expr = if self.flag(FLAG_ONIGURUMA_MODE) {
531                make_literal(">")
532            } else {
533                Expr::Assertion(Assertion::RightWordBoundary)
534            };
535            (end, expr)
536        } else if matches!(b | 32, b'd' | b's' | b'w') {
537            (
538                end,
539                Expr::Delegate {
540                    inner: String::from(&self.re[ix..end]),
541                    size: 1,
542                    casei: self.flag(FLAG_CASEI),
543                },
544            )
545        } else if (b | 32) == b'h' {
546            let s = if b == b'h' {
547                "[0-9A-Fa-f]"
548            } else {
549                "[^0-9A-Fa-f]"
550            };
551            (
552                end,
553                Expr::Delegate {
554                    inner: String::from(s),
555                    size: 1,
556                    casei: false,
557                },
558            )
559        } else if b == b'x' {
560            let end = self.optional_whitespace(end)?;
561            return self.parse_hex(end, 2);
562        } else if b == b'u' {
563            let end = self.optional_whitespace(end)?;
564            return self.parse_hex(end, 4);
565        } else if b == b'U' {
566            let end = self.optional_whitespace(end)?;
567            return self.parse_hex(end, 8);
568        } else if (b | 32) == b'p' && end != bytes.len() {
569            let mut end = end;
570            let b = bytes[end];
571            end += codepoint_len(b);
572            if b == b'{' {
573                loop {
574                    if end == self.re.len() {
575                        return Err(Error::ParseError(ix, ParseError::UnclosedUnicodeName));
576                    }
577                    let b = bytes[end];
578                    if b == b'}' {
579                        end += 1;
580                        break;
581                    }
582                    end += codepoint_len(b);
583                }
584            }
585            (
586                end,
587                Expr::Delegate {
588                    inner: String::from(&self.re[ix..end]),
589                    size: 1,
590                    casei: self.flag(FLAG_CASEI),
591                },
592            )
593        } else if b == b'K' && !in_class {
594            (end, Expr::KeepOut)
595        } else if b == b'G' && !in_class {
596            (end, Expr::ContinueFromPreviousMatchEnd)
597        } else if b == b'O' && !in_class {
598            (end, Expr::Any { newline: true })
599        } else if b == b'g' && !in_class {
600            if end == self.re.len() {
601                return Err(Error::ParseError(
602                    ix,
603                    ParseError::InvalidEscape("\\g".to_string()),
604                ));
605            }
606            let b = bytes[end];
607            if b.is_ascii_digit() {
608                self.parse_numbered_subroutine_call(end)?
609            } else if b == b'\'' {
610                self.parse_named_subroutine_call(end, "'", "'", true)?
611            } else {
612                self.parse_named_subroutine_call(end, "<", ">", true)?
613            }
614        } else {
615            // printable ASCII (including space, see issue #29)
616            (
617                end,
618                make_literal(match b {
619                    b'a' => "\x07", // BEL
620                    b'b' => "\x08", // BS
621                    b'f' => "\x0c", // FF
622                    b'n' => "\n",   // LF
623                    b'r' => "\r",   // CR
624                    b't' => "\t",   // TAB
625                    b'v' => "\x0b", // VT
626                    b'e' => "\x1b", // ESC
627                    b' ' => " ",
628                    b => {
629                        let s = &self.re[ix + 1..end];
630                        if b.is_ascii_alphabetic()
631                            && !matches!(
632                                b,
633                                b'k' | b'A' | b'z' | b'b' | b'B' | b'<' | b'>' | b'K' | b'G'
634                            )
635                        {
636                            return Err(Error::ParseError(
637                                ix,
638                                ParseError::InvalidEscape(format!("\\{}", s)),
639                            ));
640                        } else {
641                            s
642                        }
643                    }
644                }),
645            )
646        })
647    }
648
649    // ix points after '\x', eg to 'A0' or '{12345}', or after `\u` or `\U`
650    fn parse_hex(&self, ix: usize, digits: usize) -> Result<(usize, Expr)> {
651        if ix >= self.re.len() {
652            // Incomplete escape sequence
653            return Err(Error::ParseError(ix, ParseError::InvalidHex));
654        }
655        let bytes = self.re.as_bytes();
656        let b = bytes[ix];
657        // Parse fixed-width hex (e.g., \xAB)
658        if ix + digits <= self.re.len() && bytes[ix..ix + digits].iter().all(|&b| is_hex_digit(b)) {
659            let hex_str = &self.re[ix..ix + digits];
660            return self.hex_to_literal(ix, ix + digits, hex_str);
661        }
662        // Parse brace-enclosed hex (e.g., \u{00AB})
663        if b == b'{' {
664            let mut pos = ix + 1;
665            let mut hex_chars = String::new();
666            while pos < self.re.len() {
667                // Skip whitespace/comments if FLAG_IGNORE_SPACE is set
668                pos = self.optional_whitespace(pos)?;
669                if pos >= self.re.len() {
670                    return Err(Error::ParseError(ix, ParseError::InvalidHex));
671                }
672                let b = bytes[pos];
673                if b == b'}' && !hex_chars.is_empty() {
674                    return self.hex_to_literal(ix, pos + 1, &hex_chars);
675                }
676                if is_hex_digit(b) && hex_chars.len() < 8 {
677                    hex_chars.push(b as char);
678                    pos += 1;
679                } else {
680                    return Err(Error::ParseError(ix, ParseError::InvalidHex));
681                }
682            }
683        }
684        Err(Error::ParseError(ix, ParseError::InvalidHex))
685    }
686
687    fn hex_to_literal(&self, ix: usize, end: usize, hex_str: &str) -> Result<(usize, Expr)> {
688        let codepoint = u32::from_str_radix(hex_str, 16).unwrap();
689        if let Some(c) = char::from_u32(codepoint) {
690            Ok((
691                end,
692                Expr::Literal {
693                    val: c.to_string(),
694                    casei: self.flag(FLAG_CASEI),
695                },
696            ))
697        } else {
698            Err(Error::ParseError(ix, ParseError::InvalidCodepointValue))
699        }
700    }
701
702    // ix points before '\b' or '\B'
703    fn parse_word_boundary_brace(&self, ix: usize) -> Result<(usize, Expr)> {
704        let bytes = self.re.as_bytes();
705
706        // Verify that we have '\b' or '\B'
707        if !matches!(bytes.get(ix..ix + 2), Some([b'\\', b'b' | b'B'])) {
708            return Err(Error::ParseError(
709                ix,
710                ParseError::InvalidEscape("\\b{...}".to_string()),
711            ));
712        }
713        // Skip whitespace/comments after \b or \B if FLAG_IGNORE_SPACE is set
714        let brace_start = self.optional_whitespace(ix + 2)?;
715        // Verify we have '{'
716        if bytes.get(brace_start) != Some(&b'{') {
717            return Err(Error::ParseError(
718                ix,
719                ParseError::InvalidEscape("\\b{...}".to_string()),
720            ));
721        }
722        // Extract content between braces
723        let mut pos = brace_start + 1;
724        let mut content = String::new();
725        while pos < self.re.len() {
726            let b = bytes[pos];
727            if b == b'}' {
728                break;
729            }
730            // Skip whitespace/comments if FLAG_IGNORE_SPACE is set
731            let next_pos = self.optional_whitespace(pos)?;
732            if next_pos > pos {
733                // Whitespace was skipped
734                pos = next_pos;
735                if pos >= self.re.len() || bytes[pos] == b'}' {
736                    break;
737                }
738            }
739            // Add non-whitespace character to content
740            let b = bytes[pos];
741            if b != b'}' {
742                content.push(b as char);
743                pos += codepoint_len(b);
744            }
745        }
746
747        let end_brace = pos;
748        if end_brace >= self.re.len() || bytes[end_brace] != b'}' {
749            return Err(Error::ParseError(
750                ix,
751                ParseError::InvalidEscape("\\b{...}".to_string()),
752            ));
753        }
754
755        // \B{...} is not supported
756        if bytes[ix + 1] == b'B' {
757            return Err(Error::ParseError(
758                ix,
759                ParseError::InvalidEscape(format!("\\B{{{}}}", content)),
760            ));
761        }
762
763        let expr = match content.as_str() {
764            "start" => Expr::Assertion(Assertion::LeftWordBoundary),
765            "end" => Expr::Assertion(Assertion::RightWordBoundary),
766            "start-half" => Expr::Assertion(Assertion::LeftWordHalfBoundary),
767            "end-half" => Expr::Assertion(Assertion::RightWordHalfBoundary),
768            _ => {
769                return Err(Error::ParseError(
770                    ix,
771                    ParseError::InvalidEscape(format!("\\b{{{}}}", content)),
772                ));
773            }
774        };
775
776        Ok((end_brace + 1, expr))
777    }
778
779    fn parse_class(&mut self, ix: usize) -> Result<(usize, Expr)> {
780        let bytes = self.re.as_bytes();
781        let mut ix = ix + 1; // skip opening '['
782        let mut class = String::new();
783        let mut nest = 1;
784        class.push('[');
785
786        // Negated character class
787        if bytes.get(ix) == Some(&b'^') {
788            class.push('^');
789            ix += 1;
790        }
791
792        // `]` does not have to be escaped after opening `[` or `[^`
793        if bytes.get(ix) == Some(&b']') {
794            class.push(']');
795            ix += 1;
796        }
797
798        loop {
799            if ix == self.re.len() {
800                return Err(Error::ParseError(ix, ParseError::InvalidClass));
801            }
802            let end = match bytes[ix] {
803                b'\\' => {
804                    // We support more escapes than regex, so parse it ourselves before delegating.
805                    let (end, expr) = self.parse_escape(ix, true)?;
806                    match expr {
807                        Expr::Literal { val, .. } => {
808                            debug_assert_eq!(val.chars().count(), 1);
809                            escape_into(&val, &mut class);
810                        }
811                        Expr::Delegate { inner, .. } => {
812                            class.push_str(&inner);
813                        }
814                        _ => {
815                            return Err(Error::ParseError(ix, ParseError::InvalidClass));
816                        }
817                    }
818                    end
819                }
820                b'[' => {
821                    nest += 1;
822                    class.push('[');
823                    ix + 1
824                }
825                b']' => {
826                    nest -= 1;
827                    class.push(']');
828                    if nest == 0 {
829                        break;
830                    }
831                    ix + 1
832                }
833                b => {
834                    let end = ix + codepoint_len(b);
835                    class.push_str(&self.re[ix..end]);
836                    end
837                }
838            };
839            ix = end;
840        }
841        let class = Expr::Delegate {
842            inner: class,
843            size: 1,
844            casei: self.flag(FLAG_CASEI),
845        };
846        let ix = ix + 1; // skip closing ']'
847        Ok((ix, class))
848    }
849
850    fn parse_group(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
851        let depth = depth + 1;
852        if depth >= MAX_RECURSION {
853            return Err(Error::ParseError(ix, ParseError::RecursionExceeded));
854        }
855        let ix = self.optional_whitespace(ix + 1)?;
856        let (la, skip) = if self.re[ix..].starts_with("?=") {
857            (Some(LookAhead), 2)
858        } else if self.re[ix..].starts_with("?!") {
859            (Some(LookAheadNeg), 2)
860        } else if self.re[ix..].starts_with("?<=") {
861            (Some(LookBehind), 3)
862        } else if self.re[ix..].starts_with("?<!") {
863            (Some(LookBehindNeg), 3)
864        } else if self.re[ix..].starts_with("?<") || self.re[ix..].starts_with("?'") {
865            // Named capture group using Oniguruma syntax: (?<name>...) or (?'name'...)
866            self.curr_group += 1;
867            let (open, close) = if self.re[ix..].starts_with("?<") {
868                ("<", ">")
869            } else {
870                ("'", "'")
871            };
872            if let Some(ParsedId {
873                id,
874                relative: None,
875                skip,
876            }) = parse_id(&self.re[ix + 1..], open, close, false)
877            {
878                self.named_groups.insert(id.to_string(), self.curr_group);
879                (None, skip + 1)
880            } else {
881                return Err(Error::ParseError(ix, ParseError::InvalidGroupName));
882            }
883        } else if self.re[ix..].starts_with("?P<") {
884            // Named capture group using Python syntax: (?P<name>...)
885            self.curr_group += 1; // this is a capture group
886            if let Some(ParsedId {
887                id,
888                relative: None,
889                skip,
890            }) = parse_id(&self.re[ix + 2..], "<", ">", false)
891            {
892                self.named_groups.insert(id.to_string(), self.curr_group);
893                (None, skip + 2)
894            } else {
895                return Err(Error::ParseError(ix, ParseError::InvalidGroupName));
896            }
897        } else if self.re[ix..].starts_with("?P=") {
898            // Backref using Python syntax: (?P=name)
899            return self.parse_named_backref(ix + 3, "", ")", false);
900        } else if self.re[ix..].starts_with("?>") {
901            (None, 2)
902        } else if self.re[ix..].starts_with("?(") {
903            return self.parse_conditional(ix + 2, depth);
904        } else if self.re[ix..].starts_with("?P>") {
905            return self.parse_named_subroutine_call(ix + 3, "", ")", false);
906        } else if self.re[ix..].starts_with('?') {
907            return self.parse_flags(ix, depth);
908        } else {
909            self.curr_group += 1; // this is a capture group
910            (None, 0)
911        };
912        let ix = ix + skip;
913        let (ix, child) = self.parse_re(ix, depth)?;
914        let ix = self.check_for_close_paren(ix)?;
915        let result = match (la, skip) {
916            (Some(la), _) => Expr::LookAround(Box::new(child), la),
917            (None, 2) => Expr::AtomicGroup(Box::new(child)),
918            _ => Expr::Group(Box::new(child)),
919        };
920        Ok((ix, result))
921    }
922
923    fn check_for_close_paren(&self, ix: usize) -> Result<usize> {
924        let ix = self.optional_whitespace(ix)?;
925        if ix == self.re.len() {
926            return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
927        } else if self.re.as_bytes()[ix] != b')' {
928            return Err(Error::ParseError(
929                ix,
930                ParseError::GeneralParseError("expected close paren".to_string()),
931            ));
932        }
933        Ok(ix + 1)
934    }
935
936    // ix points to `?` in `(?`
937    fn parse_flags(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
938        let start = ix + 1;
939
940        fn unknown_flag(re: &str, start: usize, end: usize) -> Error {
941            let after_end = end + codepoint_len(re.as_bytes()[end]);
942            let s = format!("(?{}", &re[start..after_end]);
943            Error::ParseError(start, ParseError::UnknownFlag(s))
944        }
945
946        let mut ix = start;
947        let mut neg = false;
948        let oldflags = self.flags;
949        loop {
950            ix = self.optional_whitespace(ix)?;
951            if ix == self.re.len() {
952                return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
953            }
954            let b = self.re.as_bytes()[ix];
955            match b {
956                b'i' => self.update_flag(FLAG_CASEI, neg),
957                b'm' => self.update_flag(FLAG_MULTI, neg),
958                b's' => self.update_flag(FLAG_DOTNL, neg),
959                b'U' => self.update_flag(FLAG_SWAP_GREED, neg),
960                b'x' => self.update_flag(FLAG_IGNORE_SPACE, neg),
961                b'u' => {
962                    if neg {
963                        return Err(Error::ParseError(ix, ParseError::NonUnicodeUnsupported));
964                    }
965                }
966                b'-' => {
967                    if neg {
968                        return Err(unknown_flag(self.re, start, ix));
969                    }
970                    neg = true;
971                }
972                b')' => {
973                    if ix == start || neg && ix == start + 1 {
974                        return Err(unknown_flag(self.re, start, ix));
975                    }
976                    return Ok((ix + 1, Expr::Empty));
977                }
978                b':' => {
979                    if neg && ix == start + 1 {
980                        return Err(unknown_flag(self.re, start, ix));
981                    }
982                    ix += 1;
983                    let (ix, child) = self.parse_re(ix, depth)?;
984                    if ix == self.re.len() {
985                        return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
986                    } else if self.re.as_bytes()[ix] != b')' {
987                        return Err(Error::ParseError(
988                            ix,
989                            ParseError::GeneralParseError("expected close paren".to_string()),
990                        ));
991                    };
992                    self.flags = oldflags;
993                    return Ok((ix + 1, child));
994                }
995                _ => return Err(unknown_flag(self.re, start, ix)),
996            }
997            ix += 1;
998        }
999    }
1000
1001    // ix points to after the last ( in (?(
1002    fn parse_conditional(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
1003        if ix >= self.re.len() {
1004            return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
1005        }
1006        let bytes = self.re.as_bytes();
1007        // get the character after the open paren
1008        let b = bytes[ix];
1009        let (next, condition) = if b == b'\'' {
1010            self.parse_named_backref(ix, "'", "')", true)?
1011        } else if b == b'<' {
1012            self.parse_named_backref(ix, "<", ">)", true)?
1013        } else if b == b'+' || b == b'-' || b.is_ascii_digit() {
1014            self.parse_named_backref(ix, "", ")", true)?
1015        } else {
1016            let (next, condition) = self.parse_re(ix, depth)?;
1017            (self.check_for_close_paren(next)?, condition)
1018        };
1019        let (end, child) = self.parse_re(next, depth)?;
1020        if end == next {
1021            // Backreference validity checker
1022            if let Expr::Backref { group, .. } = condition {
1023                let after = self.check_for_close_paren(end)?;
1024                return Ok((after, Expr::BackrefExistsCondition(group)));
1025            } else {
1026                return Err(Error::ParseError(
1027                    end,
1028                    ParseError::GeneralParseError(
1029                        "expected conditional to be a backreference or at least an expression for when the condition is true".to_string()
1030                    )
1031                ));
1032            }
1033        }
1034        let if_true: Expr;
1035        let mut if_false: Expr = Expr::Empty;
1036        if let Expr::Alt(mut alternatives) = child {
1037            // the truth branch will be the first alternative
1038            if_true = alternatives.remove(0);
1039            // if there is only one alternative left, take it out the Expr::Alt
1040            if alternatives.len() == 1 {
1041                if_false = alternatives.pop().expect("expected 2 alternatives");
1042            } else {
1043                // otherwise the remaining branches become the false branch
1044                if_false = Expr::Alt(alternatives);
1045            }
1046        } else {
1047            // there is only one branch - the truth branch. i.e. "if" without "else"
1048            if_true = child;
1049        }
1050        let inner_condition = if let Expr::Backref { group, .. } = condition {
1051            Expr::BackrefExistsCondition(group)
1052        } else {
1053            condition
1054        };
1055
1056        let after = self.check_for_close_paren(end)?;
1057        Ok((
1058            after,
1059            if if_true == Expr::Empty && if_false == Expr::Empty {
1060                inner_condition
1061            } else {
1062                Expr::Conditional {
1063                    condition: Box::new(inner_condition),
1064                    true_branch: Box::new(if_true),
1065                    false_branch: Box::new(if_false),
1066                }
1067            },
1068        ))
1069    }
1070
1071    fn flag(&self, flag: u32) -> bool {
1072        let v = self.flags & flag;
1073        v == flag
1074    }
1075
1076    fn update_flag(&mut self, flag: u32, neg: bool) {
1077        if neg {
1078            self.flags &= !flag;
1079        } else {
1080            self.flags |= flag;
1081        }
1082    }
1083
1084    fn optional_whitespace(&self, mut ix: usize) -> Result<usize> {
1085        let bytes = self.re.as_bytes();
1086        loop {
1087            if ix == self.re.len() {
1088                return Ok(ix);
1089            }
1090            match bytes[ix] {
1091                b'#' if self.flag(FLAG_IGNORE_SPACE) => {
1092                    match bytes[ix..].iter().position(|&c| c == b'\n') {
1093                        Some(x) => ix += x + 1,
1094                        None => return Ok(self.re.len()),
1095                    }
1096                }
1097                b' ' | b'\r' | b'\n' | b'\t' if self.flag(FLAG_IGNORE_SPACE) => ix += 1,
1098                b'(' if bytes[ix..].starts_with(b"(?#") => {
1099                    ix += 3;
1100                    loop {
1101                        if ix >= self.re.len() {
1102                            return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
1103                        }
1104                        match bytes[ix] {
1105                            b')' => {
1106                                ix += 1;
1107                                break;
1108                            }
1109                            b'\\' => ix += 2,
1110                            _ => ix += 1,
1111                        }
1112                    }
1113                }
1114                _ => return Ok(ix),
1115            }
1116        }
1117    }
1118
1119    fn resolve_named_subroutine_calls(&mut self, expr: &mut Expr) {
1120        match expr {
1121            Expr::UnresolvedNamedSubroutineCall { name, .. } => {
1122                if let Some(group) = self.named_groups.get(name) {
1123                    *expr = Expr::SubroutineCall(*group);
1124                } else {
1125                    self.has_unresolved_subroutines = true;
1126                }
1127            }
1128            // recursively resolve in inner expressions
1129            Expr::Group(inner) | Expr::LookAround(inner, _) | Expr::AtomicGroup(inner) => {
1130                self.resolve_named_subroutine_calls(inner);
1131            }
1132            Expr::Concat(children) | Expr::Alt(children) => {
1133                for child in children {
1134                    self.resolve_named_subroutine_calls(child);
1135                }
1136            }
1137            Expr::Repeat { child, .. } => {
1138                self.resolve_named_subroutine_calls(child);
1139            }
1140            Expr::Conditional {
1141                condition,
1142                true_branch,
1143                false_branch,
1144            } => {
1145                self.resolve_named_subroutine_calls(condition);
1146                self.resolve_named_subroutine_calls(true_branch);
1147                self.resolve_named_subroutine_calls(false_branch);
1148            }
1149            _ => {}
1150        }
1151    }
1152}
1153
1154// return (ix, value)
1155pub(crate) fn parse_decimal(s: &str, ix: usize) -> Option<(usize, usize)> {
1156    let mut end = ix;
1157    while end < s.len() && s.as_bytes()[end].is_ascii_digit() {
1158        end += 1;
1159    }
1160    s[ix..end].parse::<usize>().ok().map(|val| (end, val))
1161}
1162
1163#[derive(Debug, PartialEq)]
1164pub(crate) struct ParsedId<'a> {
1165    pub id: &'a str,
1166    pub relative: Option<isize>,
1167    pub skip: usize,
1168}
1169
1170/// Attempts to parse an identifier, optionally followed by a relative number between the
1171/// specified opening and closing delimiters.  On success, returns
1172/// `Some((id, relative, skip))`, where `skip` is how much of the string was used.
1173pub(crate) fn parse_id<'a>(
1174    s: &'a str,
1175    open: &'_ str,
1176    close: &'_ str,
1177    allow_relative: bool,
1178) -> Option<ParsedId<'a>> {
1179    debug_assert!(!close.starts_with(is_id_char));
1180
1181    if !s.starts_with(open) || s.len() <= open.len() + close.len() {
1182        return None;
1183    }
1184
1185    let id_start = open.len();
1186    let mut iter = s[id_start..].char_indices().peekable();
1187    let after_id = iter.find(|(_, ch)| !is_id_char(*ch));
1188
1189    let id_len = match after_id.map(|(i, _)| i) {
1190        Some(id_len) => id_len,
1191        None if close.is_empty() => s.len(),
1192        _ => 0,
1193    };
1194
1195    let id_end = id_start + id_len;
1196    if id_len > 0 && s[id_end..].starts_with(close) {
1197        return Some(ParsedId {
1198            id: &s[id_start..id_end],
1199            relative: None,
1200            skip: id_end + close.len(),
1201        });
1202    } else if !allow_relative {
1203        return None;
1204    }
1205    let relative_sign = s.as_bytes()[id_end];
1206    if relative_sign == b'+' || relative_sign == b'-' {
1207        if let Some((end, relative_amount)) = parse_decimal(s, id_end + 1) {
1208            if s[end..].starts_with(close) {
1209                if relative_amount == 0 && id_len == 0 {
1210                    return None;
1211                }
1212                let relative_amount_signed = if relative_sign == b'-' {
1213                    -(relative_amount as isize)
1214                } else {
1215                    relative_amount as isize
1216                };
1217                return Some(ParsedId {
1218                    id: &s[id_start..id_end],
1219                    relative: Some(relative_amount_signed),
1220                    skip: end + close.len(),
1221                });
1222            }
1223        }
1224    }
1225    None
1226}
1227
1228fn is_id_char(c: char) -> bool {
1229    c.is_alphanumeric() || c == '_'
1230}
1231
1232fn is_hex_digit(b: u8) -> bool {
1233    b.is_ascii_digit() || (b'a' <= (b | 32) && (b | 32) <= b'f')
1234}
1235
1236pub(crate) fn make_literal(s: &str) -> Expr {
1237    make_literal_case_insensitive(s, false)
1238}
1239
1240pub(crate) fn make_literal_case_insensitive(s: &str, case_insensitive: bool) -> Expr {
1241    Expr::Literal {
1242        val: String::from(s),
1243        casei: case_insensitive,
1244    }
1245}
1246
1247#[cfg(test)]
1248mod tests {
1249    use alloc::boxed::Box;
1250    use alloc::string::{String, ToString};
1251    use alloc::{format, vec};
1252
1253    use crate::parse::{make_literal, make_literal_case_insensitive, parse_id};
1254    use crate::{Assertion, Expr};
1255    use crate::{LookAround::*, RegexOptions, SyntaxConfig};
1256
1257    fn p(s: &str) -> Expr {
1258        Expr::parse_tree(s).unwrap().expr
1259    }
1260
1261    fn parse_oniguruma(s: &str) -> crate::Result<Expr> {
1262        let mut options = RegexOptions::default();
1263        options.oniguruma_mode = true;
1264        options.pattern = String::from(s);
1265        Expr::parse_tree_with_flags(&options.pattern, options.compute_flags()).map(|tree| tree.expr)
1266    }
1267
1268    #[cfg_attr(feature = "track_caller", track_caller)]
1269    fn fail(s: &str) {
1270        assert!(
1271            Expr::parse_tree(s).is_err(),
1272            "Expected parse error, but was: {:?}",
1273            Expr::parse_tree(s)
1274        );
1275    }
1276
1277    #[cfg_attr(feature = "track_caller", track_caller)]
1278    fn assert_error(re: &str, expected_error: &str) {
1279        let result = Expr::parse_tree(re);
1280        assert!(
1281            result.is_err(),
1282            "Expected parse error, but was: {:?}",
1283            result
1284        );
1285        assert_eq!(&format!("{}", result.err().unwrap()), expected_error);
1286    }
1287
1288    #[cfg_attr(feature = "track_caller", track_caller)]
1289    fn assert_error_oniguruma(re: &str, expected_error: &str) {
1290        let result = parse_oniguruma(re);
1291        assert!(
1292            result.is_err(),
1293            "Expected parse error in Oniguruma mode, but was: {:?}",
1294            result
1295        );
1296        assert_eq!(&format!("{}", result.err().unwrap()), expected_error);
1297    }
1298
1299    #[test]
1300    fn empty() {
1301        assert_eq!(p(""), Expr::Empty);
1302    }
1303
1304    #[test]
1305    fn any() {
1306        assert_eq!(p("."), Expr::Any { newline: false });
1307        assert_eq!(p("(?s:.)"), Expr::Any { newline: true });
1308    }
1309
1310    #[test]
1311    fn start_text() {
1312        assert_eq!(p("^"), Expr::Assertion(Assertion::StartText));
1313    }
1314
1315    #[test]
1316    fn end_text() {
1317        assert_eq!(p("$"), Expr::Assertion(Assertion::EndText));
1318    }
1319
1320    #[test]
1321    fn end_text_before_empty_lines() {
1322        assert_eq!(
1323            p("\\Z"),
1324            Expr::LookAround(
1325                Box::new(Expr::Delegate {
1326                    inner: "\\n*$".to_string(),
1327                    size: 0,
1328                    casei: false,
1329                }),
1330                LookAhead,
1331            )
1332        );
1333    }
1334
1335    #[test]
1336    fn literal() {
1337        assert_eq!(p("a"), make_literal("a"));
1338    }
1339
1340    #[test]
1341    fn literal_special() {
1342        assert_eq!(p("}"), make_literal("}"));
1343        assert_eq!(p("]"), make_literal("]"));
1344    }
1345
1346    #[test]
1347    fn parse_id_test() {
1348        use crate::parse::ParsedId;
1349        fn create_id(id: &str, relative: Option<isize>, skip: usize) -> Option<ParsedId<'_>> {
1350            Some(ParsedId { id, relative, skip })
1351        }
1352        assert_eq!(parse_id("foo.", "", "", true), create_id("foo", None, 3));
1353        assert_eq!(parse_id("1.", "", "", true), create_id("1", None, 1));
1354        assert_eq!(parse_id("{foo}", "{", "}", true), create_id("foo", None, 5));
1355        assert_eq!(parse_id("{foo.", "{", "}", true), None);
1356        assert_eq!(parse_id("{foo", "{", "}", true), None);
1357        assert_eq!(parse_id("{}", "{", "}", true), None);
1358        assert_eq!(parse_id("", "", "", true), None);
1359        assert_eq!(parse_id("{-1}", "{", "}", true), create_id("", Some(-1), 4));
1360        assert_eq!(parse_id("{-1}", "{", "}", false), None);
1361        assert_eq!(parse_id("{-a}", "{", "}", true), None);
1362        assert_eq!(parse_id("{-a}", "{", "}", false), None);
1363        assert_eq!(parse_id("{+a}", "{", "}", false), None);
1364        assert_eq!(parse_id("+a", "", "", false), None);
1365        assert_eq!(parse_id("-a", "", "", false), None);
1366        assert_eq!(parse_id("2+a", "", "", false), create_id("2", None, 1));
1367        assert_eq!(parse_id("2-a", "", "", false), create_id("2", None, 1));
1368
1369        assert_eq!(parse_id("<+1>", "<", ">", true), create_id("", Some(1), 4));
1370        assert_eq!(parse_id("<-3>", "<", ">", true), create_id("", Some(-3), 4));
1371        assert_eq!(
1372            parse_id("<n+1>", "<", ">", true),
1373            create_id("n", Some(1), 5)
1374        );
1375        assert_eq!(
1376            parse_id("<n-1>", "<", ">", true),
1377            create_id("n", Some(-1), 5)
1378        );
1379        assert_eq!(parse_id("<>", "<", ">", true), None);
1380        assert_eq!(parse_id("<", "<", ">", true), None);
1381        assert_eq!(parse_id("<+0>", "<", ">", true), None);
1382        assert_eq!(parse_id("<-0>", "<", ">", true), None);
1383        assert_eq!(
1384            parse_id("<n+0>", "<", ">", true),
1385            create_id("n", Some(0), 5)
1386        );
1387        assert_eq!(
1388            parse_id("<n-0>", "<", ">", true),
1389            create_id("n", Some(0), 5)
1390        );
1391        assert_eq!(
1392            parse_id("<2-0>", "<", ">", true),
1393            create_id("2", Some(0), 5)
1394        );
1395        assert_eq!(
1396            parse_id("<2+0>", "<", ">", true),
1397            create_id("2", Some(0), 5)
1398        );
1399        assert_eq!(
1400            parse_id("<2+1>", "<", ">", true),
1401            create_id("2", Some(1), 5)
1402        );
1403        assert_eq!(
1404            parse_id("<2-1>", "<", ">", true),
1405            create_id("2", Some(-1), 5)
1406        );
1407    }
1408
1409    #[test]
1410    fn literal_unescaped_opening_curly() {
1411        // `{` in position where quantifier is not allowed results in literal `{`
1412        assert_eq!(p("{"), make_literal("{"));
1413        assert_eq!(p("({)"), Expr::Group(Box::new(make_literal("{"),)));
1414        assert_eq!(
1415            p("a|{"),
1416            Expr::Alt(vec![make_literal("a"), make_literal("{"),])
1417        );
1418        assert_eq!(
1419            p("{{2}"),
1420            Expr::Repeat {
1421                child: Box::new(make_literal("{")),
1422                lo: 2,
1423                hi: 2,
1424                greedy: true
1425            }
1426        );
1427    }
1428
1429    #[test]
1430    fn literal_escape() {
1431        assert_eq!(p("\\'"), make_literal("'"));
1432        assert_eq!(p("\\\""), make_literal("\""));
1433        assert_eq!(p("\\ "), make_literal(" "));
1434        assert_eq!(p("\\xA0"), make_literal("\u{A0}"));
1435        assert_eq!(p("\\x{1F4A9}"), make_literal("\u{1F4A9}"));
1436        assert_eq!(p("\\x{000000B7}"), make_literal("\u{B7}"));
1437        assert_eq!(p("\\u21D2"), make_literal("\u{21D2}"));
1438        assert_eq!(p("\\u{21D2}"), make_literal("\u{21D2}"));
1439        assert_eq!(p("\\u21D2x"), p("\u{21D2}x"));
1440        assert_eq!(p("\\U0001F60A"), make_literal("\u{1F60A}"));
1441        assert_eq!(p("\\U{0001F60A}"), make_literal("\u{1F60A}"));
1442    }
1443
1444    #[test]
1445    fn hex_escape() {
1446        assert_eq!(
1447            p("\\h"),
1448            Expr::Delegate {
1449                inner: String::from("[0-9A-Fa-f]"),
1450                size: 1,
1451                casei: false
1452            }
1453        );
1454        assert_eq!(
1455            p("\\H"),
1456            Expr::Delegate {
1457                inner: String::from("[^0-9A-Fa-f]"),
1458                size: 1,
1459                casei: false
1460            }
1461        );
1462    }
1463
1464    #[test]
1465    fn invalid_escape() {
1466        assert_error(
1467            "\\",
1468            "Parsing error at position 0: Backslash without following character",
1469        );
1470        assert_error("\\q", "Parsing error at position 0: Invalid escape: \\q");
1471        assert_error("\\u", "Parsing error at position 2: Invalid hex escape");
1472        assert_error("\\U", "Parsing error at position 2: Invalid hex escape");
1473        assert_error("\\x", "Parsing error at position 2: Invalid hex escape");
1474        assert_error("\\xAG", "Parsing error at position 2: Invalid hex escape");
1475        assert_error("\\xA", "Parsing error at position 2: Invalid hex escape");
1476        assert_error("\\x{}", "Parsing error at position 2: Invalid hex escape");
1477        assert_error("\\x{AG}", "Parsing error at position 2: Invalid hex escape");
1478        assert_error("\\x{42", "Parsing error at position 2: Invalid hex escape");
1479        assert_error(
1480            "\\x{D800}",
1481            "Parsing error at position 2: Invalid codepoint for hex or unicode escape",
1482        );
1483        assert_error(
1484            "\\x{110000}",
1485            "Parsing error at position 2: Invalid codepoint for hex or unicode escape",
1486        );
1487        assert_error("\\u123", "Parsing error at position 2: Invalid hex escape");
1488        assert_error("\\u123x", "Parsing error at position 2: Invalid hex escape");
1489        assert_error("\\u{}", "Parsing error at position 2: Invalid hex escape");
1490        assert_error(
1491            "\\U1234567",
1492            "Parsing error at position 2: Invalid hex escape",
1493        );
1494        assert_error("\\U{}", "Parsing error at position 2: Invalid hex escape");
1495    }
1496
1497    #[test]
1498    fn concat() {
1499        assert_eq!(
1500            p("ab"),
1501            Expr::Concat(vec![make_literal("a"), make_literal("b"),])
1502        );
1503    }
1504
1505    #[test]
1506    fn alt() {
1507        assert_eq!(
1508            p("a|b"),
1509            Expr::Alt(vec![make_literal("a"), make_literal("b"),])
1510        );
1511    }
1512
1513    #[test]
1514    fn group() {
1515        assert_eq!(p("(a)"), Expr::Group(Box::new(make_literal("a"),)));
1516    }
1517
1518    #[test]
1519    fn named_group() {
1520        assert_eq!(p("(?'name'a)"), Expr::Group(Box::new(make_literal("a"),)));
1521        assert_eq!(p("(?<name>a)"), Expr::Group(Box::new(make_literal("a"),)));
1522    }
1523
1524    #[test]
1525    fn group_repeat() {
1526        assert_eq!(
1527            p("(a){2}"),
1528            Expr::Repeat {
1529                child: Box::new(Expr::Group(Box::new(make_literal("a")))),
1530                lo: 2,
1531                hi: 2,
1532                greedy: true
1533            }
1534        );
1535    }
1536
1537    #[test]
1538    fn repeat() {
1539        assert_eq!(
1540            p("a{2,42}"),
1541            Expr::Repeat {
1542                child: Box::new(make_literal("a")),
1543                lo: 2,
1544                hi: 42,
1545                greedy: true
1546            }
1547        );
1548        assert_eq!(
1549            p("a{2,}"),
1550            Expr::Repeat {
1551                child: Box::new(make_literal("a")),
1552                lo: 2,
1553                hi: usize::MAX,
1554                greedy: true
1555            }
1556        );
1557        assert_eq!(
1558            p("a{2}"),
1559            Expr::Repeat {
1560                child: Box::new(make_literal("a")),
1561                lo: 2,
1562                hi: 2,
1563                greedy: true
1564            }
1565        );
1566        assert_eq!(
1567            p("a{,2}"),
1568            Expr::Repeat {
1569                child: Box::new(make_literal("a")),
1570                lo: 0,
1571                hi: 2,
1572                greedy: true
1573            }
1574        );
1575
1576        assert_eq!(
1577            p("a{2,42}?"),
1578            Expr::Repeat {
1579                child: Box::new(make_literal("a")),
1580                lo: 2,
1581                hi: 42,
1582                greedy: false
1583            }
1584        );
1585        assert_eq!(
1586            p("a{2,}?"),
1587            Expr::Repeat {
1588                child: Box::new(make_literal("a")),
1589                lo: 2,
1590                hi: usize::MAX,
1591                greedy: false
1592            }
1593        );
1594        assert_eq!(
1595            p("a{2}?"),
1596            Expr::Repeat {
1597                child: Box::new(make_literal("a")),
1598                lo: 2,
1599                hi: 2,
1600                greedy: false
1601            }
1602        );
1603        assert_eq!(
1604            p("a{,2}?"),
1605            Expr::Repeat {
1606                child: Box::new(make_literal("a")),
1607                lo: 0,
1608                hi: 2,
1609                greedy: false
1610            }
1611        );
1612    }
1613
1614    #[test]
1615    fn invalid_repeat() {
1616        // Invalid repeat syntax results in literal
1617        assert_eq!(
1618            p("a{"),
1619            Expr::Concat(vec![make_literal("a"), make_literal("{"),])
1620        );
1621        assert_eq!(
1622            p("a{6"),
1623            Expr::Concat(vec![
1624                make_literal("a"),
1625                make_literal("{"),
1626                make_literal("6"),
1627            ])
1628        );
1629        assert_eq!(
1630            p("a{6,"),
1631            Expr::Concat(vec![
1632                make_literal("a"),
1633                make_literal("{"),
1634                make_literal("6"),
1635                make_literal(","),
1636            ])
1637        );
1638        assert_eq!(
1639            p("a{1,A}"),
1640            Expr::Concat(vec![
1641                make_literal("a"),
1642                make_literal("{"),
1643                make_literal("1"),
1644                make_literal(","),
1645                make_literal("A"),
1646                make_literal("}"),
1647            ])
1648        );
1649        assert_eq!(
1650            p("a{1,2A}"),
1651            Expr::Concat(vec![
1652                make_literal("a"),
1653                make_literal("{"),
1654                make_literal("1"),
1655                make_literal(","),
1656                make_literal("2"),
1657                make_literal("A"),
1658                make_literal("}"),
1659            ])
1660        );
1661    }
1662
1663    #[test]
1664    fn quantifiers_on_assertions_in_regex_mode() {
1665        assert_eq!(
1666            p(r"\b{1}"),
1667            Expr::Repeat {
1668                child: Box::new(Expr::Assertion(Assertion::WordBoundary)),
1669                lo: 1,
1670                hi: 1,
1671                greedy: true
1672            }
1673        );
1674        assert_eq!(
1675            p(r"\B{2}"),
1676            Expr::Repeat {
1677                child: Box::new(Expr::Assertion(Assertion::NotWordBoundary)),
1678                lo: 2,
1679                hi: 2,
1680                greedy: true
1681            }
1682        );
1683        assert_eq!(
1684            p(r"^{3}"),
1685            Expr::Repeat {
1686                child: Box::new(Expr::Assertion(Assertion::StartText)),
1687                lo: 3,
1688                hi: 3,
1689                greedy: true
1690            }
1691        );
1692        assert_eq!(
1693            p(r"${1,5}"),
1694            Expr::Repeat {
1695                child: Box::new(Expr::Assertion(Assertion::EndText)),
1696                lo: 1,
1697                hi: 5,
1698                greedy: true
1699            }
1700        );
1701        assert_eq!(
1702            p(r"\A*"),
1703            Expr::Repeat {
1704                child: Box::new(Expr::Assertion(Assertion::StartText)),
1705                lo: 0,
1706                hi: usize::MAX,
1707                greedy: true
1708            }
1709        );
1710        assert_eq!(
1711            p(r"\z+"),
1712            Expr::Repeat {
1713                child: Box::new(Expr::Assertion(Assertion::EndText)),
1714                lo: 1,
1715                hi: usize::MAX,
1716                greedy: true
1717            }
1718        );
1719        assert_eq!(
1720            p(r"^?"),
1721            Expr::Repeat {
1722                child: Box::new(Expr::Assertion(Assertion::StartText)),
1723                lo: 0,
1724                hi: 1,
1725                greedy: true
1726            }
1727        );
1728        assert_eq!(
1729            p(r"${2}"),
1730            Expr::Repeat {
1731                child: Box::new(Expr::Assertion(Assertion::EndText)),
1732                lo: 2,
1733                hi: 2,
1734                greedy: true
1735            }
1736        );
1737        assert_eq!(
1738            p(r"(?m)^?"),
1739            Expr::Repeat {
1740                child: Box::new(Expr::Assertion(Assertion::StartLine { crlf: false })),
1741                lo: 0,
1742                hi: 1,
1743                greedy: true
1744            }
1745        );
1746        assert_eq!(
1747            p(r"(?m)${2}"),
1748            Expr::Repeat {
1749                child: Box::new(Expr::Assertion(Assertion::EndLine { crlf: false })),
1750                lo: 2,
1751                hi: 2,
1752                greedy: true
1753            }
1754        );
1755        assert_eq!(
1756            p(r"\b+"),
1757            Expr::Repeat {
1758                child: Box::new(Expr::Assertion(Assertion::WordBoundary)),
1759                lo: 1,
1760                hi: usize::MAX,
1761                greedy: true
1762            }
1763        );
1764    }
1765
1766    #[test]
1767    fn delegate_zero() {
1768        assert_eq!(p("\\b"), Expr::Assertion(Assertion::WordBoundary),);
1769        assert_eq!(p("\\B"), Expr::Assertion(Assertion::NotWordBoundary),);
1770    }
1771
1772    #[test]
1773    fn delegate_named_group() {
1774        assert_eq!(
1775            p("\\p{Greek}"),
1776            Expr::Delegate {
1777                inner: String::from("\\p{Greek}"),
1778                size: 1,
1779                casei: false
1780            }
1781        );
1782        assert_eq!(
1783            p("\\pL"),
1784            Expr::Delegate {
1785                inner: String::from("\\pL"),
1786                size: 1,
1787                casei: false
1788            }
1789        );
1790        assert_eq!(
1791            p("\\P{Greek}"),
1792            Expr::Delegate {
1793                inner: String::from("\\P{Greek}"),
1794                size: 1,
1795                casei: false
1796            }
1797        );
1798        assert_eq!(
1799            p("\\PL"),
1800            Expr::Delegate {
1801                inner: String::from("\\PL"),
1802                size: 1,
1803                casei: false
1804            }
1805        );
1806        assert_eq!(
1807            p("(?i)\\p{Ll}"),
1808            Expr::Delegate {
1809                inner: String::from("\\p{Ll}"),
1810                size: 1,
1811                casei: true
1812            }
1813        );
1814    }
1815
1816    #[test]
1817    fn backref() {
1818        assert_eq!(
1819            p("(.)\\1"),
1820            Expr::Concat(vec![
1821                Expr::Group(Box::new(Expr::Any { newline: false })),
1822                Expr::Backref {
1823                    group: 1,
1824                    casei: false,
1825                },
1826            ])
1827        );
1828    }
1829
1830    #[test]
1831    fn named_backref() {
1832        assert_eq!(
1833            p("(?<i>.)\\k<i>"),
1834            Expr::Concat(vec![
1835                Expr::Group(Box::new(Expr::Any { newline: false })),
1836                Expr::Backref {
1837                    group: 1,
1838                    casei: false,
1839                },
1840            ])
1841        );
1842    }
1843
1844    #[test]
1845    fn relative_backref() {
1846        assert_eq!(
1847            p(r"(a)(.)\k<-1>"),
1848            Expr::Concat(vec![
1849                Expr::Group(Box::new(make_literal("a"))),
1850                Expr::Group(Box::new(Expr::Any { newline: false })),
1851                Expr::Backref {
1852                    group: 2,
1853                    casei: false,
1854                },
1855            ])
1856        );
1857
1858        assert_eq!(
1859            p(r"(a)\k<+1>(.)"),
1860            Expr::Concat(vec![
1861                Expr::Group(Box::new(make_literal("a"))),
1862                Expr::Backref {
1863                    group: 2,
1864                    casei: false,
1865                },
1866                Expr::Group(Box::new(Expr::Any { newline: false })),
1867            ])
1868        );
1869
1870        fail("(?P<->.)");
1871        fail("(.)(?P=-)");
1872        fail(r"(a)\k<-0>(.)");
1873        fail(r"(a)\k<+0>(.)");
1874        fail(r"(a)\k<+>(.)");
1875        fail(r"(a)\k<->(.)");
1876        fail(r"(a)\k<>(.)");
1877    }
1878
1879    #[test]
1880    fn relative_backref_with_recursion_level() {
1881        assert_eq!(
1882            p(r"()\k<1+3>"),
1883            Expr::Concat(vec![
1884                Expr::Group(Box::new(Expr::Empty)),
1885                Expr::BackrefWithRelativeRecursionLevel {
1886                    group: 1,
1887                    relative_level: 3,
1888                    casei: false,
1889                },
1890            ]),
1891        );
1892
1893        assert_eq!(
1894            p(r"()\k<1-0>"),
1895            Expr::Concat(vec![
1896                Expr::Group(Box::new(Expr::Empty)),
1897                Expr::BackrefWithRelativeRecursionLevel {
1898                    group: 1,
1899                    relative_level: 0,
1900                    casei: false,
1901                },
1902            ]),
1903        );
1904
1905        assert_eq!(
1906            p(r"(?<n>)\k<n+3>"),
1907            Expr::Concat(vec![
1908                Expr::Group(Box::new(Expr::Empty)),
1909                Expr::BackrefWithRelativeRecursionLevel {
1910                    group: 1,
1911                    relative_level: 3,
1912                    casei: false,
1913                },
1914            ]),
1915        );
1916
1917        assert_eq!(
1918            p(r"(?<n>)\k<n-3>"),
1919            Expr::Concat(vec![
1920                Expr::Group(Box::new(Expr::Empty)),
1921                Expr::BackrefWithRelativeRecursionLevel {
1922                    group: 1,
1923                    relative_level: -3,
1924                    casei: false,
1925                }
1926            ]),
1927        );
1928
1929        assert_eq!(
1930            p(r"\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z"),
1931            Expr::Concat(vec![
1932                Expr::Assertion(Assertion::StartText),
1933                Expr::Group(Box::new(Expr::Alt(vec![
1934                    Expr::Empty,
1935                    Expr::Any { newline: false },
1936                    Expr::Concat(vec![
1937                        Expr::Group(Box::new(Expr::Any { newline: false })),
1938                        Expr::SubroutineCall(1),
1939                        Expr::BackrefWithRelativeRecursionLevel {
1940                            group: 2,
1941                            relative_level: 0,
1942                            casei: false,
1943                        },
1944                    ])
1945                ]))),
1946                Expr::Assertion(Assertion::EndText)
1947            ]),
1948        );
1949    }
1950
1951    #[test]
1952    fn relative_subroutine_call() {
1953        assert_eq!(
1954            p(r"(a)(.)\g<-1>"),
1955            Expr::Concat(vec![
1956                Expr::Group(Box::new(make_literal("a"))),
1957                Expr::Group(Box::new(Expr::Any { newline: false })),
1958                Expr::SubroutineCall(2),
1959            ])
1960        );
1961
1962        assert_eq!(
1963            p(r"(a)\g<+1>(.)"),
1964            Expr::Concat(vec![
1965                Expr::Group(Box::new(make_literal("a"))),
1966                Expr::SubroutineCall(2),
1967                Expr::Group(Box::new(Expr::Any { newline: false })),
1968            ])
1969        );
1970
1971        fail(r"(a)\g<-0>(.)");
1972        fail(r"(a)\g<+0>(.)");
1973        fail(r"(a)\g<+>(.)");
1974        fail(r"(a)\g<->(.)");
1975        fail(r"(a)\g<>(.)");
1976    }
1977
1978    #[test]
1979    fn lookaround() {
1980        assert_eq!(
1981            p("(?=a)"),
1982            Expr::LookAround(Box::new(make_literal("a")), LookAhead)
1983        );
1984        assert_eq!(
1985            p("(?!a)"),
1986            Expr::LookAround(Box::new(make_literal("a")), LookAheadNeg)
1987        );
1988        assert_eq!(
1989            p("(?<=a)"),
1990            Expr::LookAround(Box::new(make_literal("a")), LookBehind)
1991        );
1992        assert_eq!(
1993            p("(?<!a)"),
1994            Expr::LookAround(Box::new(make_literal("a")), LookBehindNeg)
1995        );
1996    }
1997
1998    #[test]
1999    fn shy_group() {
2000        assert_eq!(
2001            p("(?:ab)c"),
2002            Expr::Concat(vec![
2003                Expr::Concat(vec![make_literal("a"), make_literal("b"),]),
2004                make_literal("c"),
2005            ])
2006        );
2007    }
2008
2009    #[test]
2010    fn flag_state() {
2011        assert_eq!(p("(?s)."), Expr::Any { newline: true });
2012        assert_eq!(p("(?s:(?-s:.))"), Expr::Any { newline: false });
2013        assert_eq!(
2014            p("(?s:.)."),
2015            Expr::Concat(vec![
2016                Expr::Any { newline: true },
2017                Expr::Any { newline: false },
2018            ])
2019        );
2020        assert_eq!(
2021            p("(?:(?s).)."),
2022            Expr::Concat(vec![
2023                Expr::Any { newline: true },
2024                Expr::Any { newline: false },
2025            ])
2026        );
2027    }
2028
2029    #[test]
2030    fn flag_multiline() {
2031        assert_eq!(p("^"), Expr::Assertion(Assertion::StartText));
2032        assert_eq!(
2033            p("(?m:^)"),
2034            Expr::Assertion(Assertion::StartLine { crlf: false })
2035        );
2036        assert_eq!(p("$"), Expr::Assertion(Assertion::EndText));
2037        assert_eq!(
2038            p("(?m:$)"),
2039            Expr::Assertion(Assertion::EndLine { crlf: false })
2040        );
2041    }
2042
2043    #[test]
2044    fn flag_swap_greed() {
2045        assert_eq!(p("a*"), p("(?U:a*?)"));
2046        assert_eq!(p("a*?"), p("(?U:a*)"));
2047    }
2048
2049    #[test]
2050    fn invalid_flags() {
2051        assert!(Expr::parse_tree("(?").is_err());
2052        assert!(Expr::parse_tree("(?)").is_err());
2053        assert!(Expr::parse_tree("(?-)").is_err());
2054        assert!(Expr::parse_tree("(?-:a)").is_err());
2055        assert!(Expr::parse_tree("(?q:a)").is_err());
2056    }
2057
2058    #[test]
2059    fn lifetime() {
2060        assert_eq!(
2061            p("\\'[a-zA-Z_][a-zA-Z0-9_]*(?!\\')\\b"),
2062            Expr::Concat(vec![
2063                make_literal("'"),
2064                Expr::Delegate {
2065                    inner: String::from("[a-zA-Z_]"),
2066                    size: 1,
2067                    casei: false
2068                },
2069                Expr::Repeat {
2070                    child: Box::new(Expr::Delegate {
2071                        inner: String::from("[a-zA-Z0-9_]"),
2072                        size: 1,
2073                        casei: false
2074                    }),
2075                    lo: 0,
2076                    hi: usize::MAX,
2077                    greedy: true
2078                },
2079                Expr::LookAround(Box::new(make_literal("'")), LookAheadNeg),
2080                Expr::Assertion(Assertion::WordBoundary),
2081            ])
2082        );
2083    }
2084
2085    #[test]
2086    fn ignore_whitespace() {
2087        assert_eq!(p("(?x: )"), p(""));
2088        assert_eq!(p("(?x) | "), p("|"));
2089        assert_eq!(p("(?x: a )"), p("a"));
2090        assert_eq!(p("(?x: a # ) bobby tables\n b )"), p("ab"));
2091        assert_eq!(p("(?x: a | b )"), p("a|b"));
2092        assert_eq!(p("(?x: ( a b ) )"), p("(ab)"));
2093        assert_eq!(p("(?x: a + )"), p("a+"));
2094        assert_eq!(p("(?x: a {2} )"), p("a{2}"));
2095        assert_eq!(p("(?x: a { 2 } )"), p("a{2}"));
2096        assert_eq!(p("(?x: a { 2 , } )"), p("a{2,}"));
2097        assert_eq!(p("(?x: a { , 2 } )"), p("a{,2}"));
2098        assert_eq!(p("(?x: a { 2 , 3 } )"), p("a{2,3}"));
2099        assert_eq!(p("(?x: a { 2 , 3 } ? )"), p("a{2,3}?"));
2100        assert_eq!(p("(?x: ( ? i : . ) )"), p("(?i:.)"));
2101        assert_eq!(p("(?x: ( ?= a ) )"), p("(?=a)"));
2102        assert_eq!(p("(?x: [ ] )"), p("[ ]"));
2103        assert_eq!(p("(?x: [ ^] )"), p("[ ^]"));
2104        assert_eq!(p("(?x: [a - z] )"), p("[a - z]"));
2105        assert_eq!(p("(?x: [ \\] \\\\] )"), p("[ \\] \\\\]"));
2106        assert_eq!(p("(?x: a\\ b )"), p("a b"));
2107        assert_eq!(p("(?x: a (?-x:#) b )"), p("a#b"));
2108        assert_eq!(p("(?x: a (?# b))c"), p("ac"));
2109        assert_eq!(p("(?x: \\b { s t a r t} a )"), p("\\b{start}a"));
2110        assert_eq!(p("(?x: \\b {end} )"), p("\\b{end}"));
2111        assert_eq!(p("(?x: \\b { start-half } )"), p("\\b{start-half}"));
2112        assert_eq!(p("(?x: \\b { e n d - h a l f } )"), p("\\b{end-half}"));
2113        assert_eq!(p("(?x: \\b{s\nt\ra\trt} a)"), p("\\b{start}a"));
2114        assert_eq!(p("(?x: \\u { 0 0 3 2} a)"), p("\\u{0032}a"));
2115        assert_eq!(p("(?x: \\U { 0001 F600 } a)"), p("\\U{0001F600}a"));
2116        assert_eq!(p("(?x: \\x { 3 2} a)"), p("\\x{32}a"));
2117        assert_eq!(p("(?x: \\b { s t a # x\nr t} a )"), p("\\b{start}a"));
2118        assert_eq!(p("(?x: \\b { s t a (?# x)r t} a )"), p("\\b{start}a"));
2119        assert_eq!(p("(?x: \\u { 0 0 # x\n3 2} a)"), p("\\u{0032}a"));
2120        assert_eq!(p("(?x: \\u { 0 0 (?# x)3 2} a)"), p("\\u{0032}a"));
2121    }
2122
2123    #[test]
2124    fn comments() {
2125        assert_eq!(p(r"ab(?# comment)"), p("ab"));
2126        assert_eq!(p(r"ab(?#)"), p("ab"));
2127        assert_eq!(p(r"(?# comment 1)(?# comment 2)ab"), p("ab"));
2128        assert_eq!(p(r"ab(?# comment \))c"), p("abc"));
2129        assert_eq!(p(r"ab(?# comment \\)c"), p("abc"));
2130        assert_eq!(p(r"ab(?# comment ()c"), p("abc"));
2131        assert_eq!(p(r"ab(?# comment)*"), p("ab*"));
2132        fail(r"ab(?# comment");
2133        fail(r"ab(?# comment\");
2134    }
2135
2136    #[test]
2137    fn atomic_group() {
2138        assert_eq!(p("(?>a)"), Expr::AtomicGroup(Box::new(make_literal("a"))));
2139    }
2140
2141    #[test]
2142    fn possessive() {
2143        assert_eq!(
2144            p("a++"),
2145            Expr::AtomicGroup(Box::new(Expr::Repeat {
2146                child: Box::new(make_literal("a")),
2147                lo: 1,
2148                hi: usize::MAX,
2149                greedy: true
2150            }))
2151        );
2152        assert_eq!(
2153            p("a*+"),
2154            Expr::AtomicGroup(Box::new(Expr::Repeat {
2155                child: Box::new(make_literal("a")),
2156                lo: 0,
2157                hi: usize::MAX,
2158                greedy: true
2159            }))
2160        );
2161        assert_eq!(
2162            p("a?+"),
2163            Expr::AtomicGroup(Box::new(Expr::Repeat {
2164                child: Box::new(make_literal("a")),
2165                lo: 0,
2166                hi: 1,
2167                greedy: true
2168            }))
2169        );
2170    }
2171
2172    #[test]
2173    fn invalid_backref() {
2174        // only syntactic tests; see similar test in analyze module
2175        assert_error(
2176            r".\18446744073709551616",
2177            "Parsing error at position 2: Invalid back reference",
2178        ); // unreasonably large number
2179        assert_error(r".\c", "Parsing error at position 1: Invalid escape: \\c"); // not decimal
2180
2181        assert_error(
2182            r"a\1",
2183            "Parsing error at position 2: Invalid back reference",
2184        ); // invalid back reference according to regex length - not long enough to contain that many paren pairs
2185        assert_error(
2186            r"(a)\2",
2187            "Parsing error at position 4: Invalid back reference",
2188        ); // invalid back reference according to regex length - not long enough to contain that many paren pairs
2189    }
2190
2191    #[test]
2192    fn invalid_group_name_backref() {
2193        assert_error(
2194            "\\k<id>(?<id>.)",
2195            "Parsing error at position 2: Invalid group name in back reference: id",
2196        );
2197    }
2198
2199    #[test]
2200    fn named_backref_only() {
2201        assert_error("(?<id>.)\\1", "Error compiling regex: Numbered backref/call not allowed because named group was used, use a named backref instead");
2202        assert_error("(a)\\1(?<name>b)", "Error compiling regex: Numbered backref/call not allowed because named group was used, use a named backref instead");
2203    }
2204
2205    #[test]
2206    fn invalid_group_name() {
2207        assert_error(
2208            "(?<id)",
2209            "Parsing error at position 1: Could not parse group name",
2210        );
2211        assert_error(
2212            "(?<>)",
2213            "Parsing error at position 1: Could not parse group name",
2214        );
2215        assert_error(
2216            "(?<#>)",
2217            "Parsing error at position 1: Could not parse group name",
2218        );
2219        assert_error(
2220            "\\kxxx<id>",
2221            "Parsing error at position 2: Could not parse group name",
2222        );
2223        // "-" can only be used after a name for relative recursion level, so must be followed by a number
2224        assert_error(
2225            "\\k<id-withdash>",
2226            "Parsing error at position 2: Could not parse group name",
2227        );
2228        assert_error(
2229            "\\k<-id>",
2230            "Parsing error at position 2: Could not parse group name",
2231        );
2232    }
2233
2234    #[test]
2235    fn unknown_flag() {
2236        assert_error(
2237            "(?-:a)",
2238            "Parsing error at position 2: Unknown group flag: (?-:",
2239        );
2240        assert_error(
2241            "(?)",
2242            "Parsing error at position 2: Unknown group flag: (?)",
2243        );
2244        assert_error(
2245            "(?--)",
2246            "Parsing error at position 2: Unknown group flag: (?--",
2247        );
2248        // Check that we don't split on char boundary
2249        assert_error(
2250            "(?\u{1F60A})",
2251            "Parsing error at position 2: Unknown group flag: (?\u{1F60A}",
2252        );
2253    }
2254
2255    #[test]
2256    fn no_quantifiers_on_lookarounds() {
2257        assert_error(
2258            "(?=hello)+",
2259            "Parsing error at position 9: Target of repeat operator is invalid",
2260        );
2261        assert_error(
2262            "(?<!hello)*",
2263            "Parsing error at position 10: Target of repeat operator is invalid",
2264        );
2265        assert_error(
2266            "(?<=hello){2,3}",
2267            "Parsing error at position 14: Target of repeat operator is invalid",
2268        );
2269        assert_error(
2270            "(?!hello)?",
2271            "Parsing error at position 9: Target of repeat operator is invalid",
2272        );
2273        assert_error(
2274            "(a|b|?)",
2275            "Parsing error at position 5: Target of repeat operator is invalid",
2276        );
2277    }
2278
2279    #[test]
2280    fn no_quantifiers_on_assertions_in_oniguruma_mode() {
2281        assert_error_oniguruma(
2282            r"\b{1}",
2283            "Parsing error at position 4: Target of repeat operator is invalid",
2284        );
2285        assert_error_oniguruma(
2286            r"\B{2}",
2287            "Parsing error at position 4: Target of repeat operator is invalid",
2288        );
2289        assert_error_oniguruma(
2290            r"^{3}",
2291            "Parsing error at position 3: Target of repeat operator is invalid",
2292        );
2293        assert_error_oniguruma(
2294            r"${1,5}",
2295            "Parsing error at position 5: Target of repeat operator is invalid",
2296        );
2297        assert_error_oniguruma(
2298            r"\A*",
2299            "Parsing error at position 2: Target of repeat operator is invalid",
2300        );
2301        assert_error_oniguruma(
2302            r"\z+",
2303            "Parsing error at position 2: Target of repeat operator is invalid",
2304        );
2305        assert_error_oniguruma(
2306            r"^?",
2307            "Parsing error at position 1: Target of repeat operator is invalid",
2308        );
2309        assert_error_oniguruma(
2310            r"${2}",
2311            "Parsing error at position 3: Target of repeat operator is invalid",
2312        );
2313        assert_error_oniguruma(
2314            r"(?m)^?",
2315            "Parsing error at position 5: Target of repeat operator is invalid",
2316        );
2317        assert_error_oniguruma(
2318            r"(?m)${2}",
2319            "Parsing error at position 7: Target of repeat operator is invalid",
2320        );
2321        assert_error_oniguruma(
2322            r"\b+",
2323            "Parsing error at position 2: Target of repeat operator is invalid",
2324        );
2325    }
2326
2327    #[test]
2328    fn keepout() {
2329        assert_eq!(
2330            p("a\\Kb"),
2331            Expr::Concat(vec![make_literal("a"), Expr::KeepOut, make_literal("b"),])
2332        );
2333    }
2334
2335    #[test]
2336    fn no_quantifiers_on_other_non_repeatable_expressions() {
2337        assert_error(
2338            r"\K?",
2339            "Parsing error at position 2: Target of repeat operator is invalid",
2340        );
2341        assert_error(
2342            r"\G*",
2343            "Parsing error at position 2: Target of repeat operator is invalid",
2344        );
2345    }
2346
2347    #[test]
2348    fn backref_exists_condition() {
2349        assert_eq!(
2350            p("(h)?(?(1))"),
2351            Expr::Concat(vec![
2352                Expr::Repeat {
2353                    child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2354                    lo: 0,
2355                    hi: 1,
2356                    greedy: true
2357                },
2358                Expr::BackrefExistsCondition(1)
2359            ])
2360        );
2361        assert_eq!(
2362            p("(?<h>h)?(?('h'))"),
2363            Expr::Concat(vec![
2364                Expr::Repeat {
2365                    child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2366                    lo: 0,
2367                    hi: 1,
2368                    greedy: true
2369                },
2370                Expr::BackrefExistsCondition(1)
2371            ])
2372        );
2373    }
2374
2375    #[test]
2376    fn conditional_non_backref_validity_check_without_branches() {
2377        assert_error(
2378            "(?(foo))",
2379            "Parsing error at position 7: General parsing error: expected conditional to be a backreference or at least an expression for when the condition is true",
2380        );
2381    }
2382
2383    #[test]
2384    fn conditional_invalid_target_of_repeat_operator() {
2385        assert_error(
2386            r"(?(?=\d)\w|!)",
2387            "Parsing error at position 3: Target of repeat operator is invalid",
2388        );
2389    }
2390
2391    #[test]
2392    fn conditional_unclosed_at_end_of_pattern() {
2393        assert_error(
2394            r"(?(",
2395            "Parsing error at position 3: Opening parenthesis without closing parenthesis",
2396        );
2397    }
2398
2399    #[test]
2400    fn subroutine_call_unclosed_at_end_of_pattern() {
2401        assert_error(
2402            r"\g<",
2403            "Parsing error at position 2: Could not parse group name",
2404        );
2405
2406        assert_error(
2407            r"\g<name",
2408            "Parsing error at position 2: Could not parse group name",
2409        );
2410
2411        assert_error(
2412            r"\g'",
2413            "Parsing error at position 2: Could not parse group name",
2414        );
2415
2416        assert_error(
2417            r"\g''",
2418            "Parsing error at position 2: Could not parse group name",
2419        );
2420
2421        assert_error(
2422            r"\g<>",
2423            "Parsing error at position 2: Could not parse group name",
2424        );
2425
2426        assert_error(r"\g", "Parsing error at position 0: Invalid escape: \\g");
2427
2428        assert_error(
2429            r"\g test",
2430            "Parsing error at position 2: Could not parse group name",
2431        );
2432    }
2433
2434    #[test]
2435    fn subroutine_call_missing_subroutine_reference() {
2436        assert_error(
2437            r"\g test",
2438            "Parsing error at position 2: Could not parse group name",
2439        );
2440    }
2441
2442    #[test]
2443    fn subroutine_call_name_includes_dash() {
2444        assert_error(
2445            r"\g<1-0>(a)",
2446            "Parsing error at position 2: Could not parse group name",
2447        );
2448        assert_error(
2449            r"\g<name+1>(?'name'a)",
2450            "Parsing error at position 2: Could not parse group name",
2451        );
2452    }
2453
2454    #[test]
2455    fn backref_condition_with_one_two_or_three_branches() {
2456        assert_eq!(
2457            p("(h)?(?(1)i|x)"),
2458            Expr::Concat(vec![
2459                Expr::Repeat {
2460                    child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2461                    lo: 0,
2462                    hi: 1,
2463                    greedy: true
2464                },
2465                Expr::Conditional {
2466                    condition: Box::new(Expr::BackrefExistsCondition(1)),
2467                    true_branch: Box::new(make_literal("i")),
2468                    false_branch: Box::new(make_literal("x")),
2469                },
2470            ])
2471        );
2472
2473        assert_eq!(
2474            p("(h)?(?(1)i)"),
2475            Expr::Concat(vec![
2476                Expr::Repeat {
2477                    child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2478                    lo: 0,
2479                    hi: 1,
2480                    greedy: true
2481                },
2482                Expr::Conditional {
2483                    condition: Box::new(Expr::BackrefExistsCondition(1)),
2484                    true_branch: Box::new(make_literal("i")),
2485                    false_branch: Box::new(Expr::Empty),
2486                },
2487            ])
2488        );
2489
2490        assert_eq!(
2491            p("(h)?(?(1)ii|xy|z)"),
2492            Expr::Concat(vec![
2493                Expr::Repeat {
2494                    child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2495                    lo: 0,
2496                    hi: 1,
2497                    greedy: true
2498                },
2499                Expr::Conditional {
2500                    condition: Box::new(Expr::BackrefExistsCondition(1)),
2501                    true_branch: Box::new(Expr::Concat(
2502                        vec![make_literal("i"), make_literal("i"),]
2503                    )),
2504                    false_branch: Box::new(Expr::Alt(vec![
2505                        Expr::Concat(vec![make_literal("x"), make_literal("y"),]),
2506                        make_literal("z"),
2507                    ])),
2508                },
2509            ])
2510        );
2511
2512        assert_eq!(
2513            p("(?<cap>h)?(?(<cap>)ii|xy|z)"),
2514            Expr::Concat(vec![
2515                Expr::Repeat {
2516                    child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2517                    lo: 0,
2518                    hi: 1,
2519                    greedy: true
2520                },
2521                Expr::Conditional {
2522                    condition: Box::new(Expr::BackrefExistsCondition(1)),
2523                    true_branch: Box::new(Expr::Concat(
2524                        vec![make_literal("i"), make_literal("i"),]
2525                    )),
2526                    false_branch: Box::new(Expr::Alt(vec![
2527                        Expr::Concat(vec![make_literal("x"), make_literal("y"),]),
2528                        make_literal("z"),
2529                    ])),
2530                },
2531            ])
2532        );
2533    }
2534
2535    #[test]
2536    fn conditional() {
2537        assert_eq!(
2538            p("((?(a)b|c))(\\1)"),
2539            Expr::Concat(vec![
2540                Expr::Group(Box::new(Expr::Conditional {
2541                    condition: Box::new(make_literal("a")),
2542                    true_branch: Box::new(make_literal("b")),
2543                    false_branch: Box::new(make_literal("c"))
2544                })),
2545                Expr::Group(Box::new(Expr::Backref {
2546                    group: 1,
2547                    casei: false,
2548                },))
2549            ])
2550        );
2551
2552        assert_eq!(
2553            p(r"^(?(\d)abc|\d!)$"),
2554            Expr::Concat(vec![
2555                Expr::Assertion(Assertion::StartText),
2556                Expr::Conditional {
2557                    condition: Box::new(Expr::Delegate {
2558                        inner: "\\d".to_string(),
2559                        size: 1,
2560                        casei: false,
2561                    }),
2562                    true_branch: Box::new(Expr::Concat(vec![
2563                        make_literal("a"),
2564                        make_literal("b"),
2565                        make_literal("c"),
2566                    ])),
2567                    false_branch: Box::new(Expr::Concat(vec![
2568                        Expr::Delegate {
2569                            inner: "\\d".to_string(),
2570                            size: 1,
2571                            casei: false,
2572                        },
2573                        make_literal("!"),
2574                    ])),
2575                },
2576                Expr::Assertion(Assertion::EndText),
2577            ])
2578        );
2579
2580        assert_eq!(
2581            p(r"(?((?=\d))\w|!)"),
2582            Expr::Conditional {
2583                condition: Box::new(Expr::LookAround(
2584                    Box::new(Expr::Delegate {
2585                        inner: "\\d".to_string(),
2586                        size: 1,
2587                        casei: false
2588                    }),
2589                    LookAhead
2590                )),
2591                true_branch: Box::new(Expr::Delegate {
2592                    inner: "\\w".to_string(),
2593                    size: 1,
2594                    casei: false,
2595                }),
2596                false_branch: Box::new(make_literal("!")),
2597            },
2598        );
2599
2600        assert_eq!(
2601            p(r"(?((ab))c|d)"),
2602            Expr::Conditional {
2603                condition: Box::new(Expr::Group(Box::new(Expr::Concat(vec![
2604                    make_literal("a"),
2605                    make_literal("b"),
2606                ]),))),
2607                true_branch: Box::new(make_literal("c")),
2608                false_branch: Box::new(make_literal("d")),
2609            },
2610        );
2611    }
2612
2613    #[test]
2614    fn subroutines() {
2615        assert_eq!(
2616            p(r"(a)\g1"),
2617            Expr::Concat(vec![
2618                Expr::Group(Box::new(make_literal("a"))),
2619                Expr::SubroutineCall(1)
2620            ])
2621        );
2622
2623        assert_eq!(
2624            p(r"(a)\g<1>"),
2625            Expr::Concat(vec![
2626                Expr::Group(Box::new(make_literal("a"))),
2627                Expr::SubroutineCall(1)
2628            ])
2629        );
2630
2631        assert_eq!(
2632            p(r"(?<group_name>a)\g<group_name>"),
2633            Expr::Concat(vec![
2634                Expr::Group(Box::new(make_literal("a"))),
2635                Expr::SubroutineCall(1)
2636            ])
2637        );
2638
2639        assert_eq!(
2640            p(r"(?<group_name>a)\g'group_name'"),
2641            Expr::Concat(vec![
2642                Expr::Group(Box::new(make_literal("a"))),
2643                Expr::SubroutineCall(1)
2644            ])
2645        );
2646
2647        assert_eq!(
2648            p(r"(?<group_name>a)(?P>group_name)"),
2649            Expr::Concat(vec![
2650                Expr::Group(Box::new(make_literal("a"))),
2651                Expr::SubroutineCall(1)
2652            ])
2653        );
2654    }
2655
2656    #[test]
2657    fn subroutine_defined_later() {
2658        assert_eq!(
2659            p(r"\g<name>(?<name>a)"),
2660            Expr::Concat(vec![
2661                Expr::SubroutineCall(1),
2662                Expr::Group(Box::new(make_literal("a"))),
2663            ])
2664        );
2665
2666        assert_eq!(
2667            p(r"\g<c>(?:a|b|(?<c>c)?)"),
2668            Expr::Concat(vec![
2669                Expr::SubroutineCall(1),
2670                Expr::Alt(vec![
2671                    make_literal("a"),
2672                    make_literal("b"),
2673                    Expr::Repeat {
2674                        child: Box::new(Expr::Group(Box::new(make_literal("c")))),
2675                        lo: 0,
2676                        hi: 1,
2677                        greedy: true
2678                    }
2679                ])
2680            ])
2681        );
2682
2683        assert_eq!(
2684            p(r"(?<a>a)?\g<b>(?(<a>)(?<b>b)|c)"),
2685            Expr::Concat(vec![
2686                Expr::Repeat {
2687                    child: Box::new(Expr::Group(Box::new(make_literal("a")))),
2688                    lo: 0,
2689                    hi: 1,
2690                    greedy: true
2691                },
2692                Expr::SubroutineCall(2),
2693                Expr::Conditional {
2694                    condition: Box::new(Expr::BackrefExistsCondition(1)),
2695                    true_branch: Box::new(Expr::Group(Box::new(make_literal("b")))),
2696                    false_branch: Box::new(make_literal("c")),
2697                }
2698            ])
2699        );
2700
2701        assert_eq!(
2702            p(r"\g<1>(a)"),
2703            Expr::Concat(vec![
2704                Expr::SubroutineCall(1),
2705                Expr::Group(Box::new(make_literal("a"))),
2706            ])
2707        );
2708    }
2709
2710    #[test]
2711    fn recursive_subroutine_call() {
2712        assert_eq!(
2713            p(r"\A(?<a>|.|(?:(?<b>.)\g<a>\k<b>))\z"),
2714            Expr::Concat(vec![
2715                Expr::Assertion(Assertion::StartText,),
2716                Expr::Group(Box::new(Expr::Alt(vec![
2717                    Expr::Empty,
2718                    Expr::Any { newline: false },
2719                    Expr::Concat(vec![
2720                        Expr::Group(Box::new(Expr::Any { newline: false },)),
2721                        Expr::SubroutineCall(1,),
2722                        Expr::Backref {
2723                            group: 2,
2724                            casei: false,
2725                        },
2726                    ],),
2727                ],),)),
2728                Expr::Assertion(Assertion::EndText,),
2729            ],)
2730        );
2731    }
2732
2733    #[test]
2734    fn self_recursive_subroutine_call() {
2735        let tree = Expr::parse_tree(r"hello\g<0>?world").unwrap();
2736        assert!(tree.self_recursive);
2737
2738        let tree = Expr::parse_tree(r"hello\g0?world").unwrap();
2739        assert!(tree.self_recursive);
2740
2741        let tree = Expr::parse_tree(r"hello world").unwrap();
2742        assert!(!tree.self_recursive);
2743
2744        let tree = Expr::parse_tree(r"hello\g1world").unwrap();
2745        assert!(!tree.self_recursive);
2746
2747        let tree = Expr::parse_tree(r"hello\g<1>world").unwrap();
2748        assert!(!tree.self_recursive);
2749
2750        let tree = Expr::parse_tree(r"(hello\g1?world)").unwrap();
2751        assert!(!tree.self_recursive);
2752
2753        let tree = Expr::parse_tree(r"(?<a>hello\g<a>world)").unwrap();
2754        assert!(!tree.self_recursive);
2755    }
2756
2757    #[test]
2758    fn named_subroutine_not_defined_later() {
2759        assert_eq!(
2760            p(r"\g<wrong_name>(?<different_name>a)"),
2761            Expr::Concat(vec![
2762                Expr::UnresolvedNamedSubroutineCall {
2763                    name: "wrong_name".to_string(),
2764                    ix: 2
2765                },
2766                Expr::Group(Box::new(make_literal("a"))),
2767            ])
2768        );
2769    }
2770
2771    // found by cargo fuzz, then minimized
2772    #[test]
2773    fn fuzz_1() {
2774        p(r"\ä");
2775    }
2776
2777    #[test]
2778    fn fuzz_2() {
2779        p(r"\pä");
2780    }
2781
2782    #[test]
2783    fn fuzz_3() {
2784        fail(r"(?()^");
2785        fail(r#"!w(?()\"Kuz>"#);
2786    }
2787
2788    #[test]
2789    fn fuzz_4() {
2790        fail(r"\u{2}(?(2)");
2791    }
2792
2793    #[test]
2794    fn word_boundary_brace_syntax() {
2795        // Valid patterns produce correct assertions
2796        assert_eq!(
2797            p(r"\b{start}"),
2798            Expr::Assertion(Assertion::LeftWordBoundary)
2799        );
2800        assert_eq!(p(r"\b{end}"), Expr::Assertion(Assertion::RightWordBoundary));
2801        assert_eq!(
2802            p(r"\b{start-half}"),
2803            Expr::Assertion(Assertion::LeftWordHalfBoundary)
2804        );
2805        assert_eq!(
2806            p(r"\b{end-half}"),
2807            Expr::Assertion(Assertion::RightWordHalfBoundary)
2808        );
2809    }
2810
2811    #[test]
2812    fn word_boundary_brace_parsing_errors() {
2813        // Invalid patterns produce expected errors
2814        let test_cases = [
2815            (r"\b{invalid}", "Invalid escape: \\b{invalid}"),
2816            (r"\b{start ", "Invalid escape: \\b{...}"),
2817            (r"\b{end ", "Invalid escape: \\b{...}"),
2818            (r"\b{}", "Invalid escape: \\b{}"),
2819            (r"\b{", "Invalid escape: \\b{...}"),
2820            (r"\b{ }", "Invalid escape: \\b{ }"),
2821            (r"\b{START}", "Invalid escape: \\b{START}"),
2822            (r"\b{END}", "Invalid escape: \\b{END}"),
2823            (r"\b{ s t a r t }", "Invalid escape: \\b{ s t a r t }"),
2824            // \B{...} patterns should fail
2825            (r"\B{start}", "Invalid escape: \\B{start}"),
2826            (r"\B{end}", "Invalid escape: \\B{end}"),
2827            (r"\B{start-half}", "Invalid escape: \\B{start-half}"),
2828            (r"\B{end-half}", "Invalid escape: \\B{end-half}"),
2829            (r"\c{start}", "Invalid escape: \\c"),
2830        ];
2831
2832        for (pattern, expected_error) in test_cases {
2833            assert_error(
2834                pattern,
2835                &format!("Parsing error at position 0: {}", expected_error),
2836            );
2837        }
2838    }
2839
2840    fn get_options(pattern: &str, func: impl Fn(SyntaxConfig) -> SyntaxConfig) -> RegexOptions {
2841        let mut options = RegexOptions::default();
2842        options.syntaxc = func(options.syntaxc);
2843        options.pattern = String::from(pattern);
2844        options
2845    }
2846
2847    #[test]
2848    fn parse_with_case_insensitive_in_pattern() {
2849        let tree = Expr::parse_tree("(?i)hello");
2850        let expr = tree.unwrap().expr;
2851
2852        assert_eq!(
2853            expr,
2854            Expr::Concat(vec![
2855                make_literal_case_insensitive("h", true),
2856                make_literal_case_insensitive("e", true),
2857                make_literal_case_insensitive("l", true),
2858                make_literal_case_insensitive("l", true),
2859                make_literal_case_insensitive("o", true)
2860            ])
2861        );
2862    }
2863
2864    #[test]
2865    fn parse_with_case_insensitive_option() {
2866        let options = get_options("hello", |x| x.case_insensitive(true));
2867
2868        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2869        let expr = tree.unwrap().expr;
2870
2871        assert_eq!(
2872            expr,
2873            Expr::Concat(vec![
2874                make_literal_case_insensitive("h", true),
2875                make_literal_case_insensitive("e", true),
2876                make_literal_case_insensitive("l", true),
2877                make_literal_case_insensitive("l", true),
2878                make_literal_case_insensitive("o", true)
2879            ])
2880        );
2881    }
2882
2883    #[test]
2884    fn parse_with_multiline_in_pattern() {
2885        let options = get_options("(?m)^hello$", |x| x);
2886
2887        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2888        let expr = tree.unwrap().expr;
2889
2890        assert_eq!(
2891            expr,
2892            Expr::Concat(vec![
2893                Expr::Assertion(Assertion::StartLine { crlf: false }),
2894                make_literal("h"),
2895                make_literal("e"),
2896                make_literal("l"),
2897                make_literal("l"),
2898                make_literal("o"),
2899                Expr::Assertion(Assertion::EndLine { crlf: false })
2900            ])
2901        );
2902    }
2903
2904    #[test]
2905    fn pparse_with_multiline_option() {
2906        let options = get_options("^hello$", |x| x.multi_line(true));
2907
2908        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2909        let expr = tree.unwrap().expr;
2910
2911        assert_eq!(
2912            expr,
2913            Expr::Concat(vec![
2914                Expr::Assertion(Assertion::StartLine { crlf: false }),
2915                make_literal("h"),
2916                make_literal("e"),
2917                make_literal("l"),
2918                make_literal("l"),
2919                make_literal("o"),
2920                Expr::Assertion(Assertion::EndLine { crlf: false })
2921            ])
2922        );
2923    }
2924
2925    #[test]
2926    fn parse_with_dot_matches_new_line_in_pattern() {
2927        let options = get_options("(?s)(.*)", |x| x);
2928
2929        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2930        let expr = tree.unwrap().expr;
2931
2932        assert_eq!(
2933            expr,
2934            Expr::Group(Box::new(Expr::Repeat {
2935                child: Box::new(Expr::Any { newline: true }),
2936                lo: 0,
2937                hi: usize::MAX,
2938                greedy: true
2939            }))
2940        );
2941    }
2942
2943    #[test]
2944    fn parse_with_dot_matches_new_line_option() {
2945        let options = get_options("(.*)", |x| x.dot_matches_new_line(true));
2946
2947        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2948        let expr = tree.unwrap().expr;
2949
2950        assert_eq!(
2951            expr,
2952            Expr::Group(Box::new(Expr::Repeat {
2953                child: Box::new(Expr::Any { newline: true }),
2954                lo: 0,
2955                hi: usize::MAX,
2956                greedy: true
2957            }))
2958        );
2959    }
2960
2961    #[test]
2962    fn parse_fancy_with_dot_matches_new_line_in_pattern() {
2963        let options = get_options("(.*)(?<=hugo)", |x| x.dot_matches_new_line(true));
2964
2965        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2966        let expr = tree.unwrap().expr;
2967
2968        assert_eq!(
2969            expr,
2970            Expr::Concat(vec![
2971                Expr::Group(Box::new(Expr::Repeat {
2972                    child: Box::new(Expr::Any { newline: true }),
2973                    lo: 0,
2974                    hi: usize::MAX,
2975                    greedy: true
2976                })),
2977                Expr::LookAround(
2978                    Box::new(Expr::Concat(vec![
2979                        make_literal("h"),
2980                        make_literal("u"),
2981                        make_literal("g"),
2982                        make_literal("o")
2983                    ])),
2984                    LookBehind
2985                )
2986            ])
2987        );
2988    }
2989
2990    #[test]
2991    fn parse_with_case_insensitre_from_pattern_and_multi_line_option() {
2992        let options = get_options("(?i)^hello$", |x| x.multi_line(true));
2993
2994        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2995        let expr = tree.unwrap().expr;
2996
2997        assert_eq!(
2998            expr,
2999            Expr::Concat(vec![
3000                Expr::Assertion(Assertion::StartLine { crlf: false }),
3001                make_literal_case_insensitive("h", true),
3002                make_literal_case_insensitive("e", true),
3003                make_literal_case_insensitive("l", true),
3004                make_literal_case_insensitive("l", true),
3005                make_literal_case_insensitive("o", true),
3006                Expr::Assertion(Assertion::EndLine { crlf: false })
3007            ])
3008        );
3009    }
3010
3011    #[test]
3012    fn parse_with_multi_line_and_case_insensitive_options() {
3013        let mut options = get_options("^hello$", |x| x.multi_line(true));
3014        options.syntaxc = options.syntaxc.case_insensitive(true);
3015
3016        let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
3017        let expr = tree.unwrap().expr;
3018
3019        assert_eq!(
3020            expr,
3021            Expr::Concat(vec![
3022                Expr::Assertion(Assertion::StartLine { crlf: false }),
3023                make_literal_case_insensitive("h", true),
3024                make_literal_case_insensitive("e", true),
3025                make_literal_case_insensitive("l", true),
3026                make_literal_case_insensitive("l", true),
3027                make_literal_case_insensitive("o", true),
3028                Expr::Assertion(Assertion::EndLine { crlf: false })
3029            ])
3030        );
3031    }
3032}