1use crate::RegexOptions;
24use alloc::boxed::Box;
25use alloc::string::{String, ToString};
26use alloc::vec::Vec;
27use alloc::{format, vec};
28
29use bit_set::BitSet;
30use regex_syntax::escape_into;
31
32use crate::parse_flags::*;
33use crate::{codepoint_len, CompileError, Error, Expr, ParseError, Result, MAX_RECURSION};
34use crate::{Assertion, LookAround::*};
35
36#[cfg(not(feature = "std"))]
37pub(crate) type NamedGroups = alloc::collections::BTreeMap<String, usize>;
38#[cfg(feature = "std")]
39pub(crate) type NamedGroups = std::collections::HashMap<String, usize>;
40
41#[derive(Debug, Clone)]
42pub struct ExprTree {
43 pub expr: Expr,
44 pub backrefs: BitSet,
45 pub named_groups: NamedGroups,
46 #[allow(dead_code)]
47 pub(crate) contains_subroutines: bool,
48 pub(crate) self_recursive: bool,
49}
50
51#[derive(Debug)]
52pub(crate) struct Parser<'a> {
53 re: &'a str, backrefs: BitSet,
55 flags: u32,
56 named_groups: NamedGroups,
57 numeric_backrefs: bool,
58 curr_group: usize, contains_subroutines: bool,
60 has_unresolved_subroutines: bool,
61 self_recursive: bool,
62}
63
64struct NamedBackrefOrSubroutine<'a> {
65 ix: usize,
66 group_ix: Option<usize>,
67 group_name: Option<&'a str>,
68 recursion_level: Option<isize>,
69}
70
71impl<'a> Parser<'a> {
72 pub(crate) fn parse_with_flags(re: &str, flags: u32) -> Result<ExprTree> {
73 let mut p = Parser::new(re, flags);
74 let (ix, mut expr) = p.parse_re(0, 0)?;
75 if ix < re.len() {
76 return Err(Error::ParseError(
77 ix,
78 ParseError::GeneralParseError("end of string not reached".to_string()),
79 ));
80 }
81
82 if p.has_unresolved_subroutines {
83 p.has_unresolved_subroutines = false;
84 p.resolve_named_subroutine_calls(&mut expr);
85 }
86
87 Ok(ExprTree {
88 expr,
89 backrefs: p.backrefs,
90 named_groups: p.named_groups,
91 contains_subroutines: p.contains_subroutines,
92 self_recursive: p.self_recursive,
93 })
94 }
95
96 pub(crate) fn parse(re: &str) -> Result<ExprTree> {
97 Self::parse_with_flags(re, RegexOptions::default().compute_flags())
98 }
99
100 fn new(re: &str, flags: u32) -> Parser<'_> {
101 let flags = flags | FLAG_UNICODE;
102
103 Parser {
104 re,
105 backrefs: Default::default(),
106 named_groups: Default::default(),
107 numeric_backrefs: false,
108 flags,
109 curr_group: 0,
110 contains_subroutines: false,
111 has_unresolved_subroutines: false,
112 self_recursive: false,
113 }
114 }
115
116 fn parse_re(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
117 let (ix, child) = self.parse_branch(ix, depth)?;
118 let mut ix = self.optional_whitespace(ix)?;
119 if self.re[ix..].starts_with('|') {
120 let mut children = vec![child];
121 while self.re[ix..].starts_with('|') {
122 ix += 1;
123 let (next, child) = self.parse_branch(ix, depth)?;
124 children.push(child);
125 ix = self.optional_whitespace(next)?;
126 }
127 return Ok((ix, Expr::Alt(children)));
128 }
129 if self.numeric_backrefs && !self.named_groups.is_empty() {
131 return Err(Error::CompileError(Box::new(
132 CompileError::NamedBackrefOnly,
133 )));
134 }
135 Ok((ix, child))
136 }
137
138 fn parse_branch(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
139 let mut children = Vec::new();
140 let mut ix = ix;
141 while ix < self.re.len() {
142 let (next, child) = self.parse_piece(ix, depth)?;
143 if next == ix {
144 break;
145 }
146 if child != Expr::Empty {
147 children.push(child);
148 }
149 ix = next;
150 }
151 match children.len() {
152 0 => Ok((ix, Expr::Empty)),
153 1 => Ok((ix, children.pop().unwrap())),
154 _ => Ok((ix, Expr::Concat(children))),
155 }
156 }
157
158 fn parse_piece(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
159 let (ix, child) = self.parse_atom(ix, depth)?;
160 let mut ix = self.optional_whitespace(ix)?;
161 if ix < self.re.len() {
162 let (lo, hi) = match self.re.as_bytes()[ix] {
164 b'?' => (0, 1),
165 b'*' => (0, usize::MAX),
166 b'+' => (1, usize::MAX),
167 b'{' => {
168 match self.parse_repeat(ix) {
169 Ok((next, lo, hi)) => {
170 ix = next - 1;
171 (lo, hi)
172 }
173 Err(_) => {
174 return Ok((ix, child));
176 }
177 }
178 }
179 _ => return Ok((ix, child)),
180 };
181 if !self.is_repeatable(&child) {
182 return Err(Error::ParseError(ix, ParseError::TargetNotRepeatable));
183 }
184 ix += 1;
185 ix = self.optional_whitespace(ix)?;
186 let mut greedy = true;
187 if ix < self.re.len() && self.re.as_bytes()[ix] == b'?' {
188 greedy = false;
189 ix += 1;
190 }
191 greedy ^= self.flag(FLAG_SWAP_GREED);
192 let mut node = Expr::Repeat {
193 child: Box::new(child),
194 lo,
195 hi,
196 greedy,
197 };
198 if ix < self.re.len() && self.re.as_bytes()[ix] == b'+' {
199 ix += 1;
200 node = Expr::AtomicGroup(Box::new(node));
201 }
202 return Ok((ix, node));
203 }
204 Ok((ix, child))
205 }
206
207 fn is_repeatable(&self, child: &Expr) -> bool {
208 match child {
209 Expr::LookAround(_, _) => false,
210 Expr::Empty => false,
211 Expr::Assertion(_) => !self.flag(FLAG_ONIGURUMA_MODE),
213 Expr::KeepOut => false,
214 Expr::ContinueFromPreviousMatchEnd => false,
215 Expr::BackrefExistsCondition(_) => false,
216 _ => true,
217 }
218 }
219
220 fn parse_repeat(&self, ix: usize) -> Result<(usize, usize, usize)> {
222 let ix = self.optional_whitespace(ix + 1)?; let bytes = self.re.as_bytes();
224 if ix == self.re.len() {
225 return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
226 }
227 let mut end = ix;
228 let lo = if bytes[ix] == b',' {
229 0
230 } else if let Some((next, lo)) = parse_decimal(self.re, ix) {
231 end = next;
232 lo
233 } else {
234 return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
235 };
236 let ix = self.optional_whitespace(end)?; if ix == self.re.len() {
238 return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
239 }
240 end = ix;
241 let hi = match bytes[ix] {
242 b'}' => lo,
243 b',' => {
244 end = self.optional_whitespace(ix + 1)?; if let Some((next, hi)) = parse_decimal(self.re, end) {
246 end = next;
247 hi
248 } else {
249 usize::MAX
250 }
251 }
252 _ => return Err(Error::ParseError(ix, ParseError::InvalidRepeat)),
253 };
254 let ix = self.optional_whitespace(end)?; if ix == self.re.len() || bytes[ix] != b'}' {
256 return Err(Error::ParseError(ix, ParseError::InvalidRepeat));
257 }
258 Ok((ix + 1, lo, hi))
259 }
260
261 fn parse_atom(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
262 let ix = self.optional_whitespace(ix)?;
263 if ix == self.re.len() {
264 return Ok((ix, Expr::Empty));
265 }
266 match self.re.as_bytes()[ix] {
267 b'.' => Ok((
268 ix + 1,
269 Expr::Any {
270 newline: self.flag(FLAG_DOTNL),
271 },
272 )),
273 b'^' => Ok((
274 ix + 1,
275 if self.flag(FLAG_MULTI) {
276 Expr::Assertion(Assertion::StartLine { crlf: false })
278 } else {
279 Expr::Assertion(Assertion::StartText)
280 },
281 )),
282 b'$' => Ok((
283 ix + 1,
284 if self.flag(FLAG_MULTI) {
285 Expr::Assertion(Assertion::EndLine { crlf: false })
287 } else {
288 Expr::Assertion(Assertion::EndText)
289 },
290 )),
291 b'(' => self.parse_group(ix, depth),
292 b'\\' => self.parse_escape(ix, false),
293 b'+' | b'*' | b'?' | b'|' | b')' => Ok((ix, Expr::Empty)),
294 b'[' => self.parse_class(ix),
295 b => {
296 let next = ix + codepoint_len(b);
298 Ok((
299 next,
300 Expr::Literal {
301 val: String::from(&self.re[ix..next]),
302 casei: self.flag(FLAG_CASEI),
303 },
304 ))
305 }
306 }
307 }
308
309 fn parse_named_backref(
310 &mut self,
311 ix: usize,
312 open: &str,
313 close: &str,
314 allow_relative: bool,
315 ) -> Result<(usize, Expr)> {
316 let NamedBackrefOrSubroutine {
317 ix: end,
318 group_ix,
319 group_name,
320 recursion_level,
321 } = self.parse_named_backref_or_subroutine(ix, open, close, allow_relative)?;
322 if let Some(group) = group_ix {
323 self.backrefs.insert(group);
324 return Ok((
325 end,
326 if let Some(recursion_level) = recursion_level {
327 Expr::BackrefWithRelativeRecursionLevel {
328 group,
329 relative_level: recursion_level,
330 casei: self.flag(FLAG_CASEI),
331 }
332 } else {
333 Expr::Backref {
334 group,
335 casei: self.flag(FLAG_CASEI),
336 }
337 },
338 ));
339 }
340 if let Some(group_name) = group_name {
341 return Err(Error::ParseError(
343 ix,
344 ParseError::InvalidGroupNameBackref(group_name.to_string()),
345 ));
346 }
347 unreachable!()
348 }
349
350 fn parse_named_subroutine_call(
351 &mut self,
352 ix: usize,
353 open: &str,
354 close: &str,
355 allow_relative: bool,
356 ) -> Result<(usize, Expr)> {
357 let NamedBackrefOrSubroutine {
358 ix: end,
359 group_ix,
360 group_name,
361 recursion_level,
362 } = self.parse_named_backref_or_subroutine(ix, open, close, allow_relative)?;
363 if recursion_level.is_some() {
364 return Err(Error::ParseError(ix, ParseError::InvalidGroupName));
365 }
366 if let Some(group) = group_ix {
367 self.contains_subroutines = true;
368 if group == 0 {
369 self.self_recursive = true;
370 }
371 return Ok((end, Expr::SubroutineCall(group)));
372 }
373 if let Some(group_name) = group_name {
374 let expr = Expr::UnresolvedNamedSubroutineCall {
376 name: group_name.to_string(),
377 ix,
378 };
379 self.has_unresolved_subroutines = true;
380 self.contains_subroutines = true;
381 return Ok((end, expr));
382 }
383 unreachable!()
384 }
385
386 fn parse_named_backref_or_subroutine(
387 &self,
388 ix: usize,
389 open: &str,
390 close: &str,
391 allow_relative: bool,
392 ) -> Result<NamedBackrefOrSubroutine<'_>> {
393 if let Some(ParsedId {
394 id,
395 mut relative,
396 skip,
397 }) = parse_id(&self.re[ix..], open, close, allow_relative)
398 {
399 let group = if let Some(group) = self.named_groups.get(id) {
400 Some(*group)
401 } else if let Ok(group) = id.parse::<usize>() {
402 Some(group)
403 } else if let Some(relative_group) = relative {
404 if id.is_empty() {
405 relative = None;
406 self.curr_group.checked_add_signed(if relative_group < 0 {
407 relative_group + 1
408 } else {
409 relative_group
410 })
411 } else {
412 None
413 }
414 } else {
415 None
416 };
417 if let Some(group) = group {
418 Ok(NamedBackrefOrSubroutine {
419 ix: ix + skip,
420 group_ix: Some(group),
421 group_name: None,
422 recursion_level: relative,
423 })
424 } else {
425 Ok(NamedBackrefOrSubroutine {
427 ix: ix + skip,
428 group_ix: None,
429 group_name: Some(id),
430 recursion_level: relative,
431 })
432 }
433 } else {
434 Err(Error::ParseError(ix, ParseError::InvalidGroupName))
436 }
437 }
438
439 fn parse_numbered_backref(&mut self, ix: usize) -> Result<(usize, Expr)> {
440 let (end, group) = self.parse_numbered_backref_or_subroutine_call(ix)?;
441 self.numeric_backrefs = true;
442 self.backrefs.insert(group);
443 Ok((
444 end,
445 Expr::Backref {
446 group,
447 casei: self.flag(FLAG_CASEI),
448 },
449 ))
450 }
451
452 fn parse_numbered_subroutine_call(&mut self, ix: usize) -> Result<(usize, Expr)> {
453 let (end, group) = self.parse_numbered_backref_or_subroutine_call(ix)?;
454 self.numeric_backrefs = true;
455 self.contains_subroutines = true;
456 if group == 0 {
457 self.self_recursive = true;
458 }
459 Ok((end, Expr::SubroutineCall(group)))
460 }
461
462 fn parse_numbered_backref_or_subroutine_call(&self, ix: usize) -> Result<(usize, usize)> {
463 if let Some((end, group)) = parse_decimal(self.re, ix) {
464 if group < self.re.len() / 2 {
466 return Ok((end, group));
467 }
468 }
469 Err(Error::ParseError(ix, ParseError::InvalidBackref))
470 }
471
472 fn parse_escape(&mut self, ix: usize, in_class: bool) -> Result<(usize, Expr)> {
474 let bytes = self.re.as_bytes();
475 let Some(b) = bytes.get(ix + 1).copied() else {
476 return Err(Error::ParseError(ix, ParseError::TrailingBackslash));
477 };
478 let end = ix + 1 + codepoint_len(b);
479 Ok(if b.is_ascii_digit() {
480 return self.parse_numbered_backref(ix + 1);
481 } else if matches!(b, b'k') && !in_class {
482 if bytes.get(end) == Some(&b'\'') {
484 return self.parse_named_backref(end, "'", "'", true);
485 } else {
486 return self.parse_named_backref(end, "<", ">", true);
487 }
488 } else if b == b'A' && !in_class {
489 (end, Expr::Assertion(Assertion::StartText))
490 } else if b == b'z' && !in_class {
491 (end, Expr::Assertion(Assertion::EndText))
492 } else if b == b'Z' && !in_class {
493 (
494 end,
495 Expr::LookAround(
496 Box::new(Expr::Delegate {
497 inner: "\\n*$".to_string(),
498 size: 0,
499 casei: false,
500 }),
501 LookAhead,
502 ),
503 )
504 } else if (b == b'b' || b == b'B') && !in_class {
505 let check_pos = self.optional_whitespace(end)?;
506 if bytes.get(check_pos) == Some(&b'{') {
507 let next_open_brace_pos = self.optional_whitespace(check_pos + 1)?;
508 let is_repetition = matches!(
509 bytes.get(next_open_brace_pos),
510 Some(&ch) if ch.is_ascii_digit() || ch == b','
511 );
512 if !is_repetition {
513 return self.parse_word_boundary_brace(ix);
514 }
515 }
516 let expr = if b == b'b' {
517 Expr::Assertion(Assertion::WordBoundary)
518 } else {
519 Expr::Assertion(Assertion::NotWordBoundary)
520 };
521 (end, expr)
522 } else if b == b'<' && !in_class {
523 let expr = if self.flag(FLAG_ONIGURUMA_MODE) {
524 make_literal("<")
525 } else {
526 Expr::Assertion(Assertion::LeftWordBoundary)
527 };
528 (end, expr)
529 } else if b == b'>' && !in_class {
530 let expr = if self.flag(FLAG_ONIGURUMA_MODE) {
531 make_literal(">")
532 } else {
533 Expr::Assertion(Assertion::RightWordBoundary)
534 };
535 (end, expr)
536 } else if matches!(b | 32, b'd' | b's' | b'w') {
537 (
538 end,
539 Expr::Delegate {
540 inner: String::from(&self.re[ix..end]),
541 size: 1,
542 casei: self.flag(FLAG_CASEI),
543 },
544 )
545 } else if (b | 32) == b'h' {
546 let s = if b == b'h' {
547 "[0-9A-Fa-f]"
548 } else {
549 "[^0-9A-Fa-f]"
550 };
551 (
552 end,
553 Expr::Delegate {
554 inner: String::from(s),
555 size: 1,
556 casei: false,
557 },
558 )
559 } else if b == b'x' {
560 let end = self.optional_whitespace(end)?;
561 return self.parse_hex(end, 2);
562 } else if b == b'u' {
563 let end = self.optional_whitespace(end)?;
564 return self.parse_hex(end, 4);
565 } else if b == b'U' {
566 let end = self.optional_whitespace(end)?;
567 return self.parse_hex(end, 8);
568 } else if (b | 32) == b'p' && end != bytes.len() {
569 let mut end = end;
570 let b = bytes[end];
571 end += codepoint_len(b);
572 if b == b'{' {
573 loop {
574 if end == self.re.len() {
575 return Err(Error::ParseError(ix, ParseError::UnclosedUnicodeName));
576 }
577 let b = bytes[end];
578 if b == b'}' {
579 end += 1;
580 break;
581 }
582 end += codepoint_len(b);
583 }
584 }
585 (
586 end,
587 Expr::Delegate {
588 inner: String::from(&self.re[ix..end]),
589 size: 1,
590 casei: self.flag(FLAG_CASEI),
591 },
592 )
593 } else if b == b'K' && !in_class {
594 (end, Expr::KeepOut)
595 } else if b == b'G' && !in_class {
596 (end, Expr::ContinueFromPreviousMatchEnd)
597 } else if b == b'O' && !in_class {
598 (end, Expr::Any { newline: true })
599 } else if b == b'g' && !in_class {
600 if end == self.re.len() {
601 return Err(Error::ParseError(
602 ix,
603 ParseError::InvalidEscape("\\g".to_string()),
604 ));
605 }
606 let b = bytes[end];
607 if b.is_ascii_digit() {
608 self.parse_numbered_subroutine_call(end)?
609 } else if b == b'\'' {
610 self.parse_named_subroutine_call(end, "'", "'", true)?
611 } else {
612 self.parse_named_subroutine_call(end, "<", ">", true)?
613 }
614 } else {
615 (
617 end,
618 make_literal(match b {
619 b'a' => "\x07", b'b' => "\x08", b'f' => "\x0c", b'n' => "\n", b'r' => "\r", b't' => "\t", b'v' => "\x0b", b'e' => "\x1b", b' ' => " ",
628 b => {
629 let s = &self.re[ix + 1..end];
630 if b.is_ascii_alphabetic()
631 && !matches!(
632 b,
633 b'k' | b'A' | b'z' | b'b' | b'B' | b'<' | b'>' | b'K' | b'G'
634 )
635 {
636 return Err(Error::ParseError(
637 ix,
638 ParseError::InvalidEscape(format!("\\{}", s)),
639 ));
640 } else {
641 s
642 }
643 }
644 }),
645 )
646 })
647 }
648
649 fn parse_hex(&self, ix: usize, digits: usize) -> Result<(usize, Expr)> {
651 if ix >= self.re.len() {
652 return Err(Error::ParseError(ix, ParseError::InvalidHex));
654 }
655 let bytes = self.re.as_bytes();
656 let b = bytes[ix];
657 if ix + digits <= self.re.len() && bytes[ix..ix + digits].iter().all(|&b| is_hex_digit(b)) {
659 let hex_str = &self.re[ix..ix + digits];
660 return self.hex_to_literal(ix, ix + digits, hex_str);
661 }
662 if b == b'{' {
664 let mut pos = ix + 1;
665 let mut hex_chars = String::new();
666 while pos < self.re.len() {
667 pos = self.optional_whitespace(pos)?;
669 if pos >= self.re.len() {
670 return Err(Error::ParseError(ix, ParseError::InvalidHex));
671 }
672 let b = bytes[pos];
673 if b == b'}' && !hex_chars.is_empty() {
674 return self.hex_to_literal(ix, pos + 1, &hex_chars);
675 }
676 if is_hex_digit(b) && hex_chars.len() < 8 {
677 hex_chars.push(b as char);
678 pos += 1;
679 } else {
680 return Err(Error::ParseError(ix, ParseError::InvalidHex));
681 }
682 }
683 }
684 Err(Error::ParseError(ix, ParseError::InvalidHex))
685 }
686
687 fn hex_to_literal(&self, ix: usize, end: usize, hex_str: &str) -> Result<(usize, Expr)> {
688 let codepoint = u32::from_str_radix(hex_str, 16).unwrap();
689 if let Some(c) = char::from_u32(codepoint) {
690 Ok((
691 end,
692 Expr::Literal {
693 val: c.to_string(),
694 casei: self.flag(FLAG_CASEI),
695 },
696 ))
697 } else {
698 Err(Error::ParseError(ix, ParseError::InvalidCodepointValue))
699 }
700 }
701
702 fn parse_word_boundary_brace(&self, ix: usize) -> Result<(usize, Expr)> {
704 let bytes = self.re.as_bytes();
705
706 if !matches!(bytes.get(ix..ix + 2), Some([b'\\', b'b' | b'B'])) {
708 return Err(Error::ParseError(
709 ix,
710 ParseError::InvalidEscape("\\b{...}".to_string()),
711 ));
712 }
713 let brace_start = self.optional_whitespace(ix + 2)?;
715 if bytes.get(brace_start) != Some(&b'{') {
717 return Err(Error::ParseError(
718 ix,
719 ParseError::InvalidEscape("\\b{...}".to_string()),
720 ));
721 }
722 let mut pos = brace_start + 1;
724 let mut content = String::new();
725 while pos < self.re.len() {
726 let b = bytes[pos];
727 if b == b'}' {
728 break;
729 }
730 let next_pos = self.optional_whitespace(pos)?;
732 if next_pos > pos {
733 pos = next_pos;
735 if pos >= self.re.len() || bytes[pos] == b'}' {
736 break;
737 }
738 }
739 let b = bytes[pos];
741 if b != b'}' {
742 content.push(b as char);
743 pos += codepoint_len(b);
744 }
745 }
746
747 let end_brace = pos;
748 if end_brace >= self.re.len() || bytes[end_brace] != b'}' {
749 return Err(Error::ParseError(
750 ix,
751 ParseError::InvalidEscape("\\b{...}".to_string()),
752 ));
753 }
754
755 if bytes[ix + 1] == b'B' {
757 return Err(Error::ParseError(
758 ix,
759 ParseError::InvalidEscape(format!("\\B{{{}}}", content)),
760 ));
761 }
762
763 let expr = match content.as_str() {
764 "start" => Expr::Assertion(Assertion::LeftWordBoundary),
765 "end" => Expr::Assertion(Assertion::RightWordBoundary),
766 "start-half" => Expr::Assertion(Assertion::LeftWordHalfBoundary),
767 "end-half" => Expr::Assertion(Assertion::RightWordHalfBoundary),
768 _ => {
769 return Err(Error::ParseError(
770 ix,
771 ParseError::InvalidEscape(format!("\\b{{{}}}", content)),
772 ));
773 }
774 };
775
776 Ok((end_brace + 1, expr))
777 }
778
779 fn parse_class(&mut self, ix: usize) -> Result<(usize, Expr)> {
780 let bytes = self.re.as_bytes();
781 let mut ix = ix + 1; let mut class = String::new();
783 let mut nest = 1;
784 class.push('[');
785
786 if bytes.get(ix) == Some(&b'^') {
788 class.push('^');
789 ix += 1;
790 }
791
792 if bytes.get(ix) == Some(&b']') {
794 class.push(']');
795 ix += 1;
796 }
797
798 loop {
799 if ix == self.re.len() {
800 return Err(Error::ParseError(ix, ParseError::InvalidClass));
801 }
802 let end = match bytes[ix] {
803 b'\\' => {
804 let (end, expr) = self.parse_escape(ix, true)?;
806 match expr {
807 Expr::Literal { val, .. } => {
808 debug_assert_eq!(val.chars().count(), 1);
809 escape_into(&val, &mut class);
810 }
811 Expr::Delegate { inner, .. } => {
812 class.push_str(&inner);
813 }
814 _ => {
815 return Err(Error::ParseError(ix, ParseError::InvalidClass));
816 }
817 }
818 end
819 }
820 b'[' => {
821 nest += 1;
822 class.push('[');
823 ix + 1
824 }
825 b']' => {
826 nest -= 1;
827 class.push(']');
828 if nest == 0 {
829 break;
830 }
831 ix + 1
832 }
833 b => {
834 let end = ix + codepoint_len(b);
835 class.push_str(&self.re[ix..end]);
836 end
837 }
838 };
839 ix = end;
840 }
841 let class = Expr::Delegate {
842 inner: class,
843 size: 1,
844 casei: self.flag(FLAG_CASEI),
845 };
846 let ix = ix + 1; Ok((ix, class))
848 }
849
850 fn parse_group(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
851 let depth = depth + 1;
852 if depth >= MAX_RECURSION {
853 return Err(Error::ParseError(ix, ParseError::RecursionExceeded));
854 }
855 let ix = self.optional_whitespace(ix + 1)?;
856 let (la, skip) = if self.re[ix..].starts_with("?=") {
857 (Some(LookAhead), 2)
858 } else if self.re[ix..].starts_with("?!") {
859 (Some(LookAheadNeg), 2)
860 } else if self.re[ix..].starts_with("?<=") {
861 (Some(LookBehind), 3)
862 } else if self.re[ix..].starts_with("?<!") {
863 (Some(LookBehindNeg), 3)
864 } else if self.re[ix..].starts_with("?<") || self.re[ix..].starts_with("?'") {
865 self.curr_group += 1;
867 let (open, close) = if self.re[ix..].starts_with("?<") {
868 ("<", ">")
869 } else {
870 ("'", "'")
871 };
872 if let Some(ParsedId {
873 id,
874 relative: None,
875 skip,
876 }) = parse_id(&self.re[ix + 1..], open, close, false)
877 {
878 self.named_groups.insert(id.to_string(), self.curr_group);
879 (None, skip + 1)
880 } else {
881 return Err(Error::ParseError(ix, ParseError::InvalidGroupName));
882 }
883 } else if self.re[ix..].starts_with("?P<") {
884 self.curr_group += 1; if let Some(ParsedId {
887 id,
888 relative: None,
889 skip,
890 }) = parse_id(&self.re[ix + 2..], "<", ">", false)
891 {
892 self.named_groups.insert(id.to_string(), self.curr_group);
893 (None, skip + 2)
894 } else {
895 return Err(Error::ParseError(ix, ParseError::InvalidGroupName));
896 }
897 } else if self.re[ix..].starts_with("?P=") {
898 return self.parse_named_backref(ix + 3, "", ")", false);
900 } else if self.re[ix..].starts_with("?>") {
901 (None, 2)
902 } else if self.re[ix..].starts_with("?(") {
903 return self.parse_conditional(ix + 2, depth);
904 } else if self.re[ix..].starts_with("?P>") {
905 return self.parse_named_subroutine_call(ix + 3, "", ")", false);
906 } else if self.re[ix..].starts_with('?') {
907 return self.parse_flags(ix, depth);
908 } else {
909 self.curr_group += 1; (None, 0)
911 };
912 let ix = ix + skip;
913 let (ix, child) = self.parse_re(ix, depth)?;
914 let ix = self.check_for_close_paren(ix)?;
915 let result = match (la, skip) {
916 (Some(la), _) => Expr::LookAround(Box::new(child), la),
917 (None, 2) => Expr::AtomicGroup(Box::new(child)),
918 _ => Expr::Group(Box::new(child)),
919 };
920 Ok((ix, result))
921 }
922
923 fn check_for_close_paren(&self, ix: usize) -> Result<usize> {
924 let ix = self.optional_whitespace(ix)?;
925 if ix == self.re.len() {
926 return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
927 } else if self.re.as_bytes()[ix] != b')' {
928 return Err(Error::ParseError(
929 ix,
930 ParseError::GeneralParseError("expected close paren".to_string()),
931 ));
932 }
933 Ok(ix + 1)
934 }
935
936 fn parse_flags(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
938 let start = ix + 1;
939
940 fn unknown_flag(re: &str, start: usize, end: usize) -> Error {
941 let after_end = end + codepoint_len(re.as_bytes()[end]);
942 let s = format!("(?{}", &re[start..after_end]);
943 Error::ParseError(start, ParseError::UnknownFlag(s))
944 }
945
946 let mut ix = start;
947 let mut neg = false;
948 let oldflags = self.flags;
949 loop {
950 ix = self.optional_whitespace(ix)?;
951 if ix == self.re.len() {
952 return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
953 }
954 let b = self.re.as_bytes()[ix];
955 match b {
956 b'i' => self.update_flag(FLAG_CASEI, neg),
957 b'm' => self.update_flag(FLAG_MULTI, neg),
958 b's' => self.update_flag(FLAG_DOTNL, neg),
959 b'U' => self.update_flag(FLAG_SWAP_GREED, neg),
960 b'x' => self.update_flag(FLAG_IGNORE_SPACE, neg),
961 b'u' => {
962 if neg {
963 return Err(Error::ParseError(ix, ParseError::NonUnicodeUnsupported));
964 }
965 }
966 b'-' => {
967 if neg {
968 return Err(unknown_flag(self.re, start, ix));
969 }
970 neg = true;
971 }
972 b')' => {
973 if ix == start || neg && ix == start + 1 {
974 return Err(unknown_flag(self.re, start, ix));
975 }
976 return Ok((ix + 1, Expr::Empty));
977 }
978 b':' => {
979 if neg && ix == start + 1 {
980 return Err(unknown_flag(self.re, start, ix));
981 }
982 ix += 1;
983 let (ix, child) = self.parse_re(ix, depth)?;
984 if ix == self.re.len() {
985 return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
986 } else if self.re.as_bytes()[ix] != b')' {
987 return Err(Error::ParseError(
988 ix,
989 ParseError::GeneralParseError("expected close paren".to_string()),
990 ));
991 };
992 self.flags = oldflags;
993 return Ok((ix + 1, child));
994 }
995 _ => return Err(unknown_flag(self.re, start, ix)),
996 }
997 ix += 1;
998 }
999 }
1000
1001 fn parse_conditional(&mut self, ix: usize, depth: usize) -> Result<(usize, Expr)> {
1003 if ix >= self.re.len() {
1004 return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
1005 }
1006 let bytes = self.re.as_bytes();
1007 let b = bytes[ix];
1009 let (next, condition) = if b == b'\'' {
1010 self.parse_named_backref(ix, "'", "')", true)?
1011 } else if b == b'<' {
1012 self.parse_named_backref(ix, "<", ">)", true)?
1013 } else if b == b'+' || b == b'-' || b.is_ascii_digit() {
1014 self.parse_named_backref(ix, "", ")", true)?
1015 } else {
1016 let (next, condition) = self.parse_re(ix, depth)?;
1017 (self.check_for_close_paren(next)?, condition)
1018 };
1019 let (end, child) = self.parse_re(next, depth)?;
1020 if end == next {
1021 if let Expr::Backref { group, .. } = condition {
1023 let after = self.check_for_close_paren(end)?;
1024 return Ok((after, Expr::BackrefExistsCondition(group)));
1025 } else {
1026 return Err(Error::ParseError(
1027 end,
1028 ParseError::GeneralParseError(
1029 "expected conditional to be a backreference or at least an expression for when the condition is true".to_string()
1030 )
1031 ));
1032 }
1033 }
1034 let if_true: Expr;
1035 let mut if_false: Expr = Expr::Empty;
1036 if let Expr::Alt(mut alternatives) = child {
1037 if_true = alternatives.remove(0);
1039 if alternatives.len() == 1 {
1041 if_false = alternatives.pop().expect("expected 2 alternatives");
1042 } else {
1043 if_false = Expr::Alt(alternatives);
1045 }
1046 } else {
1047 if_true = child;
1049 }
1050 let inner_condition = if let Expr::Backref { group, .. } = condition {
1051 Expr::BackrefExistsCondition(group)
1052 } else {
1053 condition
1054 };
1055
1056 let after = self.check_for_close_paren(end)?;
1057 Ok((
1058 after,
1059 if if_true == Expr::Empty && if_false == Expr::Empty {
1060 inner_condition
1061 } else {
1062 Expr::Conditional {
1063 condition: Box::new(inner_condition),
1064 true_branch: Box::new(if_true),
1065 false_branch: Box::new(if_false),
1066 }
1067 },
1068 ))
1069 }
1070
1071 fn flag(&self, flag: u32) -> bool {
1072 let v = self.flags & flag;
1073 v == flag
1074 }
1075
1076 fn update_flag(&mut self, flag: u32, neg: bool) {
1077 if neg {
1078 self.flags &= !flag;
1079 } else {
1080 self.flags |= flag;
1081 }
1082 }
1083
1084 fn optional_whitespace(&self, mut ix: usize) -> Result<usize> {
1085 let bytes = self.re.as_bytes();
1086 loop {
1087 if ix == self.re.len() {
1088 return Ok(ix);
1089 }
1090 match bytes[ix] {
1091 b'#' if self.flag(FLAG_IGNORE_SPACE) => {
1092 match bytes[ix..].iter().position(|&c| c == b'\n') {
1093 Some(x) => ix += x + 1,
1094 None => return Ok(self.re.len()),
1095 }
1096 }
1097 b' ' | b'\r' | b'\n' | b'\t' if self.flag(FLAG_IGNORE_SPACE) => ix += 1,
1098 b'(' if bytes[ix..].starts_with(b"(?#") => {
1099 ix += 3;
1100 loop {
1101 if ix >= self.re.len() {
1102 return Err(Error::ParseError(ix, ParseError::UnclosedOpenParen));
1103 }
1104 match bytes[ix] {
1105 b')' => {
1106 ix += 1;
1107 break;
1108 }
1109 b'\\' => ix += 2,
1110 _ => ix += 1,
1111 }
1112 }
1113 }
1114 _ => return Ok(ix),
1115 }
1116 }
1117 }
1118
1119 fn resolve_named_subroutine_calls(&mut self, expr: &mut Expr) {
1120 match expr {
1121 Expr::UnresolvedNamedSubroutineCall { name, .. } => {
1122 if let Some(group) = self.named_groups.get(name) {
1123 *expr = Expr::SubroutineCall(*group);
1124 } else {
1125 self.has_unresolved_subroutines = true;
1126 }
1127 }
1128 Expr::Group(inner) | Expr::LookAround(inner, _) | Expr::AtomicGroup(inner) => {
1130 self.resolve_named_subroutine_calls(inner);
1131 }
1132 Expr::Concat(children) | Expr::Alt(children) => {
1133 for child in children {
1134 self.resolve_named_subroutine_calls(child);
1135 }
1136 }
1137 Expr::Repeat { child, .. } => {
1138 self.resolve_named_subroutine_calls(child);
1139 }
1140 Expr::Conditional {
1141 condition,
1142 true_branch,
1143 false_branch,
1144 } => {
1145 self.resolve_named_subroutine_calls(condition);
1146 self.resolve_named_subroutine_calls(true_branch);
1147 self.resolve_named_subroutine_calls(false_branch);
1148 }
1149 _ => {}
1150 }
1151 }
1152}
1153
1154pub(crate) fn parse_decimal(s: &str, ix: usize) -> Option<(usize, usize)> {
1156 let mut end = ix;
1157 while end < s.len() && s.as_bytes()[end].is_ascii_digit() {
1158 end += 1;
1159 }
1160 s[ix..end].parse::<usize>().ok().map(|val| (end, val))
1161}
1162
1163#[derive(Debug, PartialEq)]
1164pub(crate) struct ParsedId<'a> {
1165 pub id: &'a str,
1166 pub relative: Option<isize>,
1167 pub skip: usize,
1168}
1169
1170pub(crate) fn parse_id<'a>(
1174 s: &'a str,
1175 open: &'_ str,
1176 close: &'_ str,
1177 allow_relative: bool,
1178) -> Option<ParsedId<'a>> {
1179 debug_assert!(!close.starts_with(is_id_char));
1180
1181 if !s.starts_with(open) || s.len() <= open.len() + close.len() {
1182 return None;
1183 }
1184
1185 let id_start = open.len();
1186 let mut iter = s[id_start..].char_indices().peekable();
1187 let after_id = iter.find(|(_, ch)| !is_id_char(*ch));
1188
1189 let id_len = match after_id.map(|(i, _)| i) {
1190 Some(id_len) => id_len,
1191 None if close.is_empty() => s.len(),
1192 _ => 0,
1193 };
1194
1195 let id_end = id_start + id_len;
1196 if id_len > 0 && s[id_end..].starts_with(close) {
1197 return Some(ParsedId {
1198 id: &s[id_start..id_end],
1199 relative: None,
1200 skip: id_end + close.len(),
1201 });
1202 } else if !allow_relative {
1203 return None;
1204 }
1205 let relative_sign = s.as_bytes()[id_end];
1206 if relative_sign == b'+' || relative_sign == b'-' {
1207 if let Some((end, relative_amount)) = parse_decimal(s, id_end + 1) {
1208 if s[end..].starts_with(close) {
1209 if relative_amount == 0 && id_len == 0 {
1210 return None;
1211 }
1212 let relative_amount_signed = if relative_sign == b'-' {
1213 -(relative_amount as isize)
1214 } else {
1215 relative_amount as isize
1216 };
1217 return Some(ParsedId {
1218 id: &s[id_start..id_end],
1219 relative: Some(relative_amount_signed),
1220 skip: end + close.len(),
1221 });
1222 }
1223 }
1224 }
1225 None
1226}
1227
1228fn is_id_char(c: char) -> bool {
1229 c.is_alphanumeric() || c == '_'
1230}
1231
1232fn is_hex_digit(b: u8) -> bool {
1233 b.is_ascii_digit() || (b'a' <= (b | 32) && (b | 32) <= b'f')
1234}
1235
1236pub(crate) fn make_literal(s: &str) -> Expr {
1237 make_literal_case_insensitive(s, false)
1238}
1239
1240pub(crate) fn make_literal_case_insensitive(s: &str, case_insensitive: bool) -> Expr {
1241 Expr::Literal {
1242 val: String::from(s),
1243 casei: case_insensitive,
1244 }
1245}
1246
1247#[cfg(test)]
1248mod tests {
1249 use alloc::boxed::Box;
1250 use alloc::string::{String, ToString};
1251 use alloc::{format, vec};
1252
1253 use crate::parse::{make_literal, make_literal_case_insensitive, parse_id};
1254 use crate::{Assertion, Expr};
1255 use crate::{LookAround::*, RegexOptions, SyntaxConfig};
1256
1257 fn p(s: &str) -> Expr {
1258 Expr::parse_tree(s).unwrap().expr
1259 }
1260
1261 fn parse_oniguruma(s: &str) -> crate::Result<Expr> {
1262 let mut options = RegexOptions::default();
1263 options.oniguruma_mode = true;
1264 options.pattern = String::from(s);
1265 Expr::parse_tree_with_flags(&options.pattern, options.compute_flags()).map(|tree| tree.expr)
1266 }
1267
1268 #[cfg_attr(feature = "track_caller", track_caller)]
1269 fn fail(s: &str) {
1270 assert!(
1271 Expr::parse_tree(s).is_err(),
1272 "Expected parse error, but was: {:?}",
1273 Expr::parse_tree(s)
1274 );
1275 }
1276
1277 #[cfg_attr(feature = "track_caller", track_caller)]
1278 fn assert_error(re: &str, expected_error: &str) {
1279 let result = Expr::parse_tree(re);
1280 assert!(
1281 result.is_err(),
1282 "Expected parse error, but was: {:?}",
1283 result
1284 );
1285 assert_eq!(&format!("{}", result.err().unwrap()), expected_error);
1286 }
1287
1288 #[cfg_attr(feature = "track_caller", track_caller)]
1289 fn assert_error_oniguruma(re: &str, expected_error: &str) {
1290 let result = parse_oniguruma(re);
1291 assert!(
1292 result.is_err(),
1293 "Expected parse error in Oniguruma mode, but was: {:?}",
1294 result
1295 );
1296 assert_eq!(&format!("{}", result.err().unwrap()), expected_error);
1297 }
1298
1299 #[test]
1300 fn empty() {
1301 assert_eq!(p(""), Expr::Empty);
1302 }
1303
1304 #[test]
1305 fn any() {
1306 assert_eq!(p("."), Expr::Any { newline: false });
1307 assert_eq!(p("(?s:.)"), Expr::Any { newline: true });
1308 }
1309
1310 #[test]
1311 fn start_text() {
1312 assert_eq!(p("^"), Expr::Assertion(Assertion::StartText));
1313 }
1314
1315 #[test]
1316 fn end_text() {
1317 assert_eq!(p("$"), Expr::Assertion(Assertion::EndText));
1318 }
1319
1320 #[test]
1321 fn end_text_before_empty_lines() {
1322 assert_eq!(
1323 p("\\Z"),
1324 Expr::LookAround(
1325 Box::new(Expr::Delegate {
1326 inner: "\\n*$".to_string(),
1327 size: 0,
1328 casei: false,
1329 }),
1330 LookAhead,
1331 )
1332 );
1333 }
1334
1335 #[test]
1336 fn literal() {
1337 assert_eq!(p("a"), make_literal("a"));
1338 }
1339
1340 #[test]
1341 fn literal_special() {
1342 assert_eq!(p("}"), make_literal("}"));
1343 assert_eq!(p("]"), make_literal("]"));
1344 }
1345
1346 #[test]
1347 fn parse_id_test() {
1348 use crate::parse::ParsedId;
1349 fn create_id(id: &str, relative: Option<isize>, skip: usize) -> Option<ParsedId<'_>> {
1350 Some(ParsedId { id, relative, skip })
1351 }
1352 assert_eq!(parse_id("foo.", "", "", true), create_id("foo", None, 3));
1353 assert_eq!(parse_id("1.", "", "", true), create_id("1", None, 1));
1354 assert_eq!(parse_id("{foo}", "{", "}", true), create_id("foo", None, 5));
1355 assert_eq!(parse_id("{foo.", "{", "}", true), None);
1356 assert_eq!(parse_id("{foo", "{", "}", true), None);
1357 assert_eq!(parse_id("{}", "{", "}", true), None);
1358 assert_eq!(parse_id("", "", "", true), None);
1359 assert_eq!(parse_id("{-1}", "{", "}", true), create_id("", Some(-1), 4));
1360 assert_eq!(parse_id("{-1}", "{", "}", false), None);
1361 assert_eq!(parse_id("{-a}", "{", "}", true), None);
1362 assert_eq!(parse_id("{-a}", "{", "}", false), None);
1363 assert_eq!(parse_id("{+a}", "{", "}", false), None);
1364 assert_eq!(parse_id("+a", "", "", false), None);
1365 assert_eq!(parse_id("-a", "", "", false), None);
1366 assert_eq!(parse_id("2+a", "", "", false), create_id("2", None, 1));
1367 assert_eq!(parse_id("2-a", "", "", false), create_id("2", None, 1));
1368
1369 assert_eq!(parse_id("<+1>", "<", ">", true), create_id("", Some(1), 4));
1370 assert_eq!(parse_id("<-3>", "<", ">", true), create_id("", Some(-3), 4));
1371 assert_eq!(
1372 parse_id("<n+1>", "<", ">", true),
1373 create_id("n", Some(1), 5)
1374 );
1375 assert_eq!(
1376 parse_id("<n-1>", "<", ">", true),
1377 create_id("n", Some(-1), 5)
1378 );
1379 assert_eq!(parse_id("<>", "<", ">", true), None);
1380 assert_eq!(parse_id("<", "<", ">", true), None);
1381 assert_eq!(parse_id("<+0>", "<", ">", true), None);
1382 assert_eq!(parse_id("<-0>", "<", ">", true), None);
1383 assert_eq!(
1384 parse_id("<n+0>", "<", ">", true),
1385 create_id("n", Some(0), 5)
1386 );
1387 assert_eq!(
1388 parse_id("<n-0>", "<", ">", true),
1389 create_id("n", Some(0), 5)
1390 );
1391 assert_eq!(
1392 parse_id("<2-0>", "<", ">", true),
1393 create_id("2", Some(0), 5)
1394 );
1395 assert_eq!(
1396 parse_id("<2+0>", "<", ">", true),
1397 create_id("2", Some(0), 5)
1398 );
1399 assert_eq!(
1400 parse_id("<2+1>", "<", ">", true),
1401 create_id("2", Some(1), 5)
1402 );
1403 assert_eq!(
1404 parse_id("<2-1>", "<", ">", true),
1405 create_id("2", Some(-1), 5)
1406 );
1407 }
1408
1409 #[test]
1410 fn literal_unescaped_opening_curly() {
1411 assert_eq!(p("{"), make_literal("{"));
1413 assert_eq!(p("({)"), Expr::Group(Box::new(make_literal("{"),)));
1414 assert_eq!(
1415 p("a|{"),
1416 Expr::Alt(vec![make_literal("a"), make_literal("{"),])
1417 );
1418 assert_eq!(
1419 p("{{2}"),
1420 Expr::Repeat {
1421 child: Box::new(make_literal("{")),
1422 lo: 2,
1423 hi: 2,
1424 greedy: true
1425 }
1426 );
1427 }
1428
1429 #[test]
1430 fn literal_escape() {
1431 assert_eq!(p("\\'"), make_literal("'"));
1432 assert_eq!(p("\\\""), make_literal("\""));
1433 assert_eq!(p("\\ "), make_literal(" "));
1434 assert_eq!(p("\\xA0"), make_literal("\u{A0}"));
1435 assert_eq!(p("\\x{1F4A9}"), make_literal("\u{1F4A9}"));
1436 assert_eq!(p("\\x{000000B7}"), make_literal("\u{B7}"));
1437 assert_eq!(p("\\u21D2"), make_literal("\u{21D2}"));
1438 assert_eq!(p("\\u{21D2}"), make_literal("\u{21D2}"));
1439 assert_eq!(p("\\u21D2x"), p("\u{21D2}x"));
1440 assert_eq!(p("\\U0001F60A"), make_literal("\u{1F60A}"));
1441 assert_eq!(p("\\U{0001F60A}"), make_literal("\u{1F60A}"));
1442 }
1443
1444 #[test]
1445 fn hex_escape() {
1446 assert_eq!(
1447 p("\\h"),
1448 Expr::Delegate {
1449 inner: String::from("[0-9A-Fa-f]"),
1450 size: 1,
1451 casei: false
1452 }
1453 );
1454 assert_eq!(
1455 p("\\H"),
1456 Expr::Delegate {
1457 inner: String::from("[^0-9A-Fa-f]"),
1458 size: 1,
1459 casei: false
1460 }
1461 );
1462 }
1463
1464 #[test]
1465 fn invalid_escape() {
1466 assert_error(
1467 "\\",
1468 "Parsing error at position 0: Backslash without following character",
1469 );
1470 assert_error("\\q", "Parsing error at position 0: Invalid escape: \\q");
1471 assert_error("\\u", "Parsing error at position 2: Invalid hex escape");
1472 assert_error("\\U", "Parsing error at position 2: Invalid hex escape");
1473 assert_error("\\x", "Parsing error at position 2: Invalid hex escape");
1474 assert_error("\\xAG", "Parsing error at position 2: Invalid hex escape");
1475 assert_error("\\xA", "Parsing error at position 2: Invalid hex escape");
1476 assert_error("\\x{}", "Parsing error at position 2: Invalid hex escape");
1477 assert_error("\\x{AG}", "Parsing error at position 2: Invalid hex escape");
1478 assert_error("\\x{42", "Parsing error at position 2: Invalid hex escape");
1479 assert_error(
1480 "\\x{D800}",
1481 "Parsing error at position 2: Invalid codepoint for hex or unicode escape",
1482 );
1483 assert_error(
1484 "\\x{110000}",
1485 "Parsing error at position 2: Invalid codepoint for hex or unicode escape",
1486 );
1487 assert_error("\\u123", "Parsing error at position 2: Invalid hex escape");
1488 assert_error("\\u123x", "Parsing error at position 2: Invalid hex escape");
1489 assert_error("\\u{}", "Parsing error at position 2: Invalid hex escape");
1490 assert_error(
1491 "\\U1234567",
1492 "Parsing error at position 2: Invalid hex escape",
1493 );
1494 assert_error("\\U{}", "Parsing error at position 2: Invalid hex escape");
1495 }
1496
1497 #[test]
1498 fn concat() {
1499 assert_eq!(
1500 p("ab"),
1501 Expr::Concat(vec![make_literal("a"), make_literal("b"),])
1502 );
1503 }
1504
1505 #[test]
1506 fn alt() {
1507 assert_eq!(
1508 p("a|b"),
1509 Expr::Alt(vec![make_literal("a"), make_literal("b"),])
1510 );
1511 }
1512
1513 #[test]
1514 fn group() {
1515 assert_eq!(p("(a)"), Expr::Group(Box::new(make_literal("a"),)));
1516 }
1517
1518 #[test]
1519 fn named_group() {
1520 assert_eq!(p("(?'name'a)"), Expr::Group(Box::new(make_literal("a"),)));
1521 assert_eq!(p("(?<name>a)"), Expr::Group(Box::new(make_literal("a"),)));
1522 }
1523
1524 #[test]
1525 fn group_repeat() {
1526 assert_eq!(
1527 p("(a){2}"),
1528 Expr::Repeat {
1529 child: Box::new(Expr::Group(Box::new(make_literal("a")))),
1530 lo: 2,
1531 hi: 2,
1532 greedy: true
1533 }
1534 );
1535 }
1536
1537 #[test]
1538 fn repeat() {
1539 assert_eq!(
1540 p("a{2,42}"),
1541 Expr::Repeat {
1542 child: Box::new(make_literal("a")),
1543 lo: 2,
1544 hi: 42,
1545 greedy: true
1546 }
1547 );
1548 assert_eq!(
1549 p("a{2,}"),
1550 Expr::Repeat {
1551 child: Box::new(make_literal("a")),
1552 lo: 2,
1553 hi: usize::MAX,
1554 greedy: true
1555 }
1556 );
1557 assert_eq!(
1558 p("a{2}"),
1559 Expr::Repeat {
1560 child: Box::new(make_literal("a")),
1561 lo: 2,
1562 hi: 2,
1563 greedy: true
1564 }
1565 );
1566 assert_eq!(
1567 p("a{,2}"),
1568 Expr::Repeat {
1569 child: Box::new(make_literal("a")),
1570 lo: 0,
1571 hi: 2,
1572 greedy: true
1573 }
1574 );
1575
1576 assert_eq!(
1577 p("a{2,42}?"),
1578 Expr::Repeat {
1579 child: Box::new(make_literal("a")),
1580 lo: 2,
1581 hi: 42,
1582 greedy: false
1583 }
1584 );
1585 assert_eq!(
1586 p("a{2,}?"),
1587 Expr::Repeat {
1588 child: Box::new(make_literal("a")),
1589 lo: 2,
1590 hi: usize::MAX,
1591 greedy: false
1592 }
1593 );
1594 assert_eq!(
1595 p("a{2}?"),
1596 Expr::Repeat {
1597 child: Box::new(make_literal("a")),
1598 lo: 2,
1599 hi: 2,
1600 greedy: false
1601 }
1602 );
1603 assert_eq!(
1604 p("a{,2}?"),
1605 Expr::Repeat {
1606 child: Box::new(make_literal("a")),
1607 lo: 0,
1608 hi: 2,
1609 greedy: false
1610 }
1611 );
1612 }
1613
1614 #[test]
1615 fn invalid_repeat() {
1616 assert_eq!(
1618 p("a{"),
1619 Expr::Concat(vec![make_literal("a"), make_literal("{"),])
1620 );
1621 assert_eq!(
1622 p("a{6"),
1623 Expr::Concat(vec![
1624 make_literal("a"),
1625 make_literal("{"),
1626 make_literal("6"),
1627 ])
1628 );
1629 assert_eq!(
1630 p("a{6,"),
1631 Expr::Concat(vec![
1632 make_literal("a"),
1633 make_literal("{"),
1634 make_literal("6"),
1635 make_literal(","),
1636 ])
1637 );
1638 assert_eq!(
1639 p("a{1,A}"),
1640 Expr::Concat(vec![
1641 make_literal("a"),
1642 make_literal("{"),
1643 make_literal("1"),
1644 make_literal(","),
1645 make_literal("A"),
1646 make_literal("}"),
1647 ])
1648 );
1649 assert_eq!(
1650 p("a{1,2A}"),
1651 Expr::Concat(vec![
1652 make_literal("a"),
1653 make_literal("{"),
1654 make_literal("1"),
1655 make_literal(","),
1656 make_literal("2"),
1657 make_literal("A"),
1658 make_literal("}"),
1659 ])
1660 );
1661 }
1662
1663 #[test]
1664 fn quantifiers_on_assertions_in_regex_mode() {
1665 assert_eq!(
1666 p(r"\b{1}"),
1667 Expr::Repeat {
1668 child: Box::new(Expr::Assertion(Assertion::WordBoundary)),
1669 lo: 1,
1670 hi: 1,
1671 greedy: true
1672 }
1673 );
1674 assert_eq!(
1675 p(r"\B{2}"),
1676 Expr::Repeat {
1677 child: Box::new(Expr::Assertion(Assertion::NotWordBoundary)),
1678 lo: 2,
1679 hi: 2,
1680 greedy: true
1681 }
1682 );
1683 assert_eq!(
1684 p(r"^{3}"),
1685 Expr::Repeat {
1686 child: Box::new(Expr::Assertion(Assertion::StartText)),
1687 lo: 3,
1688 hi: 3,
1689 greedy: true
1690 }
1691 );
1692 assert_eq!(
1693 p(r"${1,5}"),
1694 Expr::Repeat {
1695 child: Box::new(Expr::Assertion(Assertion::EndText)),
1696 lo: 1,
1697 hi: 5,
1698 greedy: true
1699 }
1700 );
1701 assert_eq!(
1702 p(r"\A*"),
1703 Expr::Repeat {
1704 child: Box::new(Expr::Assertion(Assertion::StartText)),
1705 lo: 0,
1706 hi: usize::MAX,
1707 greedy: true
1708 }
1709 );
1710 assert_eq!(
1711 p(r"\z+"),
1712 Expr::Repeat {
1713 child: Box::new(Expr::Assertion(Assertion::EndText)),
1714 lo: 1,
1715 hi: usize::MAX,
1716 greedy: true
1717 }
1718 );
1719 assert_eq!(
1720 p(r"^?"),
1721 Expr::Repeat {
1722 child: Box::new(Expr::Assertion(Assertion::StartText)),
1723 lo: 0,
1724 hi: 1,
1725 greedy: true
1726 }
1727 );
1728 assert_eq!(
1729 p(r"${2}"),
1730 Expr::Repeat {
1731 child: Box::new(Expr::Assertion(Assertion::EndText)),
1732 lo: 2,
1733 hi: 2,
1734 greedy: true
1735 }
1736 );
1737 assert_eq!(
1738 p(r"(?m)^?"),
1739 Expr::Repeat {
1740 child: Box::new(Expr::Assertion(Assertion::StartLine { crlf: false })),
1741 lo: 0,
1742 hi: 1,
1743 greedy: true
1744 }
1745 );
1746 assert_eq!(
1747 p(r"(?m)${2}"),
1748 Expr::Repeat {
1749 child: Box::new(Expr::Assertion(Assertion::EndLine { crlf: false })),
1750 lo: 2,
1751 hi: 2,
1752 greedy: true
1753 }
1754 );
1755 assert_eq!(
1756 p(r"\b+"),
1757 Expr::Repeat {
1758 child: Box::new(Expr::Assertion(Assertion::WordBoundary)),
1759 lo: 1,
1760 hi: usize::MAX,
1761 greedy: true
1762 }
1763 );
1764 }
1765
1766 #[test]
1767 fn delegate_zero() {
1768 assert_eq!(p("\\b"), Expr::Assertion(Assertion::WordBoundary),);
1769 assert_eq!(p("\\B"), Expr::Assertion(Assertion::NotWordBoundary),);
1770 }
1771
1772 #[test]
1773 fn delegate_named_group() {
1774 assert_eq!(
1775 p("\\p{Greek}"),
1776 Expr::Delegate {
1777 inner: String::from("\\p{Greek}"),
1778 size: 1,
1779 casei: false
1780 }
1781 );
1782 assert_eq!(
1783 p("\\pL"),
1784 Expr::Delegate {
1785 inner: String::from("\\pL"),
1786 size: 1,
1787 casei: false
1788 }
1789 );
1790 assert_eq!(
1791 p("\\P{Greek}"),
1792 Expr::Delegate {
1793 inner: String::from("\\P{Greek}"),
1794 size: 1,
1795 casei: false
1796 }
1797 );
1798 assert_eq!(
1799 p("\\PL"),
1800 Expr::Delegate {
1801 inner: String::from("\\PL"),
1802 size: 1,
1803 casei: false
1804 }
1805 );
1806 assert_eq!(
1807 p("(?i)\\p{Ll}"),
1808 Expr::Delegate {
1809 inner: String::from("\\p{Ll}"),
1810 size: 1,
1811 casei: true
1812 }
1813 );
1814 }
1815
1816 #[test]
1817 fn backref() {
1818 assert_eq!(
1819 p("(.)\\1"),
1820 Expr::Concat(vec![
1821 Expr::Group(Box::new(Expr::Any { newline: false })),
1822 Expr::Backref {
1823 group: 1,
1824 casei: false,
1825 },
1826 ])
1827 );
1828 }
1829
1830 #[test]
1831 fn named_backref() {
1832 assert_eq!(
1833 p("(?<i>.)\\k<i>"),
1834 Expr::Concat(vec![
1835 Expr::Group(Box::new(Expr::Any { newline: false })),
1836 Expr::Backref {
1837 group: 1,
1838 casei: false,
1839 },
1840 ])
1841 );
1842 }
1843
1844 #[test]
1845 fn relative_backref() {
1846 assert_eq!(
1847 p(r"(a)(.)\k<-1>"),
1848 Expr::Concat(vec![
1849 Expr::Group(Box::new(make_literal("a"))),
1850 Expr::Group(Box::new(Expr::Any { newline: false })),
1851 Expr::Backref {
1852 group: 2,
1853 casei: false,
1854 },
1855 ])
1856 );
1857
1858 assert_eq!(
1859 p(r"(a)\k<+1>(.)"),
1860 Expr::Concat(vec![
1861 Expr::Group(Box::new(make_literal("a"))),
1862 Expr::Backref {
1863 group: 2,
1864 casei: false,
1865 },
1866 Expr::Group(Box::new(Expr::Any { newline: false })),
1867 ])
1868 );
1869
1870 fail("(?P<->.)");
1871 fail("(.)(?P=-)");
1872 fail(r"(a)\k<-0>(.)");
1873 fail(r"(a)\k<+0>(.)");
1874 fail(r"(a)\k<+>(.)");
1875 fail(r"(a)\k<->(.)");
1876 fail(r"(a)\k<>(.)");
1877 }
1878
1879 #[test]
1880 fn relative_backref_with_recursion_level() {
1881 assert_eq!(
1882 p(r"()\k<1+3>"),
1883 Expr::Concat(vec![
1884 Expr::Group(Box::new(Expr::Empty)),
1885 Expr::BackrefWithRelativeRecursionLevel {
1886 group: 1,
1887 relative_level: 3,
1888 casei: false,
1889 },
1890 ]),
1891 );
1892
1893 assert_eq!(
1894 p(r"()\k<1-0>"),
1895 Expr::Concat(vec![
1896 Expr::Group(Box::new(Expr::Empty)),
1897 Expr::BackrefWithRelativeRecursionLevel {
1898 group: 1,
1899 relative_level: 0,
1900 casei: false,
1901 },
1902 ]),
1903 );
1904
1905 assert_eq!(
1906 p(r"(?<n>)\k<n+3>"),
1907 Expr::Concat(vec![
1908 Expr::Group(Box::new(Expr::Empty)),
1909 Expr::BackrefWithRelativeRecursionLevel {
1910 group: 1,
1911 relative_level: 3,
1912 casei: false,
1913 },
1914 ]),
1915 );
1916
1917 assert_eq!(
1918 p(r"(?<n>)\k<n-3>"),
1919 Expr::Concat(vec![
1920 Expr::Group(Box::new(Expr::Empty)),
1921 Expr::BackrefWithRelativeRecursionLevel {
1922 group: 1,
1923 relative_level: -3,
1924 casei: false,
1925 }
1926 ]),
1927 );
1928
1929 assert_eq!(
1930 p(r"\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z"),
1931 Expr::Concat(vec![
1932 Expr::Assertion(Assertion::StartText),
1933 Expr::Group(Box::new(Expr::Alt(vec![
1934 Expr::Empty,
1935 Expr::Any { newline: false },
1936 Expr::Concat(vec![
1937 Expr::Group(Box::new(Expr::Any { newline: false })),
1938 Expr::SubroutineCall(1),
1939 Expr::BackrefWithRelativeRecursionLevel {
1940 group: 2,
1941 relative_level: 0,
1942 casei: false,
1943 },
1944 ])
1945 ]))),
1946 Expr::Assertion(Assertion::EndText)
1947 ]),
1948 );
1949 }
1950
1951 #[test]
1952 fn relative_subroutine_call() {
1953 assert_eq!(
1954 p(r"(a)(.)\g<-1>"),
1955 Expr::Concat(vec![
1956 Expr::Group(Box::new(make_literal("a"))),
1957 Expr::Group(Box::new(Expr::Any { newline: false })),
1958 Expr::SubroutineCall(2),
1959 ])
1960 );
1961
1962 assert_eq!(
1963 p(r"(a)\g<+1>(.)"),
1964 Expr::Concat(vec![
1965 Expr::Group(Box::new(make_literal("a"))),
1966 Expr::SubroutineCall(2),
1967 Expr::Group(Box::new(Expr::Any { newline: false })),
1968 ])
1969 );
1970
1971 fail(r"(a)\g<-0>(.)");
1972 fail(r"(a)\g<+0>(.)");
1973 fail(r"(a)\g<+>(.)");
1974 fail(r"(a)\g<->(.)");
1975 fail(r"(a)\g<>(.)");
1976 }
1977
1978 #[test]
1979 fn lookaround() {
1980 assert_eq!(
1981 p("(?=a)"),
1982 Expr::LookAround(Box::new(make_literal("a")), LookAhead)
1983 );
1984 assert_eq!(
1985 p("(?!a)"),
1986 Expr::LookAround(Box::new(make_literal("a")), LookAheadNeg)
1987 );
1988 assert_eq!(
1989 p("(?<=a)"),
1990 Expr::LookAround(Box::new(make_literal("a")), LookBehind)
1991 );
1992 assert_eq!(
1993 p("(?<!a)"),
1994 Expr::LookAround(Box::new(make_literal("a")), LookBehindNeg)
1995 );
1996 }
1997
1998 #[test]
1999 fn shy_group() {
2000 assert_eq!(
2001 p("(?:ab)c"),
2002 Expr::Concat(vec![
2003 Expr::Concat(vec![make_literal("a"), make_literal("b"),]),
2004 make_literal("c"),
2005 ])
2006 );
2007 }
2008
2009 #[test]
2010 fn flag_state() {
2011 assert_eq!(p("(?s)."), Expr::Any { newline: true });
2012 assert_eq!(p("(?s:(?-s:.))"), Expr::Any { newline: false });
2013 assert_eq!(
2014 p("(?s:.)."),
2015 Expr::Concat(vec![
2016 Expr::Any { newline: true },
2017 Expr::Any { newline: false },
2018 ])
2019 );
2020 assert_eq!(
2021 p("(?:(?s).)."),
2022 Expr::Concat(vec![
2023 Expr::Any { newline: true },
2024 Expr::Any { newline: false },
2025 ])
2026 );
2027 }
2028
2029 #[test]
2030 fn flag_multiline() {
2031 assert_eq!(p("^"), Expr::Assertion(Assertion::StartText));
2032 assert_eq!(
2033 p("(?m:^)"),
2034 Expr::Assertion(Assertion::StartLine { crlf: false })
2035 );
2036 assert_eq!(p("$"), Expr::Assertion(Assertion::EndText));
2037 assert_eq!(
2038 p("(?m:$)"),
2039 Expr::Assertion(Assertion::EndLine { crlf: false })
2040 );
2041 }
2042
2043 #[test]
2044 fn flag_swap_greed() {
2045 assert_eq!(p("a*"), p("(?U:a*?)"));
2046 assert_eq!(p("a*?"), p("(?U:a*)"));
2047 }
2048
2049 #[test]
2050 fn invalid_flags() {
2051 assert!(Expr::parse_tree("(?").is_err());
2052 assert!(Expr::parse_tree("(?)").is_err());
2053 assert!(Expr::parse_tree("(?-)").is_err());
2054 assert!(Expr::parse_tree("(?-:a)").is_err());
2055 assert!(Expr::parse_tree("(?q:a)").is_err());
2056 }
2057
2058 #[test]
2059 fn lifetime() {
2060 assert_eq!(
2061 p("\\'[a-zA-Z_][a-zA-Z0-9_]*(?!\\')\\b"),
2062 Expr::Concat(vec![
2063 make_literal("'"),
2064 Expr::Delegate {
2065 inner: String::from("[a-zA-Z_]"),
2066 size: 1,
2067 casei: false
2068 },
2069 Expr::Repeat {
2070 child: Box::new(Expr::Delegate {
2071 inner: String::from("[a-zA-Z0-9_]"),
2072 size: 1,
2073 casei: false
2074 }),
2075 lo: 0,
2076 hi: usize::MAX,
2077 greedy: true
2078 },
2079 Expr::LookAround(Box::new(make_literal("'")), LookAheadNeg),
2080 Expr::Assertion(Assertion::WordBoundary),
2081 ])
2082 );
2083 }
2084
2085 #[test]
2086 fn ignore_whitespace() {
2087 assert_eq!(p("(?x: )"), p(""));
2088 assert_eq!(p("(?x) | "), p("|"));
2089 assert_eq!(p("(?x: a )"), p("a"));
2090 assert_eq!(p("(?x: a # ) bobby tables\n b )"), p("ab"));
2091 assert_eq!(p("(?x: a | b )"), p("a|b"));
2092 assert_eq!(p("(?x: ( a b ) )"), p("(ab)"));
2093 assert_eq!(p("(?x: a + )"), p("a+"));
2094 assert_eq!(p("(?x: a {2} )"), p("a{2}"));
2095 assert_eq!(p("(?x: a { 2 } )"), p("a{2}"));
2096 assert_eq!(p("(?x: a { 2 , } )"), p("a{2,}"));
2097 assert_eq!(p("(?x: a { , 2 } )"), p("a{,2}"));
2098 assert_eq!(p("(?x: a { 2 , 3 } )"), p("a{2,3}"));
2099 assert_eq!(p("(?x: a { 2 , 3 } ? )"), p("a{2,3}?"));
2100 assert_eq!(p("(?x: ( ? i : . ) )"), p("(?i:.)"));
2101 assert_eq!(p("(?x: ( ?= a ) )"), p("(?=a)"));
2102 assert_eq!(p("(?x: [ ] )"), p("[ ]"));
2103 assert_eq!(p("(?x: [ ^] )"), p("[ ^]"));
2104 assert_eq!(p("(?x: [a - z] )"), p("[a - z]"));
2105 assert_eq!(p("(?x: [ \\] \\\\] )"), p("[ \\] \\\\]"));
2106 assert_eq!(p("(?x: a\\ b )"), p("a b"));
2107 assert_eq!(p("(?x: a (?-x:#) b )"), p("a#b"));
2108 assert_eq!(p("(?x: a (?# b))c"), p("ac"));
2109 assert_eq!(p("(?x: \\b { s t a r t} a )"), p("\\b{start}a"));
2110 assert_eq!(p("(?x: \\b {end} )"), p("\\b{end}"));
2111 assert_eq!(p("(?x: \\b { start-half } )"), p("\\b{start-half}"));
2112 assert_eq!(p("(?x: \\b { e n d - h a l f } )"), p("\\b{end-half}"));
2113 assert_eq!(p("(?x: \\b{s\nt\ra\trt} a)"), p("\\b{start}a"));
2114 assert_eq!(p("(?x: \\u { 0 0 3 2} a)"), p("\\u{0032}a"));
2115 assert_eq!(p("(?x: \\U { 0001 F600 } a)"), p("\\U{0001F600}a"));
2116 assert_eq!(p("(?x: \\x { 3 2} a)"), p("\\x{32}a"));
2117 assert_eq!(p("(?x: \\b { s t a # x\nr t} a )"), p("\\b{start}a"));
2118 assert_eq!(p("(?x: \\b { s t a (?# x)r t} a )"), p("\\b{start}a"));
2119 assert_eq!(p("(?x: \\u { 0 0 # x\n3 2} a)"), p("\\u{0032}a"));
2120 assert_eq!(p("(?x: \\u { 0 0 (?# x)3 2} a)"), p("\\u{0032}a"));
2121 }
2122
2123 #[test]
2124 fn comments() {
2125 assert_eq!(p(r"ab(?# comment)"), p("ab"));
2126 assert_eq!(p(r"ab(?#)"), p("ab"));
2127 assert_eq!(p(r"(?# comment 1)(?# comment 2)ab"), p("ab"));
2128 assert_eq!(p(r"ab(?# comment \))c"), p("abc"));
2129 assert_eq!(p(r"ab(?# comment \\)c"), p("abc"));
2130 assert_eq!(p(r"ab(?# comment ()c"), p("abc"));
2131 assert_eq!(p(r"ab(?# comment)*"), p("ab*"));
2132 fail(r"ab(?# comment");
2133 fail(r"ab(?# comment\");
2134 }
2135
2136 #[test]
2137 fn atomic_group() {
2138 assert_eq!(p("(?>a)"), Expr::AtomicGroup(Box::new(make_literal("a"))));
2139 }
2140
2141 #[test]
2142 fn possessive() {
2143 assert_eq!(
2144 p("a++"),
2145 Expr::AtomicGroup(Box::new(Expr::Repeat {
2146 child: Box::new(make_literal("a")),
2147 lo: 1,
2148 hi: usize::MAX,
2149 greedy: true
2150 }))
2151 );
2152 assert_eq!(
2153 p("a*+"),
2154 Expr::AtomicGroup(Box::new(Expr::Repeat {
2155 child: Box::new(make_literal("a")),
2156 lo: 0,
2157 hi: usize::MAX,
2158 greedy: true
2159 }))
2160 );
2161 assert_eq!(
2162 p("a?+"),
2163 Expr::AtomicGroup(Box::new(Expr::Repeat {
2164 child: Box::new(make_literal("a")),
2165 lo: 0,
2166 hi: 1,
2167 greedy: true
2168 }))
2169 );
2170 }
2171
2172 #[test]
2173 fn invalid_backref() {
2174 assert_error(
2176 r".\18446744073709551616",
2177 "Parsing error at position 2: Invalid back reference",
2178 ); assert_error(r".\c", "Parsing error at position 1: Invalid escape: \\c"); assert_error(
2182 r"a\1",
2183 "Parsing error at position 2: Invalid back reference",
2184 ); assert_error(
2186 r"(a)\2",
2187 "Parsing error at position 4: Invalid back reference",
2188 ); }
2190
2191 #[test]
2192 fn invalid_group_name_backref() {
2193 assert_error(
2194 "\\k<id>(?<id>.)",
2195 "Parsing error at position 2: Invalid group name in back reference: id",
2196 );
2197 }
2198
2199 #[test]
2200 fn named_backref_only() {
2201 assert_error("(?<id>.)\\1", "Error compiling regex: Numbered backref/call not allowed because named group was used, use a named backref instead");
2202 assert_error("(a)\\1(?<name>b)", "Error compiling regex: Numbered backref/call not allowed because named group was used, use a named backref instead");
2203 }
2204
2205 #[test]
2206 fn invalid_group_name() {
2207 assert_error(
2208 "(?<id)",
2209 "Parsing error at position 1: Could not parse group name",
2210 );
2211 assert_error(
2212 "(?<>)",
2213 "Parsing error at position 1: Could not parse group name",
2214 );
2215 assert_error(
2216 "(?<#>)",
2217 "Parsing error at position 1: Could not parse group name",
2218 );
2219 assert_error(
2220 "\\kxxx<id>",
2221 "Parsing error at position 2: Could not parse group name",
2222 );
2223 assert_error(
2225 "\\k<id-withdash>",
2226 "Parsing error at position 2: Could not parse group name",
2227 );
2228 assert_error(
2229 "\\k<-id>",
2230 "Parsing error at position 2: Could not parse group name",
2231 );
2232 }
2233
2234 #[test]
2235 fn unknown_flag() {
2236 assert_error(
2237 "(?-:a)",
2238 "Parsing error at position 2: Unknown group flag: (?-:",
2239 );
2240 assert_error(
2241 "(?)",
2242 "Parsing error at position 2: Unknown group flag: (?)",
2243 );
2244 assert_error(
2245 "(?--)",
2246 "Parsing error at position 2: Unknown group flag: (?--",
2247 );
2248 assert_error(
2250 "(?\u{1F60A})",
2251 "Parsing error at position 2: Unknown group flag: (?\u{1F60A}",
2252 );
2253 }
2254
2255 #[test]
2256 fn no_quantifiers_on_lookarounds() {
2257 assert_error(
2258 "(?=hello)+",
2259 "Parsing error at position 9: Target of repeat operator is invalid",
2260 );
2261 assert_error(
2262 "(?<!hello)*",
2263 "Parsing error at position 10: Target of repeat operator is invalid",
2264 );
2265 assert_error(
2266 "(?<=hello){2,3}",
2267 "Parsing error at position 14: Target of repeat operator is invalid",
2268 );
2269 assert_error(
2270 "(?!hello)?",
2271 "Parsing error at position 9: Target of repeat operator is invalid",
2272 );
2273 assert_error(
2274 "(a|b|?)",
2275 "Parsing error at position 5: Target of repeat operator is invalid",
2276 );
2277 }
2278
2279 #[test]
2280 fn no_quantifiers_on_assertions_in_oniguruma_mode() {
2281 assert_error_oniguruma(
2282 r"\b{1}",
2283 "Parsing error at position 4: Target of repeat operator is invalid",
2284 );
2285 assert_error_oniguruma(
2286 r"\B{2}",
2287 "Parsing error at position 4: Target of repeat operator is invalid",
2288 );
2289 assert_error_oniguruma(
2290 r"^{3}",
2291 "Parsing error at position 3: Target of repeat operator is invalid",
2292 );
2293 assert_error_oniguruma(
2294 r"${1,5}",
2295 "Parsing error at position 5: Target of repeat operator is invalid",
2296 );
2297 assert_error_oniguruma(
2298 r"\A*",
2299 "Parsing error at position 2: Target of repeat operator is invalid",
2300 );
2301 assert_error_oniguruma(
2302 r"\z+",
2303 "Parsing error at position 2: Target of repeat operator is invalid",
2304 );
2305 assert_error_oniguruma(
2306 r"^?",
2307 "Parsing error at position 1: Target of repeat operator is invalid",
2308 );
2309 assert_error_oniguruma(
2310 r"${2}",
2311 "Parsing error at position 3: Target of repeat operator is invalid",
2312 );
2313 assert_error_oniguruma(
2314 r"(?m)^?",
2315 "Parsing error at position 5: Target of repeat operator is invalid",
2316 );
2317 assert_error_oniguruma(
2318 r"(?m)${2}",
2319 "Parsing error at position 7: Target of repeat operator is invalid",
2320 );
2321 assert_error_oniguruma(
2322 r"\b+",
2323 "Parsing error at position 2: Target of repeat operator is invalid",
2324 );
2325 }
2326
2327 #[test]
2328 fn keepout() {
2329 assert_eq!(
2330 p("a\\Kb"),
2331 Expr::Concat(vec![make_literal("a"), Expr::KeepOut, make_literal("b"),])
2332 );
2333 }
2334
2335 #[test]
2336 fn no_quantifiers_on_other_non_repeatable_expressions() {
2337 assert_error(
2338 r"\K?",
2339 "Parsing error at position 2: Target of repeat operator is invalid",
2340 );
2341 assert_error(
2342 r"\G*",
2343 "Parsing error at position 2: Target of repeat operator is invalid",
2344 );
2345 }
2346
2347 #[test]
2348 fn backref_exists_condition() {
2349 assert_eq!(
2350 p("(h)?(?(1))"),
2351 Expr::Concat(vec![
2352 Expr::Repeat {
2353 child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2354 lo: 0,
2355 hi: 1,
2356 greedy: true
2357 },
2358 Expr::BackrefExistsCondition(1)
2359 ])
2360 );
2361 assert_eq!(
2362 p("(?<h>h)?(?('h'))"),
2363 Expr::Concat(vec![
2364 Expr::Repeat {
2365 child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2366 lo: 0,
2367 hi: 1,
2368 greedy: true
2369 },
2370 Expr::BackrefExistsCondition(1)
2371 ])
2372 );
2373 }
2374
2375 #[test]
2376 fn conditional_non_backref_validity_check_without_branches() {
2377 assert_error(
2378 "(?(foo))",
2379 "Parsing error at position 7: General parsing error: expected conditional to be a backreference or at least an expression for when the condition is true",
2380 );
2381 }
2382
2383 #[test]
2384 fn conditional_invalid_target_of_repeat_operator() {
2385 assert_error(
2386 r"(?(?=\d)\w|!)",
2387 "Parsing error at position 3: Target of repeat operator is invalid",
2388 );
2389 }
2390
2391 #[test]
2392 fn conditional_unclosed_at_end_of_pattern() {
2393 assert_error(
2394 r"(?(",
2395 "Parsing error at position 3: Opening parenthesis without closing parenthesis",
2396 );
2397 }
2398
2399 #[test]
2400 fn subroutine_call_unclosed_at_end_of_pattern() {
2401 assert_error(
2402 r"\g<",
2403 "Parsing error at position 2: Could not parse group name",
2404 );
2405
2406 assert_error(
2407 r"\g<name",
2408 "Parsing error at position 2: Could not parse group name",
2409 );
2410
2411 assert_error(
2412 r"\g'",
2413 "Parsing error at position 2: Could not parse group name",
2414 );
2415
2416 assert_error(
2417 r"\g''",
2418 "Parsing error at position 2: Could not parse group name",
2419 );
2420
2421 assert_error(
2422 r"\g<>",
2423 "Parsing error at position 2: Could not parse group name",
2424 );
2425
2426 assert_error(r"\g", "Parsing error at position 0: Invalid escape: \\g");
2427
2428 assert_error(
2429 r"\g test",
2430 "Parsing error at position 2: Could not parse group name",
2431 );
2432 }
2433
2434 #[test]
2435 fn subroutine_call_missing_subroutine_reference() {
2436 assert_error(
2437 r"\g test",
2438 "Parsing error at position 2: Could not parse group name",
2439 );
2440 }
2441
2442 #[test]
2443 fn subroutine_call_name_includes_dash() {
2444 assert_error(
2445 r"\g<1-0>(a)",
2446 "Parsing error at position 2: Could not parse group name",
2447 );
2448 assert_error(
2449 r"\g<name+1>(?'name'a)",
2450 "Parsing error at position 2: Could not parse group name",
2451 );
2452 }
2453
2454 #[test]
2455 fn backref_condition_with_one_two_or_three_branches() {
2456 assert_eq!(
2457 p("(h)?(?(1)i|x)"),
2458 Expr::Concat(vec![
2459 Expr::Repeat {
2460 child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2461 lo: 0,
2462 hi: 1,
2463 greedy: true
2464 },
2465 Expr::Conditional {
2466 condition: Box::new(Expr::BackrefExistsCondition(1)),
2467 true_branch: Box::new(make_literal("i")),
2468 false_branch: Box::new(make_literal("x")),
2469 },
2470 ])
2471 );
2472
2473 assert_eq!(
2474 p("(h)?(?(1)i)"),
2475 Expr::Concat(vec![
2476 Expr::Repeat {
2477 child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2478 lo: 0,
2479 hi: 1,
2480 greedy: true
2481 },
2482 Expr::Conditional {
2483 condition: Box::new(Expr::BackrefExistsCondition(1)),
2484 true_branch: Box::new(make_literal("i")),
2485 false_branch: Box::new(Expr::Empty),
2486 },
2487 ])
2488 );
2489
2490 assert_eq!(
2491 p("(h)?(?(1)ii|xy|z)"),
2492 Expr::Concat(vec![
2493 Expr::Repeat {
2494 child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2495 lo: 0,
2496 hi: 1,
2497 greedy: true
2498 },
2499 Expr::Conditional {
2500 condition: Box::new(Expr::BackrefExistsCondition(1)),
2501 true_branch: Box::new(Expr::Concat(
2502 vec![make_literal("i"), make_literal("i"),]
2503 )),
2504 false_branch: Box::new(Expr::Alt(vec![
2505 Expr::Concat(vec![make_literal("x"), make_literal("y"),]),
2506 make_literal("z"),
2507 ])),
2508 },
2509 ])
2510 );
2511
2512 assert_eq!(
2513 p("(?<cap>h)?(?(<cap>)ii|xy|z)"),
2514 Expr::Concat(vec![
2515 Expr::Repeat {
2516 child: Box::new(Expr::Group(Box::new(make_literal("h")))),
2517 lo: 0,
2518 hi: 1,
2519 greedy: true
2520 },
2521 Expr::Conditional {
2522 condition: Box::new(Expr::BackrefExistsCondition(1)),
2523 true_branch: Box::new(Expr::Concat(
2524 vec![make_literal("i"), make_literal("i"),]
2525 )),
2526 false_branch: Box::new(Expr::Alt(vec![
2527 Expr::Concat(vec![make_literal("x"), make_literal("y"),]),
2528 make_literal("z"),
2529 ])),
2530 },
2531 ])
2532 );
2533 }
2534
2535 #[test]
2536 fn conditional() {
2537 assert_eq!(
2538 p("((?(a)b|c))(\\1)"),
2539 Expr::Concat(vec![
2540 Expr::Group(Box::new(Expr::Conditional {
2541 condition: Box::new(make_literal("a")),
2542 true_branch: Box::new(make_literal("b")),
2543 false_branch: Box::new(make_literal("c"))
2544 })),
2545 Expr::Group(Box::new(Expr::Backref {
2546 group: 1,
2547 casei: false,
2548 },))
2549 ])
2550 );
2551
2552 assert_eq!(
2553 p(r"^(?(\d)abc|\d!)$"),
2554 Expr::Concat(vec![
2555 Expr::Assertion(Assertion::StartText),
2556 Expr::Conditional {
2557 condition: Box::new(Expr::Delegate {
2558 inner: "\\d".to_string(),
2559 size: 1,
2560 casei: false,
2561 }),
2562 true_branch: Box::new(Expr::Concat(vec![
2563 make_literal("a"),
2564 make_literal("b"),
2565 make_literal("c"),
2566 ])),
2567 false_branch: Box::new(Expr::Concat(vec![
2568 Expr::Delegate {
2569 inner: "\\d".to_string(),
2570 size: 1,
2571 casei: false,
2572 },
2573 make_literal("!"),
2574 ])),
2575 },
2576 Expr::Assertion(Assertion::EndText),
2577 ])
2578 );
2579
2580 assert_eq!(
2581 p(r"(?((?=\d))\w|!)"),
2582 Expr::Conditional {
2583 condition: Box::new(Expr::LookAround(
2584 Box::new(Expr::Delegate {
2585 inner: "\\d".to_string(),
2586 size: 1,
2587 casei: false
2588 }),
2589 LookAhead
2590 )),
2591 true_branch: Box::new(Expr::Delegate {
2592 inner: "\\w".to_string(),
2593 size: 1,
2594 casei: false,
2595 }),
2596 false_branch: Box::new(make_literal("!")),
2597 },
2598 );
2599
2600 assert_eq!(
2601 p(r"(?((ab))c|d)"),
2602 Expr::Conditional {
2603 condition: Box::new(Expr::Group(Box::new(Expr::Concat(vec![
2604 make_literal("a"),
2605 make_literal("b"),
2606 ]),))),
2607 true_branch: Box::new(make_literal("c")),
2608 false_branch: Box::new(make_literal("d")),
2609 },
2610 );
2611 }
2612
2613 #[test]
2614 fn subroutines() {
2615 assert_eq!(
2616 p(r"(a)\g1"),
2617 Expr::Concat(vec![
2618 Expr::Group(Box::new(make_literal("a"))),
2619 Expr::SubroutineCall(1)
2620 ])
2621 );
2622
2623 assert_eq!(
2624 p(r"(a)\g<1>"),
2625 Expr::Concat(vec![
2626 Expr::Group(Box::new(make_literal("a"))),
2627 Expr::SubroutineCall(1)
2628 ])
2629 );
2630
2631 assert_eq!(
2632 p(r"(?<group_name>a)\g<group_name>"),
2633 Expr::Concat(vec![
2634 Expr::Group(Box::new(make_literal("a"))),
2635 Expr::SubroutineCall(1)
2636 ])
2637 );
2638
2639 assert_eq!(
2640 p(r"(?<group_name>a)\g'group_name'"),
2641 Expr::Concat(vec![
2642 Expr::Group(Box::new(make_literal("a"))),
2643 Expr::SubroutineCall(1)
2644 ])
2645 );
2646
2647 assert_eq!(
2648 p(r"(?<group_name>a)(?P>group_name)"),
2649 Expr::Concat(vec![
2650 Expr::Group(Box::new(make_literal("a"))),
2651 Expr::SubroutineCall(1)
2652 ])
2653 );
2654 }
2655
2656 #[test]
2657 fn subroutine_defined_later() {
2658 assert_eq!(
2659 p(r"\g<name>(?<name>a)"),
2660 Expr::Concat(vec![
2661 Expr::SubroutineCall(1),
2662 Expr::Group(Box::new(make_literal("a"))),
2663 ])
2664 );
2665
2666 assert_eq!(
2667 p(r"\g<c>(?:a|b|(?<c>c)?)"),
2668 Expr::Concat(vec![
2669 Expr::SubroutineCall(1),
2670 Expr::Alt(vec![
2671 make_literal("a"),
2672 make_literal("b"),
2673 Expr::Repeat {
2674 child: Box::new(Expr::Group(Box::new(make_literal("c")))),
2675 lo: 0,
2676 hi: 1,
2677 greedy: true
2678 }
2679 ])
2680 ])
2681 );
2682
2683 assert_eq!(
2684 p(r"(?<a>a)?\g<b>(?(<a>)(?<b>b)|c)"),
2685 Expr::Concat(vec![
2686 Expr::Repeat {
2687 child: Box::new(Expr::Group(Box::new(make_literal("a")))),
2688 lo: 0,
2689 hi: 1,
2690 greedy: true
2691 },
2692 Expr::SubroutineCall(2),
2693 Expr::Conditional {
2694 condition: Box::new(Expr::BackrefExistsCondition(1)),
2695 true_branch: Box::new(Expr::Group(Box::new(make_literal("b")))),
2696 false_branch: Box::new(make_literal("c")),
2697 }
2698 ])
2699 );
2700
2701 assert_eq!(
2702 p(r"\g<1>(a)"),
2703 Expr::Concat(vec![
2704 Expr::SubroutineCall(1),
2705 Expr::Group(Box::new(make_literal("a"))),
2706 ])
2707 );
2708 }
2709
2710 #[test]
2711 fn recursive_subroutine_call() {
2712 assert_eq!(
2713 p(r"\A(?<a>|.|(?:(?<b>.)\g<a>\k<b>))\z"),
2714 Expr::Concat(vec![
2715 Expr::Assertion(Assertion::StartText,),
2716 Expr::Group(Box::new(Expr::Alt(vec![
2717 Expr::Empty,
2718 Expr::Any { newline: false },
2719 Expr::Concat(vec![
2720 Expr::Group(Box::new(Expr::Any { newline: false },)),
2721 Expr::SubroutineCall(1,),
2722 Expr::Backref {
2723 group: 2,
2724 casei: false,
2725 },
2726 ],),
2727 ],),)),
2728 Expr::Assertion(Assertion::EndText,),
2729 ],)
2730 );
2731 }
2732
2733 #[test]
2734 fn self_recursive_subroutine_call() {
2735 let tree = Expr::parse_tree(r"hello\g<0>?world").unwrap();
2736 assert!(tree.self_recursive);
2737
2738 let tree = Expr::parse_tree(r"hello\g0?world").unwrap();
2739 assert!(tree.self_recursive);
2740
2741 let tree = Expr::parse_tree(r"hello world").unwrap();
2742 assert!(!tree.self_recursive);
2743
2744 let tree = Expr::parse_tree(r"hello\g1world").unwrap();
2745 assert!(!tree.self_recursive);
2746
2747 let tree = Expr::parse_tree(r"hello\g<1>world").unwrap();
2748 assert!(!tree.self_recursive);
2749
2750 let tree = Expr::parse_tree(r"(hello\g1?world)").unwrap();
2751 assert!(!tree.self_recursive);
2752
2753 let tree = Expr::parse_tree(r"(?<a>hello\g<a>world)").unwrap();
2754 assert!(!tree.self_recursive);
2755 }
2756
2757 #[test]
2758 fn named_subroutine_not_defined_later() {
2759 assert_eq!(
2760 p(r"\g<wrong_name>(?<different_name>a)"),
2761 Expr::Concat(vec![
2762 Expr::UnresolvedNamedSubroutineCall {
2763 name: "wrong_name".to_string(),
2764 ix: 2
2765 },
2766 Expr::Group(Box::new(make_literal("a"))),
2767 ])
2768 );
2769 }
2770
2771 #[test]
2773 fn fuzz_1() {
2774 p(r"\ä");
2775 }
2776
2777 #[test]
2778 fn fuzz_2() {
2779 p(r"\pä");
2780 }
2781
2782 #[test]
2783 fn fuzz_3() {
2784 fail(r"(?()^");
2785 fail(r#"!w(?()\"Kuz>"#);
2786 }
2787
2788 #[test]
2789 fn fuzz_4() {
2790 fail(r"\u{2}(?(2)");
2791 }
2792
2793 #[test]
2794 fn word_boundary_brace_syntax() {
2795 assert_eq!(
2797 p(r"\b{start}"),
2798 Expr::Assertion(Assertion::LeftWordBoundary)
2799 );
2800 assert_eq!(p(r"\b{end}"), Expr::Assertion(Assertion::RightWordBoundary));
2801 assert_eq!(
2802 p(r"\b{start-half}"),
2803 Expr::Assertion(Assertion::LeftWordHalfBoundary)
2804 );
2805 assert_eq!(
2806 p(r"\b{end-half}"),
2807 Expr::Assertion(Assertion::RightWordHalfBoundary)
2808 );
2809 }
2810
2811 #[test]
2812 fn word_boundary_brace_parsing_errors() {
2813 let test_cases = [
2815 (r"\b{invalid}", "Invalid escape: \\b{invalid}"),
2816 (r"\b{start ", "Invalid escape: \\b{...}"),
2817 (r"\b{end ", "Invalid escape: \\b{...}"),
2818 (r"\b{}", "Invalid escape: \\b{}"),
2819 (r"\b{", "Invalid escape: \\b{...}"),
2820 (r"\b{ }", "Invalid escape: \\b{ }"),
2821 (r"\b{START}", "Invalid escape: \\b{START}"),
2822 (r"\b{END}", "Invalid escape: \\b{END}"),
2823 (r"\b{ s t a r t }", "Invalid escape: \\b{ s t a r t }"),
2824 (r"\B{start}", "Invalid escape: \\B{start}"),
2826 (r"\B{end}", "Invalid escape: \\B{end}"),
2827 (r"\B{start-half}", "Invalid escape: \\B{start-half}"),
2828 (r"\B{end-half}", "Invalid escape: \\B{end-half}"),
2829 (r"\c{start}", "Invalid escape: \\c"),
2830 ];
2831
2832 for (pattern, expected_error) in test_cases {
2833 assert_error(
2834 pattern,
2835 &format!("Parsing error at position 0: {}", expected_error),
2836 );
2837 }
2838 }
2839
2840 fn get_options(pattern: &str, func: impl Fn(SyntaxConfig) -> SyntaxConfig) -> RegexOptions {
2841 let mut options = RegexOptions::default();
2842 options.syntaxc = func(options.syntaxc);
2843 options.pattern = String::from(pattern);
2844 options
2845 }
2846
2847 #[test]
2848 fn parse_with_case_insensitive_in_pattern() {
2849 let tree = Expr::parse_tree("(?i)hello");
2850 let expr = tree.unwrap().expr;
2851
2852 assert_eq!(
2853 expr,
2854 Expr::Concat(vec![
2855 make_literal_case_insensitive("h", true),
2856 make_literal_case_insensitive("e", true),
2857 make_literal_case_insensitive("l", true),
2858 make_literal_case_insensitive("l", true),
2859 make_literal_case_insensitive("o", true)
2860 ])
2861 );
2862 }
2863
2864 #[test]
2865 fn parse_with_case_insensitive_option() {
2866 let options = get_options("hello", |x| x.case_insensitive(true));
2867
2868 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2869 let expr = tree.unwrap().expr;
2870
2871 assert_eq!(
2872 expr,
2873 Expr::Concat(vec![
2874 make_literal_case_insensitive("h", true),
2875 make_literal_case_insensitive("e", true),
2876 make_literal_case_insensitive("l", true),
2877 make_literal_case_insensitive("l", true),
2878 make_literal_case_insensitive("o", true)
2879 ])
2880 );
2881 }
2882
2883 #[test]
2884 fn parse_with_multiline_in_pattern() {
2885 let options = get_options("(?m)^hello$", |x| x);
2886
2887 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2888 let expr = tree.unwrap().expr;
2889
2890 assert_eq!(
2891 expr,
2892 Expr::Concat(vec![
2893 Expr::Assertion(Assertion::StartLine { crlf: false }),
2894 make_literal("h"),
2895 make_literal("e"),
2896 make_literal("l"),
2897 make_literal("l"),
2898 make_literal("o"),
2899 Expr::Assertion(Assertion::EndLine { crlf: false })
2900 ])
2901 );
2902 }
2903
2904 #[test]
2905 fn pparse_with_multiline_option() {
2906 let options = get_options("^hello$", |x| x.multi_line(true));
2907
2908 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2909 let expr = tree.unwrap().expr;
2910
2911 assert_eq!(
2912 expr,
2913 Expr::Concat(vec![
2914 Expr::Assertion(Assertion::StartLine { crlf: false }),
2915 make_literal("h"),
2916 make_literal("e"),
2917 make_literal("l"),
2918 make_literal("l"),
2919 make_literal("o"),
2920 Expr::Assertion(Assertion::EndLine { crlf: false })
2921 ])
2922 );
2923 }
2924
2925 #[test]
2926 fn parse_with_dot_matches_new_line_in_pattern() {
2927 let options = get_options("(?s)(.*)", |x| x);
2928
2929 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2930 let expr = tree.unwrap().expr;
2931
2932 assert_eq!(
2933 expr,
2934 Expr::Group(Box::new(Expr::Repeat {
2935 child: Box::new(Expr::Any { newline: true }),
2936 lo: 0,
2937 hi: usize::MAX,
2938 greedy: true
2939 }))
2940 );
2941 }
2942
2943 #[test]
2944 fn parse_with_dot_matches_new_line_option() {
2945 let options = get_options("(.*)", |x| x.dot_matches_new_line(true));
2946
2947 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2948 let expr = tree.unwrap().expr;
2949
2950 assert_eq!(
2951 expr,
2952 Expr::Group(Box::new(Expr::Repeat {
2953 child: Box::new(Expr::Any { newline: true }),
2954 lo: 0,
2955 hi: usize::MAX,
2956 greedy: true
2957 }))
2958 );
2959 }
2960
2961 #[test]
2962 fn parse_fancy_with_dot_matches_new_line_in_pattern() {
2963 let options = get_options("(.*)(?<=hugo)", |x| x.dot_matches_new_line(true));
2964
2965 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2966 let expr = tree.unwrap().expr;
2967
2968 assert_eq!(
2969 expr,
2970 Expr::Concat(vec![
2971 Expr::Group(Box::new(Expr::Repeat {
2972 child: Box::new(Expr::Any { newline: true }),
2973 lo: 0,
2974 hi: usize::MAX,
2975 greedy: true
2976 })),
2977 Expr::LookAround(
2978 Box::new(Expr::Concat(vec![
2979 make_literal("h"),
2980 make_literal("u"),
2981 make_literal("g"),
2982 make_literal("o")
2983 ])),
2984 LookBehind
2985 )
2986 ])
2987 );
2988 }
2989
2990 #[test]
2991 fn parse_with_case_insensitre_from_pattern_and_multi_line_option() {
2992 let options = get_options("(?i)^hello$", |x| x.multi_line(true));
2993
2994 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
2995 let expr = tree.unwrap().expr;
2996
2997 assert_eq!(
2998 expr,
2999 Expr::Concat(vec![
3000 Expr::Assertion(Assertion::StartLine { crlf: false }),
3001 make_literal_case_insensitive("h", true),
3002 make_literal_case_insensitive("e", true),
3003 make_literal_case_insensitive("l", true),
3004 make_literal_case_insensitive("l", true),
3005 make_literal_case_insensitive("o", true),
3006 Expr::Assertion(Assertion::EndLine { crlf: false })
3007 ])
3008 );
3009 }
3010
3011 #[test]
3012 fn parse_with_multi_line_and_case_insensitive_options() {
3013 let mut options = get_options("^hello$", |x| x.multi_line(true));
3014 options.syntaxc = options.syntaxc.case_insensitive(true);
3015
3016 let tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags());
3017 let expr = tree.unwrap().expr;
3018
3019 assert_eq!(
3020 expr,
3021 Expr::Concat(vec![
3022 Expr::Assertion(Assertion::StartLine { crlf: false }),
3023 make_literal_case_insensitive("h", true),
3024 make_literal_case_insensitive("e", true),
3025 make_literal_case_insensitive("l", true),
3026 make_literal_case_insensitive("l", true),
3027 make_literal_case_insensitive("o", true),
3028 Expr::Assertion(Assertion::EndLine { crlf: false })
3029 ])
3030 );
3031 }
3032}