1use crate::api;
4use crate::types::{BracketContents, CaptureGroupID, CaptureGroupName};
5#[cfg(not(feature = "std"))]
6use alloc::{boxed::Box, string::ToString, vec::Vec};
7use core::fmt;
8
9#[derive(Debug, Copy, Clone)]
10pub enum AnchorType {
11 StartOfLine, EndOfLine, }
14
15#[derive(Debug, Copy, Clone)]
17pub struct Quantifier {
18 pub min: usize,
20
21 pub max: Option<usize>,
24
25 pub greedy: bool,
27}
28
29#[derive(Debug)]
31pub enum Node {
32 Empty,
34
35 Goal,
37
38 Char { c: u32, icase: bool },
41
42 ByteSequence(Vec<u8>),
44
45 ByteSet(Vec<u8>),
48
49 CharSet(Vec<u32>),
52
53 Cat(Vec<Node>),
55
56 Alt(Box<Node>, Box<Node>),
58
59 MatchAny,
61
62 MatchAnyExceptLineTerminator,
64
65 Anchor {
68 anchor_type: AnchorType,
69 multiline: bool,
70 },
71
72 WordBoundary { invert: bool },
74
75 CaptureGroup {
79 id: CaptureGroupID,
80 contents: Box<Node>,
81 name: Option<CaptureGroupName>,
82 },
83
84 BackRef(u32),
90
91 Bracket(BracketContents),
93
94 LookaroundAssertion {
96 negate: bool,
97 backwards: bool,
98 start_group: CaptureGroupID,
99 end_group: CaptureGroupID,
100 contents: Box<Node>,
101 },
102
103 Loop {
105 loopee: Box<Node>,
106 quant: Quantifier,
107 enclosed_groups: core::ops::Range<u16>,
108 },
109
110 Loop1CharBody {
113 loopee: Box<Node>,
114 quant: Quantifier,
115 },
116}
117
118pub type NodeList = Vec<Node>;
119
120impl Node {
121 pub fn make_always_fails() -> Node {
123 Node::CharSet(Vec::new())
124 }
125
126 pub fn reverse_cats(&mut self, w: &mut Walk) {
129 match self {
130 Node::Cat(nodes) if w.in_lookbehind => nodes.reverse(),
131 Node::ByteSequence(..) => panic!("Should not be reversing literal bytes"),
132 _ => {}
133 }
134 }
135
136 pub fn is_empty(&self) -> bool {
138 matches!(self, Node::Empty)
139 }
140
141 pub fn is_cat(&self) -> bool {
143 matches!(self, Node::Cat(..))
144 }
145
146 pub fn matches_exactly_one_char(&self) -> bool {
149 match self {
150 Node::Char { .. } => true,
151 Node::CharSet(contents) => !contents.is_empty(),
152 Node::Bracket(contents) => !contents.is_empty(),
153 Node::MatchAny => true,
154 Node::MatchAnyExceptLineTerminator => true,
155 _ => false,
156 }
157 }
158
159 pub fn match_always_fails(&self) -> bool {
164 match self {
165 Node::ByteSet(bytes) => bytes.is_empty(),
166 Node::CharSet(contents) => contents.is_empty(),
167 Node::Bracket(contents) => contents.is_empty(),
168 _ => false,
169 }
170 }
171
172 pub fn try_duplicate(&self, mut depth: usize) -> Option<Node> {
177 if depth > 100 {
178 return None;
179 }
180 depth += 1;
181 Some(match self {
182 Node::Empty => Node::Empty,
183 Node::Goal => Node::Goal,
184 &Node::Char { c, icase } => Node::Char { c, icase },
185 Node::ByteSequence(bytes) => Node::ByteSequence(bytes.clone()),
186 Node::ByteSet(bytes) => Node::ByteSet(bytes.clone()),
187 Node::CharSet(chars) => Node::CharSet(chars.clone()),
188 Node::Cat(nodes) => {
189 let mut new_nodes = Vec::with_capacity(nodes.len());
190 for n in nodes {
191 new_nodes.push(n.try_duplicate(depth)?);
192 }
193 Node::Cat(new_nodes)
194 }
195 Node::Alt(left, right) => Node::Alt(
196 Box::new(left.try_duplicate(depth)?),
197 Box::new(right.try_duplicate(depth)?),
198 ),
199 Node::MatchAny => Node::MatchAny,
200 Node::MatchAnyExceptLineTerminator => Node::MatchAnyExceptLineTerminator,
201 &Node::Anchor {
202 anchor_type,
203 multiline,
204 } => Node::Anchor {
205 anchor_type,
206 multiline,
207 },
208
209 Node::Loop {
210 loopee,
211 quant,
212 enclosed_groups,
213 } => {
214 assert!(
215 enclosed_groups.start >= enclosed_groups.end,
216 "Cannot duplicate a loop with enclosed groups"
217 );
218 Node::Loop {
219 loopee: Box::new(loopee.as_ref().try_duplicate(depth)?),
220 quant: *quant,
221 enclosed_groups: enclosed_groups.clone(),
222 }
223 }
224
225 Node::Loop1CharBody { loopee, quant } => Node::Loop1CharBody {
226 loopee: Box::new(loopee.as_ref().try_duplicate(depth)?),
227 quant: *quant,
228 },
229
230 Node::CaptureGroup { .. } => {
231 panic!("Refusing to duplicate a capture group");
232 }
233 &Node::WordBoundary { invert } => Node::WordBoundary { invert },
234 &Node::BackRef(idx) => Node::BackRef(idx),
235 Node::Bracket(bc) => Node::Bracket(bc.clone()),
236 Node::LookaroundAssertion {
238 negate,
239 backwards,
240 start_group,
241 end_group,
242 contents,
243 } => {
244 assert!(
245 start_group >= end_group,
246 "Cannot duplicate an assertion with enclosed groups"
247 );
248 Node::LookaroundAssertion {
249 negate: *negate,
250 backwards: *backwards,
251 start_group: *start_group,
252 end_group: *end_group,
253 contents: Box::new((*contents).try_duplicate(depth)?),
254 }
255 }
256 })
257 }
258}
259
260#[derive(Debug, Clone)]
262pub struct Walk {
263 pub skip_children: bool,
265
266 pub depth: usize,
268
269 pub in_lookbehind: bool,
271
272 pub unicode: bool,
274}
275
276impl Walk {
277 fn new(unicode: bool) -> Self {
278 Self {
279 skip_children: false,
280 depth: 0,
281 in_lookbehind: false,
282 unicode,
283 }
284 }
285}
286
287#[derive(Debug)]
288struct Walker<'a, F>
289where
290 F: FnMut(&Node, &mut Walk),
291{
292 func: &'a mut F,
293 postorder: bool,
294 walk: Walk,
295}
296
297impl<F> Walker<'_, F>
298where
299 F: FnMut(&Node, &mut Walk),
300{
301 fn process_children(&mut self, n: &Node) {
302 match n {
303 Node::Empty
304 | Node::Goal
305 | Node::Char { .. }
306 | Node::ByteSequence(..)
307 | Node::ByteSet(..)
308 | Node::CharSet(..)
309 | Node::WordBoundary { .. }
310 | Node::BackRef { .. }
311 | Node::Bracket { .. }
312 | Node::MatchAny
313 | Node::MatchAnyExceptLineTerminator
314 | Node::Anchor { .. } => {}
315 Node::Cat(nodes) => {
316 for node in nodes {
317 self.process(node);
318 }
319 }
320 Node::Alt(left, right) => {
321 self.process(left.as_ref());
322 self.process(right.as_ref());
323 }
324
325 Node::Loop { loopee, .. } | Node::Loop1CharBody { loopee, .. } => self.process(loopee),
326 Node::CaptureGroup { contents, .. } => self.process(contents.as_ref()),
327
328 Node::LookaroundAssertion {
329 backwards,
330 contents,
331 ..
332 } => {
333 let saved = self.walk.in_lookbehind;
334 self.walk.in_lookbehind = *backwards;
335 self.process(contents.as_ref());
336 self.walk.in_lookbehind = saved;
337 }
338 }
339 }
340
341 fn process(&mut self, n: &Node) {
342 self.walk.skip_children = false;
343 if !self.postorder {
344 (self.func)(n, &mut self.walk);
345 }
346 if !self.walk.skip_children {
347 self.walk.depth += 1;
348 self.process_children(n);
349 self.walk.depth -= 1;
350 }
351 if self.postorder {
352 (self.func)(n, &mut self.walk)
353 }
354 }
355}
356
357#[derive(Debug)]
358struct MutWalker<'a, F>
359where
360 F: FnMut(&mut Node, &mut Walk),
361{
362 func: &'a mut F,
363 postorder: bool,
364 walk: Walk,
365}
366
367impl<F> MutWalker<'_, F>
368where
369 F: FnMut(&mut Node, &mut Walk),
370{
371 fn process_children(&mut self, n: &mut Node) {
372 match n {
373 Node::Empty
374 | Node::Goal
375 | Node::Char { .. }
376 | Node::ByteSequence(..)
377 | Node::ByteSet(..)
378 | Node::CharSet(..)
379 | Node::MatchAny
380 | Node::MatchAnyExceptLineTerminator
381 | Node::Anchor { .. }
382 | Node::WordBoundary { .. }
383 | Node::BackRef { .. }
384 | Node::Bracket { .. } => {}
385 Node::Cat(nodes) => {
386 nodes.iter_mut().for_each(|node| self.process(node));
387 }
388 Node::Alt(left, right) => {
389 self.process(left.as_mut());
390 self.process(right.as_mut());
391 }
392
393 Node::Loop { loopee, .. } | Node::Loop1CharBody { loopee, .. } => {
394 self.process(loopee);
395 }
396 Node::CaptureGroup { contents, .. } => self.process(contents.as_mut()),
397
398 Node::LookaroundAssertion {
399 backwards,
400 contents,
401 ..
402 } => {
403 let saved = self.walk.in_lookbehind;
404 self.walk.in_lookbehind = *backwards;
405 self.process(contents.as_mut());
406 self.walk.in_lookbehind = saved;
407 }
408 }
409 }
410
411 fn process(&mut self, n: &mut Node) {
412 self.walk.skip_children = false;
413 if !self.postorder {
414 (self.func)(n, &mut self.walk);
415 }
416 if !self.walk.skip_children {
417 self.walk.depth += 1;
418 self.process_children(n);
419 self.walk.depth -= 1;
420 }
421 if self.postorder {
422 (self.func)(n, &mut self.walk);
423 }
424 }
425}
426
427pub fn walk<F>(postorder: bool, unicode: bool, n: &Node, func: &mut F)
431where
432 F: FnMut(&Node, &mut Walk),
433{
434 let mut walker = Walker {
435 func,
436 postorder,
437 walk: Walk::new(unicode),
438 };
439 walker.process(n);
440}
441
442pub fn walk_mut<F>(postorder: bool, unicode: bool, n: &mut Node, func: &mut F)
449where
450 F: FnMut(&mut Node, &mut Walk),
451{
452 let mut walker = MutWalker {
453 func,
454 postorder,
455 walk: Walk::new(unicode),
456 };
457 walker.process(n);
458}
459
460pub struct Regex {
462 pub node: Node,
463 pub flags: api::Flags,
464}
465
466impl Regex {}
467
468fn display_node(node: &Node, depth: usize, f: &mut fmt::Formatter) -> fmt::Result {
469 for _ in 0..depth {
470 write!(f, "..")?;
471 }
472 match node {
473 Node::Empty => {
474 writeln!(f, "Empty")?;
475 }
476 Node::Goal => {
477 writeln!(f, "Goal")?;
478 }
479 Node::Char { c, icase: _ } => {
480 writeln!(f, "'{}'", &c.to_string())?;
481 }
482 Node::ByteSequence(bytes) => {
483 write!(f, "ByteSeq{} 0x", bytes.len())?;
484 for &b in bytes {
485 write!(f, "{:x}", b)?;
486 }
487 writeln!(f)?;
488 }
489 Node::ByteSet(bytes) => {
490 let len = bytes.len();
491 write!(f, "ByteSet{}", len)?;
492 for &b in bytes {
493 write!(f, " 0x{:x}", b)?;
494 }
495 writeln!(f)?;
496 }
497 Node::CharSet(chars) => {
498 write!(f, "CharSet ")?;
499 let mut first = true;
500 for &c in chars {
501 if !first {
502 write!(f, ", ")?;
503 }
504 first = false;
505 write!(f, "0x{:x}", { c })?;
506 }
507 writeln!(f)?;
508 }
509 Node::Cat(..) => {
510 writeln!(f, "Cat")?;
511 }
512 Node::Alt(..) => {
513 writeln!(f, "Alt")?;
514 }
515 Node::MatchAny => {
516 writeln!(f, "MatchAny")?;
517 }
518 Node::MatchAnyExceptLineTerminator => {
519 writeln!(f, "MatchAnyExceptLineTerminator")?;
520 }
521 Node::Anchor {
522 anchor_type,
523 multiline,
524 } => {
525 writeln!(f, "Anchor {:?} multiline={}", anchor_type, multiline)?;
526 }
527 Node::Loop {
528 quant,
529 enclosed_groups,
530 ..
531 } => {
532 writeln!(f, "Loop (groups {:?}) {:?}", enclosed_groups, quant)?;
533 }
534 Node::Loop1CharBody { quant, .. } => {
535 writeln!(f, "Loop1Char {:?}", quant)?;
536 }
537 Node::CaptureGroup { id, name, .. } => {
538 if let Some(name) = name {
539 writeln!(f, "CaptureGroup {:?} {:?}", id, name)?;
540 } else {
541 writeln!(f, "CaptureGroup {:?}", id)?;
542 }
543 }
544 &Node::WordBoundary { invert } => {
545 let kind = if invert { "\\B" } else { "\\b" };
546 writeln!(f, "WordBoundary {:?} ", kind)?;
547 }
548 &Node::BackRef(group) => {
549 writeln!(f, "BackRef {:?} ", group)?;
550 }
551 Node::Bracket(contents) => {
552 writeln!(f, "Bracket {:?}", contents)?;
553 }
554
555 &Node::LookaroundAssertion {
556 negate,
557 backwards,
558 start_group,
559 end_group,
560 ..
561 } => {
562 let sense = if negate { "negative" } else { "positive" };
563 let direction = if backwards { "backwards" } else { "forwards" };
564 writeln!(
565 f,
566 "LookaroundAssertion {} {} {:?} {:?}",
567 sense, direction, start_group, end_group
568 )?;
569 }
570 }
571 Ok(())
572}
573
574impl fmt::Display for Regex {
575 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
576 let mut result = Ok(());
578 walk(
579 false,
580 self.flags.unicode,
581 &self.node,
582 &mut |node: &Node, walk: &mut Walk| {
583 if result.is_ok() {
584 result = display_node(node, walk.depth, f)
585 }
586 },
587 );
588 result
589 }
590}