use crate::api;
use crate::types::{BracketContents, CaptureGroupID};
use std::fmt;
#[derive(Debug, Copy, Clone)]
pub enum AnchorType {
StartOfLine, EndOfLine, }
#[derive(Debug, Copy, Clone)]
pub struct Quantifier {
pub min: usize,
pub max: usize,
pub greedy: bool,
}
#[derive(Debug)]
pub enum Node {
Empty,
Goal,
Char { c: char, icase: bool },
ByteSequence(Vec<u8>),
ByteSet(Vec<u8>),
CharSet(Vec<char>),
Cat(Vec<Node>),
Alt(Box<Node>, Box<Node>),
MatchAny,
MatchAnyExceptLineTerminator,
Anchor(AnchorType),
WordBoundary { invert: bool },
CaptureGroup(Box<Node>, CaptureGroupID),
BackRef(u32),
Bracket(BracketContents),
LookaroundAssertion {
negate: bool,
backwards: bool,
start_group: CaptureGroupID,
end_group: CaptureGroupID,
contents: Box<Node>,
},
Loop {
loopee: Box<Node>,
quant: Quantifier,
enclosed_groups: std::ops::Range<u16>,
},
Loop1CharBody {
loopee: Box<Node>,
quant: Quantifier,
},
}
pub type NodeList = Vec<Node>;
impl Node {
pub fn reverse_cats(&mut self, w: &mut Walk) {
match self {
Node::Cat(nodes) if w.in_lookbehind => nodes.reverse(),
Node::ByteSequence(..) => panic!("Should not be reversing literal bytes"),
_ => {}
}
}
pub fn is_empty(&self) -> bool {
match self {
Node::Empty => true,
_ => false,
}
}
pub fn is_cat(&self) -> bool {
match self {
Node::Cat(..) => true,
_ => false,
}
}
pub fn matches_exactly_one_char(&self) -> bool {
match self {
Node::Char { .. } => true,
Node::CharSet { .. } => true,
Node::Bracket { .. } => true,
Node::MatchAny => true,
Node::MatchAnyExceptLineTerminator => true,
_ => false,
}
}
pub fn duplicate(&self) -> Node {
match self {
Node::Empty => Node::Empty,
Node::Goal => Node::Goal,
&Node::Char { c, icase } => Node::Char { c, icase },
Node::ByteSequence(bytes) => Node::ByteSequence(bytes.clone()),
Node::ByteSet(bytes) => Node::ByteSet(bytes.clone()),
Node::CharSet(chars) => Node::CharSet(chars.clone()),
Node::Cat(nodes) => Node::Cat(nodes.iter().map(|n| n.duplicate()).collect()),
Node::Alt(left, right) => {
Node::Alt(Box::new(left.duplicate()), Box::new(right.duplicate()))
}
Node::MatchAny => Node::MatchAny,
Node::MatchAnyExceptLineTerminator => Node::MatchAnyExceptLineTerminator,
&Node::Anchor(anchor_type) => Node::Anchor(anchor_type),
Node::Loop {
loopee,
quant,
enclosed_groups,
} => {
assert!(
enclosed_groups.start >= enclosed_groups.end,
"Cannot duplicate a loop with enclosed groups"
);
Node::Loop {
loopee: Box::new(loopee.as_ref().duplicate()),
quant: *quant,
enclosed_groups: enclosed_groups.clone(),
}
}
Node::Loop1CharBody { loopee, quant } => Node::Loop1CharBody {
loopee: Box::new(loopee.as_ref().duplicate()),
quant: *quant,
},
Node::CaptureGroup(..) => {
panic!("Refusing to duplicate a capture group");
}
&Node::WordBoundary { invert } => Node::WordBoundary { invert },
&Node::BackRef(idx) => Node::BackRef(idx),
Node::Bracket(bc) => Node::Bracket(bc.clone()),
Node::LookaroundAssertion {
negate,
backwards,
start_group,
end_group,
contents,
} => {
assert!(
start_group >= end_group,
"Cannot duplicate an assertion with enclosed groups"
);
Node::LookaroundAssertion {
negate: *negate,
backwards: *backwards,
start_group: *start_group,
end_group: *end_group,
contents: Box::new((*contents).duplicate()),
}
}
}
}
}
#[derive(Debug, Clone, Default)]
pub struct Walk {
pub skip_children: bool,
pub depth: usize,
pub in_lookbehind: bool,
}
#[derive(Debug)]
struct Walker<'a, F>
where
F: FnMut(&Node, &mut Walk),
{
func: &'a mut F,
postorder: bool,
walk: Walk,
}
impl<'a, F> Walker<'a, F>
where
F: FnMut(&Node, &mut Walk),
{
fn process_children(&mut self, n: &Node) {
match n {
Node::Empty => {}
Node::Goal => {}
Node::Char { .. } => {}
Node::ByteSequence(..) => {}
Node::ByteSet(..) => {}
Node::CharSet(..) => {}
Node::Cat(nodes) => {
for node in nodes {
self.process(node);
}
}
Node::Alt(left, right) => {
self.process(left.as_ref());
self.process(right.as_ref());
}
Node::MatchAny => {}
Node::MatchAnyExceptLineTerminator => {}
Node::Anchor { .. } => {}
Node::Loop { loopee, .. } => self.process(loopee),
Node::Loop1CharBody { loopee, .. } => self.process(loopee),
Node::CaptureGroup(contents, ..) => self.process(contents.as_ref()),
Node::WordBoundary { .. } => {}
Node::BackRef { .. } => {}
Node::Bracket { .. } => {}
Node::LookaroundAssertion {
backwards,
contents,
..
} => {
let saved = self.walk.in_lookbehind;
self.walk.in_lookbehind = *backwards;
self.process(contents.as_ref());
self.walk.in_lookbehind = saved;
}
}
}
fn process(&mut self, n: &Node) {
self.walk.skip_children = false;
if !self.postorder {
(self.func)(n, &mut self.walk);
}
if !self.walk.skip_children {
self.walk.depth += 1;
self.process_children(n);
self.walk.depth -= 1;
}
if self.postorder {
(self.func)(n, &mut self.walk)
}
}
}
#[derive(Debug)]
struct MutWalker<'a, F>
where
F: FnMut(&mut Node, &mut Walk),
{
func: &'a mut F,
postorder: bool,
walk: Walk,
}
impl<'a, F> MutWalker<'a, F>
where
F: FnMut(&mut Node, &mut Walk),
{
fn process_children(&mut self, n: &mut Node) {
match n {
Node::Empty => {}
Node::Goal => {}
Node::Char { .. } => {}
Node::ByteSequence(..) => {}
Node::ByteSet(..) => {}
Node::CharSet(..) => {}
Node::Cat(nodes) => {
for node in nodes {
self.process(node);
}
}
Node::Alt(left, right) => {
self.process(left.as_mut());
self.process(right.as_mut());
}
Node::MatchAny => {}
Node::MatchAnyExceptLineTerminator => {}
Node::Anchor { .. } => {}
Node::Loop { loopee, .. } => {
self.process(loopee);
}
Node::Loop1CharBody { loopee, .. } => {
self.process(loopee);
}
Node::CaptureGroup(contents, ..) => self.process(contents.as_mut()),
Node::WordBoundary { .. } => {}
Node::BackRef { .. } => {}
Node::Bracket { .. } => {}
Node::LookaroundAssertion {
backwards,
contents,
..
} => {
let saved = self.walk.in_lookbehind;
self.walk.in_lookbehind = *backwards;
self.process(contents.as_mut());
self.walk.in_lookbehind = saved;
}
}
}
fn process(&mut self, n: &mut Node) {
self.walk.skip_children = false;
if !self.postorder {
(self.func)(n, &mut self.walk);
}
if !self.walk.skip_children {
self.walk.depth += 1;
self.process_children(n);
self.walk.depth -= 1;
}
if self.postorder {
(self.func)(n, &mut self.walk);
}
}
}
pub fn walk<F>(postorder: bool, n: &Node, func: &mut F)
where
F: FnMut(&Node, &mut Walk),
{
let mut walker = Walker {
func,
postorder,
walk: Walk::default(),
};
walker.process(n);
}
pub fn walk_mut<F>(postorder: bool, n: &mut Node, func: &mut F)
where
F: FnMut(&mut Node, &mut Walk),
{
let mut walker = MutWalker {
func,
postorder,
walk: Walk::default(),
};
walker.process(n);
}
pub struct Regex {
pub node: Node,
pub flags: api::Flags,
}
impl Regex {}
fn display_node(node: &Node, depth: usize, f: &mut fmt::Formatter) -> fmt::Result {
for _ in 0..depth {
write!(f, "..")?;
}
match node {
Node::Empty => {
writeln!(f, "Empty")?;
}
Node::Goal => {
writeln!(f, "Goal")?;
}
Node::Char { c, icase: _ } => {
writeln!(f, "'{}'", &c.to_string())?;
}
Node::ByteSequence(bytes) => {
write!(f, "ByteSeq{} 0x", bytes.len())?;
for &b in bytes {
write!(f, "{:x}", b)?;
}
writeln!(f)?;
}
Node::ByteSet(bytes) => {
let len = bytes.len();
write!(f, "ByteSet{}", len)?;
if len > 0 {
write!(f, "0x")?;
for &b in bytes {
write!(f, "{:x}", b)?;
}
}
writeln!(f)?;
}
Node::CharSet(chars) => {
write!(f, "CharSet 0x")?;
for &c in chars {
write!(f, "{:x}", c as u32)?;
}
writeln!(f)?;
}
Node::Cat(..) => {
writeln!(f, "Cat")?;
}
Node::Alt(..) => {
writeln!(f, "Alt")?;
}
Node::MatchAny => {
writeln!(f, "MatchAny")?;
}
Node::MatchAnyExceptLineTerminator => {
writeln!(f, "MatchAnyExceptLineTerminator")?;
}
Node::Anchor(anchor_type) => {
writeln!(f, "Anchor {:?}", anchor_type)?;
}
Node::Loop {
quant,
enclosed_groups,
..
} => {
writeln!(f, "Loop (groups {:?}) {:?}", enclosed_groups, quant)?;
}
Node::Loop1CharBody { quant, .. } => {
writeln!(f, "Loop1Char {:?}", quant)?;
}
Node::CaptureGroup(_node, idx, ..) => {
writeln!(f, "CaptureGroup {:?}", idx)?;
}
&Node::WordBoundary { invert } => {
let kind = if invert { "\\B" } else { "\\b" };
writeln!(f, "WordBoundary {:?} ", kind)?;
}
&Node::BackRef(group) => {
writeln!(f, "BackRef {:?} ", group)?;
}
Node::Bracket(contents) => {
writeln!(f, "Bracket {:?}", contents)?;
}
&Node::LookaroundAssertion {
negate,
backwards,
start_group,
end_group,
..
} => {
let sense = if negate { "negative" } else { "positive" };
let direction = if backwards { "backwards" } else { "forwards" };
writeln!(
f,
"LookaroundAssertion {} {} {:?} {:?}",
sense, direction, start_group, end_group
)?;
}
}
Ok(())
}
impl fmt::Display for Regex {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut result = Ok(());
walk(false, &self.node, &mut |node: &Node, walk: &mut Walk| {
if result.is_ok() {
result = display_node(node, walk.depth, f)
}
});
result
}
}