1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
use boreal_parser::regex::Node;
/// Trait used to visit a regex ast in constant stack space.
pub trait Visitor {
/// Type of the result of the visit.
type Output;
/// Type of errors generated during the visit.
type Err;
/// Called for all nodes, before visiting children nodes.
///
/// If an error is returned, the visit is stopped and the error bubbled up.
#[allow(clippy::missing_errors_doc)]
fn visit_pre(&mut self, node: &Node) -> Result<VisitAction, Self::Err>;
/// Called for all nodes, after visiting children nodes.
///
/// If an error is returned, the visit is stopped and the error bubbled up.
#[allow(clippy::missing_errors_doc)]
fn visit_post(&mut self, _node: &Node) -> Result<(), Self::Err> {
Ok(())
}
/// Called between alternation nodes.
fn visit_alternation_in(&mut self) {}
#[allow(clippy::missing_errors_doc)]
/// Close the visitor and return the result.
fn finish(self) -> Result<Self::Output, Self::Err>;
}
/// Action to take on a given node regarding its children.
///
/// This only make sense on compound nodes.
pub enum VisitAction {
/// Continue walking inside the node, visiting its children.
Continue,
/// Skip the visit of the children nodes.
Skip,
}
/// Visit a regex AST.
///
/// This is done with a heap-based stack to ensure that the stack does not grow while visiting
/// the regex, preventing stack overflows on expressions with too much depth.
///
/// # Errors
///
/// If the visitor generates an error any time during the visit, the visit ends and the error
/// is returned.
// This is greatly inspired by the HeapVisitor of the regex crate.
// See
// <https://github.com/rust-lang/regex/blob/regex-syntax-0.6.25/regex-syntax/src/hir/visitor.rs>
pub fn visit<V: Visitor>(mut node: &Node, mut visitor: V) -> Result<V::Output, V::Err> {
// Heap-base stack to visit nodes without growing the stack.
// Each element is:
// - a node that is currently being visited.
// - a list of its children nodes left to visit.
let mut stack = Vec::new();
loop {
if let VisitAction::Continue = visitor.visit_pre(node)? {
if let Some(frame) = build_stack_frame(node) {
// New stack frame for the node. Push the node and its frame onto the stack,
// and visit its first children.
let child = frame.node;
stack.push((frame, node));
node = child;
continue;
}
}
// Node has either no children or `VisitAction::Skip` was returned. End the visit on
// this node and go through the stack until finding a new node to visit.
visitor.visit_post(node)?;
loop {
match stack.pop() {
Some((frame, parent)) => {
match frame.next() {
// More children in the current frame
Some(new_frame) => {
// If frame is an alternation, and we have a new children,
// we are between two alternated nodes.
if new_frame.is_alternation {
visitor.visit_alternation_in();
}
// Push the new frame onto the stack
let child = new_frame.node;
stack.push((new_frame, parent));
node = child;
break;
}
// Frame is exhausted, visit_post the parent and pop the next element
None => visitor.visit_post(parent)?,
}
}
None => {
return visitor.finish();
}
}
}
}
}
struct StackFrame<'a> {
node: &'a Node,
rest: &'a [Node],
is_alternation: bool,
}
impl<'a> StackFrame<'a> {
/// Get the next node in the frame.
///
/// This returns:
/// - None if there are no other nodes in the frame.
/// - a new stack frame and the next node otherwise.
fn next(self) -> Option<StackFrame<'a>> {
if self.rest.is_empty() {
None
} else {
Some(StackFrame {
node: &self.rest[0],
rest: &self.rest[1..],
is_alternation: self.is_alternation,
})
}
}
}
/// Build a stack frame for the given node.
fn build_stack_frame(node: &Node) -> Option<StackFrame> {
match node {
Node::Group(node) | Node::Repetition { node, .. } => Some(StackFrame {
node,
rest: &[],
is_alternation: false,
}),
Node::Concat(nodes) | Node::Alternation(nodes) if nodes.is_empty() => None,
Node::Concat(nodes) => Some(StackFrame {
node: &nodes[0],
rest: &nodes[1..],
is_alternation: false,
}),
Node::Alternation(nodes) => Some(StackFrame {
node: &nodes[0],
rest: &nodes[1..],
is_alternation: true,
}),
_ => None,
}
}