1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
use boreal_parser::regex::Node;

/// Trait used to visit a regex ast in constant stack space.
pub trait Visitor {
    /// Type of the result of the visit.
    type Output;
    /// Type of errors generated during the visit.
    type Err;

    /// Called for all nodes, before visiting children nodes.
    ///
    /// If an error is returned, the visit is stopped and the error bubbled up.
    #[allow(clippy::missing_errors_doc)]
    fn visit_pre(&mut self, node: &Node) -> Result<VisitAction, Self::Err>;

    /// Called for all nodes, after visiting children nodes.
    ///
    /// If an error is returned, the visit is stopped and the error bubbled up.
    #[allow(clippy::missing_errors_doc)]
    fn visit_post(&mut self, _node: &Node) -> Result<(), Self::Err> {
        Ok(())
    }

    /// Called between alternation nodes.
    fn visit_alternation_in(&mut self) {}

    #[allow(clippy::missing_errors_doc)]
    /// Close the visitor and return the result.
    fn finish(self) -> Result<Self::Output, Self::Err>;
}

/// Action to take on a given node regarding its children.
///
/// This only make sense on compound nodes.
pub enum VisitAction {
    /// Continue walking inside the node, visiting its children.
    Continue,
    /// Skip the visit of the children nodes.
    Skip,
}

/// Visit a regex AST.
///
/// This is done with a heap-based stack to ensure that the stack does not grow while visiting
/// the regex, preventing stack overflows on expressions with too much depth.
///
/// # Errors
///
/// If the visitor generates an error any time during the visit, the visit ends and the error
/// is returned.
// This is greatly inspired by the HeapVisitor of the regex crate.
// See
// <https://github.com/rust-lang/regex/blob/regex-syntax-0.6.25/regex-syntax/src/hir/visitor.rs>
pub fn visit<V: Visitor>(mut node: &Node, mut visitor: V) -> Result<V::Output, V::Err> {
    // Heap-base stack to visit nodes without growing the stack.
    // Each element is:
    // - a node that is currently being visited.
    // - a list of its children nodes left to visit.
    let mut stack = Vec::new();

    loop {
        if let VisitAction::Continue = visitor.visit_pre(node)? {
            if let Some(frame) = build_stack_frame(node) {
                // New stack frame for the node. Push the node and its frame onto the stack,
                // and visit its first children.
                let child = frame.node;
                stack.push((frame, node));
                node = child;
                continue;
            }
        }

        // Node has either no children or `VisitAction::Skip` was returned. End the visit on
        // this node and go through the stack until finding a new node to visit.
        visitor.visit_post(node)?;
        loop {
            match stack.pop() {
                Some((frame, parent)) => {
                    match frame.next() {
                        // More children in the current frame
                        Some(new_frame) => {
                            // If frame is an alternation, and we have a new children,
                            // we are between two alternated nodes.
                            if new_frame.is_alternation {
                                visitor.visit_alternation_in();
                            }

                            // Push the new frame onto the stack
                            let child = new_frame.node;
                            stack.push((new_frame, parent));
                            node = child;
                            break;
                        }
                        // Frame is exhausted, visit_post the parent and pop the next element
                        None => visitor.visit_post(parent)?,
                    }
                }
                None => {
                    return visitor.finish();
                }
            }
        }
    }
}

struct StackFrame<'a> {
    node: &'a Node,

    rest: &'a [Node],

    is_alternation: bool,
}

impl<'a> StackFrame<'a> {
    /// Get the next node in the frame.
    ///
    /// This returns:
    /// - None if there are no other nodes in the frame.
    /// - a new stack frame and the next node otherwise.
    fn next(self) -> Option<StackFrame<'a>> {
        if self.rest.is_empty() {
            None
        } else {
            Some(StackFrame {
                node: &self.rest[0],
                rest: &self.rest[1..],
                is_alternation: self.is_alternation,
            })
        }
    }
}

/// Build a stack frame for the given node.
fn build_stack_frame(node: &Node) -> Option<StackFrame> {
    match node {
        Node::Group(node) | Node::Repetition { node, .. } => Some(StackFrame {
            node,
            rest: &[],
            is_alternation: false,
        }),
        Node::Concat(nodes) | Node::Alternation(nodes) if nodes.is_empty() => None,
        Node::Concat(nodes) => Some(StackFrame {
            node: &nodes[0],
            rest: &nodes[1..],
            is_alternation: false,
        }),
        Node::Alternation(nodes) => Some(StackFrame {
            node: &nodes[0],
            rest: &nodes[1..],
            is_alternation: true,
        }),
        _ => None,
    }
}