ere_core/
simplified_tree.rs

1//! Implements a simplified intermediate representation of a regular expression.
2
3use crate::{config::Config, parse_tree::*};
4
5/// For translation between our parsed [`ERE`] and the [`crate::working_nfa::WorkingNFA`]
6#[derive(Debug, PartialEq, Eq, Clone)]
7pub enum SimplifiedTreeNode {
8    /// Translates to `epsilon`
9    Empty,
10    Symbol(Atom),
11    Union(Vec<SimplifiedTreeNode>),
12    /// `Capture(child, group_num)`
13    Capture(Box<SimplifiedTreeNode>, usize),
14    Concat(Vec<SimplifiedTreeNode>),
15    /// `Repeat(child, times)`
16    Repeat(Box<SimplifiedTreeNode>, usize),
17    /// `UpTo(child, max_times, longest)`
18    UpTo(Box<SimplifiedTreeNode>, usize, bool),
19    /// `Star(child, longest)`
20    Star(Box<SimplifiedTreeNode>, bool),
21    Start,
22    End,
23    Never,
24}
25impl SimplifiedTreeNode {
26    pub fn optional(self, longest: bool) -> SimplifiedTreeNode {
27        return self.upto(1, longest);
28    }
29    pub fn union(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
30        if let SimplifiedTreeNode::Union(mut u) = self {
31            u.push(other);
32            return SimplifiedTreeNode::Union(u);
33        }
34        return SimplifiedTreeNode::Union(vec![self, other]);
35    }
36    pub fn concat(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
37        if let SimplifiedTreeNode::Concat(mut c) = self {
38            c.push(other);
39            return SimplifiedTreeNode::Concat(c);
40        }
41        return SimplifiedTreeNode::Concat(vec![self, other]);
42    }
43    pub fn repeat(self, count: usize) -> SimplifiedTreeNode {
44        return SimplifiedTreeNode::Repeat(self.into(), count);
45    }
46    pub fn upto(self, count: usize, longest: bool) -> SimplifiedTreeNode {
47        return SimplifiedTreeNode::UpTo(self.into(), count, longest);
48    }
49    pub fn star(self, longest: bool) -> SimplifiedTreeNode {
50        return SimplifiedTreeNode::Star(self.into(), longest);
51    }
52
53    // /// A non-optimized, backtracking implementation
54    // /// - `start` is the index of the original `text` it starts at
55    // ///
56    // /// # Returns
57    // /// The number of matched symbols, or `None` if no match is found.
58    // fn _check(&self, text: &str, start: usize, variation: usize) -> Option<usize> {
59    //     return match self {
60    //         SimplifiedTreeNode::Empty => Some(0),
61    //         SimplifiedTreeNode::Symbol(atom) => atom.check(text.chars().next()?).then_some(1),
62    //         SimplifiedTreeNode::Union(vec) => todo!(),
63    //         SimplifiedTreeNode::Capture(node, _) => node._check(text, start),
64    //         SimplifiedTreeNode::Concat(vec) => {
65    //             let mut size = 0;
66    //             for node in vec {
67    //                 size += node._check(&text[size..], start + size)?;
68    //             }
69    //             Some(size)
70    //         }
71    //         SimplifiedTreeNode::Repeat(node, n) => {
72    //             let mut size = 0;
73    //             for _ in 0..(*n) {
74    //                 size += node._check(&text[size..], start + size)?;
75    //             }
76    //             Some(size)
77    //         }
78    //         SimplifiedTreeNode::UpTo(node, n) => todo!(),
79    //         SimplifiedTreeNode::Star(node) => todo!(),
80    //         SimplifiedTreeNode::Start if start == 0 => Some(0),
81    //         SimplifiedTreeNode::Start => None,
82    //         SimplifiedTreeNode::End if start == text.len() => Some(0),
83    //         SimplifiedTreeNode::End => None,
84    //         SimplifiedTreeNode::Never => None,
85    //     };
86    // }
87    // /// A non-optimized, backtracking implementation
88    // pub fn check(&self, text: &str) -> bool {
89    //     for i in 0..text.len() {
90    //         if let Some(_) = self._check(&text[..i], i) {
91    //             return true;
92    //         }
93    //     }
94    //     return self._check("", text.len()).is_some();
95    // }
96}
97impl SimplifiedTreeNode {
98    fn from_sub_ere(
99        value: &ERE,
100        mut group_num: usize,
101        config: &Config,
102    ) -> (SimplifiedTreeNode, usize) {
103        let parts = value
104            .0
105            .iter()
106            .map(|part| {
107                let (new_node, new_group_num) =
108                    SimplifiedTreeNode::from_ere_branch(&part, group_num, config);
109                group_num = new_group_num;
110                new_node
111            })
112            .collect();
113        return (SimplifiedTreeNode::Union(parts), group_num);
114    }
115    fn from_ere_branch(
116        value: &EREBranch,
117        mut group_num: usize,
118        config: &Config,
119    ) -> (SimplifiedTreeNode, usize) {
120        let parts = value
121            .0
122            .iter()
123            .map(|part| {
124                let (new_node, new_group_num) =
125                    SimplifiedTreeNode::from_ere_part(&part, group_num, config);
126                group_num = new_group_num;
127                new_node
128            })
129            .collect();
130        return (SimplifiedTreeNode::Concat(parts), group_num);
131    }
132    fn from_ere_part(
133        value: &EREPart,
134        group_num: usize,
135        config: &Config,
136    ) -> (SimplifiedTreeNode, usize) {
137        return match value {
138            EREPart::Single(expr) => {
139                SimplifiedTreeNode::from_ere_expression(expr, group_num, config)
140            }
141            EREPart::Quantified(expr, quantifier) => {
142                let (child, group_num) =
143                    SimplifiedTreeNode::from_ere_expression(expr, group_num, config);
144                let longest = config.quantifiers_prefer_longest() ^ quantifier.alt;
145                let part = match &quantifier.quantifier {
146                    QuantifierType::Star => child.star(quantifier.alt),
147                    QuantifierType::Plus => child.clone().concat(child.star(longest)),
148                    QuantifierType::QuestionMark => child.optional(longest),
149                    QuantifierType::Multiple(n) => child.repeat(*n as usize),
150                    QuantifierType::Range(n, None) => child
151                        .clone()
152                        .repeat(*n as usize)
153                        .concat(child.star(longest)),
154                    QuantifierType::Range(n, Some(m)) => match m.checked_sub(*n) {
155                        None => SimplifiedTreeNode::Never,
156                        Some(0) => child.repeat(*n as usize),
157                        Some(r) => child
158                            .clone()
159                            .repeat(*n as usize)
160                            .concat(child.upto(r as usize, longest)),
161                    },
162                };
163                (part, group_num)
164            }
165            EREPart::Start => (SimplifiedTreeNode::Start, group_num),
166            EREPart::End => (SimplifiedTreeNode::End, group_num),
167        };
168    }
169    fn from_ere_expression(
170        value: &EREExpression,
171        group_num: usize,
172        config: &Config,
173    ) -> (SimplifiedTreeNode, usize) {
174        return match value {
175            EREExpression::Atom(atom) => (atom.clone().into(), group_num),
176            EREExpression::Subexpression(ere) => {
177                let (capture, next_group_num) =
178                    SimplifiedTreeNode::from_sub_ere(ere, group_num + 1, config);
179                (
180                    SimplifiedTreeNode::Capture(capture.into(), group_num),
181                    next_group_num,
182                )
183            }
184        };
185    }
186    /// Returns the simplified tree, along with the number of capture groups (full expression is group 0)
187    pub fn from_ere(value: &ERE, config: &Config) -> (SimplifiedTreeNode, usize) {
188        let (root, groups) = SimplifiedTreeNode::from_sub_ere(value, 1, config);
189        return (SimplifiedTreeNode::Capture(Box::new(root), 0), groups);
190    }
191}
192impl From<ERE> for SimplifiedTreeNode {
193    fn from(value: ERE) -> Self {
194        return SimplifiedTreeNode::from_ere(&value, &Config::default()).0;
195    }
196}
197impl From<Atom> for SimplifiedTreeNode {
198    fn from(value: Atom) -> Self {
199        return SimplifiedTreeNode::Symbol(value);
200    }
201}