ere_core/
simplified_tree.rs

1use crate::parse_tree::*;
2
3/// For translation between our parse tree and https://en.wikipedia.org/wiki/Thompson%27s_construction
4#[derive(Debug, PartialEq, Eq, Clone)]
5pub enum SimplifiedTreeNode {
6    /// Translates to `epsilon`
7    Empty,
8    Symbol(Atom),
9    Union(Vec<SimplifiedTreeNode>),
10    /// `Capture(child, group_num)`
11    Capture(Box<SimplifiedTreeNode>, usize),
12    Concat(Vec<SimplifiedTreeNode>),
13    /// `Repeat(child, times)`
14    Repeat(Box<SimplifiedTreeNode>, usize),
15    /// `UpTo(child, max_times)`
16    UpTo(Box<SimplifiedTreeNode>, usize),
17    Star(Box<SimplifiedTreeNode>),
18    Start,
19    End,
20    Never,
21}
22impl SimplifiedTreeNode {
23    pub fn optional(self) -> SimplifiedTreeNode {
24        return self.union(SimplifiedTreeNode::Empty);
25    }
26    pub fn union(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
27        if let SimplifiedTreeNode::Union(mut u) = self {
28            u.push(other);
29            return SimplifiedTreeNode::Union(u);
30        }
31        return SimplifiedTreeNode::Union(vec![self, other]);
32    }
33    pub fn concat(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
34        if let SimplifiedTreeNode::Concat(mut c) = self {
35            c.push(other);
36            return SimplifiedTreeNode::Concat(c);
37        }
38        return SimplifiedTreeNode::Concat(vec![self, other]);
39    }
40    pub fn repeat(self, count: usize) -> SimplifiedTreeNode {
41        return SimplifiedTreeNode::Repeat(self.into(), count);
42    }
43    pub fn upto(self, count: usize) -> SimplifiedTreeNode {
44        return SimplifiedTreeNode::UpTo(self.into(), count);
45    }
46    pub fn star(self) -> SimplifiedTreeNode {
47        return SimplifiedTreeNode::Star(self.into());
48    }
49
50    // /// A non-optimized, backtracking implementation
51    // /// - `start` is the index of the original `text` it starts at
52    // ///
53    // /// # Returns
54    // /// The number of matched symbols, or `None` if no match is found.
55    // fn _check(&self, text: &str, start: usize, variation: usize) -> Option<usize> {
56    //     return match self {
57    //         SimplifiedTreeNode::Empty => Some(0),
58    //         SimplifiedTreeNode::Symbol(atom) => atom.check(text.chars().next()?).then_some(1),
59    //         SimplifiedTreeNode::Union(vec) => todo!(),
60    //         SimplifiedTreeNode::Capture(node, _) => node._check(text, start),
61    //         SimplifiedTreeNode::Concat(vec) => {
62    //             let mut size = 0;
63    //             for node in vec {
64    //                 size += node._check(&text[size..], start + size)?;
65    //             }
66    //             Some(size)
67    //         }
68    //         SimplifiedTreeNode::Repeat(node, n) => {
69    //             let mut size = 0;
70    //             for _ in 0..(*n) {
71    //                 size += node._check(&text[size..], start + size)?;
72    //             }
73    //             Some(size)
74    //         }
75    //         SimplifiedTreeNode::UpTo(node, n) => todo!(),
76    //         SimplifiedTreeNode::Star(node) => todo!(),
77    //         SimplifiedTreeNode::Start if start == 0 => Some(0),
78    //         SimplifiedTreeNode::Start => None,
79    //         SimplifiedTreeNode::End if start == text.len() => Some(0),
80    //         SimplifiedTreeNode::End => None,
81    //         SimplifiedTreeNode::Never => None,
82    //     };
83    // }
84    // /// A non-optimized, backtracking implementation
85    // pub fn check(&self, text: &str) -> bool {
86    //     for i in 0..text.len() {
87    //         if let Some(_) = self._check(&text[..i], i) {
88    //             return true;
89    //         }
90    //     }
91    //     return self._check("", text.len()).is_some();
92    // }
93}
94impl SimplifiedTreeNode {
95    fn from_sub_ere(value: &ERE, mut group_num: usize) -> (SimplifiedTreeNode, usize) {
96        let parts = value
97            .0
98            .iter()
99            .map(|part| {
100                let (new_node, new_group_num) =
101                    SimplifiedTreeNode::from_ere_branch(&part, group_num);
102                group_num = new_group_num;
103                new_node
104            })
105            .collect();
106        return (SimplifiedTreeNode::Union(parts), group_num);
107    }
108    fn from_ere_branch(value: &EREBranch, mut group_num: usize) -> (SimplifiedTreeNode, usize) {
109        let parts = value
110            .0
111            .iter()
112            .map(|part| {
113                let (new_node, new_group_num) = SimplifiedTreeNode::from_ere_part(&part, group_num);
114                group_num = new_group_num;
115                new_node
116            })
117            .collect();
118        return (SimplifiedTreeNode::Concat(parts), group_num);
119    }
120    fn from_ere_part(value: &EREPart, group_num: usize) -> (SimplifiedTreeNode, usize) {
121        return match value {
122            EREPart::Single(expr) => SimplifiedTreeNode::from_ere_expression(expr, group_num),
123            EREPart::Quantified(expr, quantifier) => {
124                let (child, group_num) = SimplifiedTreeNode::from_ere_expression(expr, group_num);
125                let part = match quantifier {
126                    Quantifier::Star => child.star(),
127                    Quantifier::Plus => child.clone().concat(child.star()),
128                    Quantifier::QuestionMark => child.optional(),
129                    Quantifier::Multiple(n) => child.repeat(*n as usize),
130                    Quantifier::Range(n, None) => {
131                        child.clone().repeat(*n as usize).concat(child.star())
132                    }
133                    Quantifier::Range(n, Some(m)) => match m.checked_sub(*n) {
134                        None => SimplifiedTreeNode::Never,
135                        Some(0) => child.repeat(*n as usize),
136                        Some(r) => child
137                            .clone()
138                            .repeat(*n as usize)
139                            .concat(child.upto(r as usize)),
140                    },
141                };
142                (part, group_num)
143            }
144            EREPart::Start => (SimplifiedTreeNode::Start, group_num),
145            EREPart::End => (SimplifiedTreeNode::End, group_num),
146        };
147    }
148    fn from_ere_expression(value: &EREExpression, group_num: usize) -> (SimplifiedTreeNode, usize) {
149        return match value {
150            EREExpression::Atom(atom) => (atom.clone().into(), group_num),
151            EREExpression::Subexpression(ere) => {
152                let (capture, next_group_num) =
153                    SimplifiedTreeNode::from_sub_ere(ere, group_num + 1);
154                (
155                    SimplifiedTreeNode::Capture(capture.into(), group_num),
156                    next_group_num,
157                )
158            }
159        };
160    }
161    /// Returns the simplified tree, along with the number of capture groups (full expression is group 0)
162    pub fn from_ere(value: &ERE) -> (SimplifiedTreeNode, usize) {
163        let (root, groups) = SimplifiedTreeNode::from_sub_ere(value, 1);
164        return (SimplifiedTreeNode::Capture(Box::new(root), 0), groups);
165    }
166}
167impl From<ERE> for SimplifiedTreeNode {
168    fn from(value: ERE) -> Self {
169        return SimplifiedTreeNode::from_ere(&value).0;
170    }
171}
172impl From<Atom> for SimplifiedTreeNode {
173    fn from(value: Atom) -> Self {
174        return SimplifiedTreeNode::Symbol(value);
175    }
176}