1use crate::{config::Config, parse_tree::*};
4
5#[derive(Debug, PartialEq, Eq, Clone)]
7pub enum SimplifiedTreeNode {
8 Empty,
10 Symbol(Atom),
11 Union(Vec<SimplifiedTreeNode>),
12 Capture(Box<SimplifiedTreeNode>, usize),
14 Concat(Vec<SimplifiedTreeNode>),
15 Repeat(Box<SimplifiedTreeNode>, usize),
17 UpTo(Box<SimplifiedTreeNode>, usize, bool),
19 Star(Box<SimplifiedTreeNode>, bool),
21 Start,
22 End,
23 Never,
24}
25impl SimplifiedTreeNode {
26 pub fn optional(self, longest: bool) -> SimplifiedTreeNode {
27 return self.upto(1, longest);
28 }
29 pub fn union(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
30 if let SimplifiedTreeNode::Union(mut u) = self {
31 u.push(other);
32 return SimplifiedTreeNode::Union(u);
33 }
34 return SimplifiedTreeNode::Union(vec![self, other]);
35 }
36 pub fn capture(self, group_num: usize) -> SimplifiedTreeNode {
37 return SimplifiedTreeNode::Capture(Box::new(self), group_num);
38 }
39 pub fn concat(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
40 if let SimplifiedTreeNode::Concat(mut c) = self {
41 c.push(other);
42 return SimplifiedTreeNode::Concat(c);
43 }
44 return SimplifiedTreeNode::Concat(vec![self, other]);
45 }
46 pub fn repeat(self, count: usize) -> SimplifiedTreeNode {
47 return SimplifiedTreeNode::Repeat(self.into(), count);
48 }
49 pub fn upto(self, count: usize, longest: bool) -> SimplifiedTreeNode {
50 return SimplifiedTreeNode::UpTo(self.into(), count, longest);
51 }
52 pub fn star(self, longest: bool) -> SimplifiedTreeNode {
53 return SimplifiedTreeNode::Star(self.into(), longest);
54 }
55
56 pub fn max_bytes(&self) -> Option<usize> {
103 return match self {
104 SimplifiedTreeNode::Empty => Some(0),
105 SimplifiedTreeNode::Symbol(atom) => {
106 let range = atom.to_ranges().last()?.clone();
107 Some(range.end().len_utf8())
108 }
109 SimplifiedTreeNode::Union(nodes) if nodes.is_empty() => Some(0),
110 SimplifiedTreeNode::Union(nodes) => nodes
111 .iter()
112 .map(SimplifiedTreeNode::max_bytes)
113 .reduce(|a, b| Some(std::cmp::max(a?, b?)))?,
114 SimplifiedTreeNode::Capture(node, _) => node.max_bytes(),
115 SimplifiedTreeNode::Concat(nodes) if nodes.is_empty() => Some(0),
116 SimplifiedTreeNode::Concat(nodes) => {
117 nodes.iter().map(SimplifiedTreeNode::max_bytes).sum()
118 }
119 SimplifiedTreeNode::Repeat(node, times) => Some(node.max_bytes()? * times),
120 SimplifiedTreeNode::UpTo(node, times, _) => Some(node.max_bytes()? * times),
121 SimplifiedTreeNode::Star(_, _) => None,
122 SimplifiedTreeNode::Start => Some(0),
123 SimplifiedTreeNode::End => Some(0),
124 SimplifiedTreeNode::Never => Some(0),
125 };
126 }
127
128 pub fn min_bytes(&self) -> usize {
130 return match self {
131 SimplifiedTreeNode::Empty => 0,
132 SimplifiedTreeNode::Symbol(atom) => {
133 let Some(range) = atom.to_ranges().first().cloned() else {
134 return 0;
135 };
136 range.start().len_utf8()
137 }
138 SimplifiedTreeNode::Union(nodes) => nodes
139 .iter()
140 .map(SimplifiedTreeNode::min_bytes)
141 .min()
142 .unwrap_or(0),
143 SimplifiedTreeNode::Capture(node, _) => node.min_bytes(),
144 SimplifiedTreeNode::Concat(nodes) => {
145 nodes.iter().map(SimplifiedTreeNode::min_bytes).sum()
146 }
147 SimplifiedTreeNode::Repeat(node, times) => node.min_bytes() * times,
148 SimplifiedTreeNode::UpTo(node, times, _) => node.min_bytes() * times,
149 SimplifiedTreeNode::Star(_, _) => 0,
150 SimplifiedTreeNode::Start => 0,
151 SimplifiedTreeNode::End => 0,
152 SimplifiedTreeNode::Never => 0,
153 };
154 }
155}
156impl SimplifiedTreeNode {
157 fn from_sub_ere(
158 value: &ERE,
159 mut group_num: usize,
160 config: &Config,
161 ) -> (SimplifiedTreeNode, usize) {
162 let parts = value
163 .0
164 .iter()
165 .map(|part| {
166 let (new_node, new_group_num) =
167 SimplifiedTreeNode::from_ere_branch(&part, group_num, config);
168 group_num = new_group_num;
169 new_node
170 })
171 .collect();
172 return (SimplifiedTreeNode::Union(parts), group_num);
173 }
174 fn from_ere_branch(
175 value: &EREBranch,
176 mut group_num: usize,
177 config: &Config,
178 ) -> (SimplifiedTreeNode, usize) {
179 let parts = value
180 .0
181 .iter()
182 .map(|part| {
183 let (new_node, new_group_num) =
184 SimplifiedTreeNode::from_ere_part(&part, group_num, config);
185 group_num = new_group_num;
186 new_node
187 })
188 .collect();
189 return (SimplifiedTreeNode::Concat(parts), group_num);
190 }
191 fn from_ere_part(
192 value: &EREPart,
193 group_num: usize,
194 config: &Config,
195 ) -> (SimplifiedTreeNode, usize) {
196 return match value {
197 EREPart::Single(expr) => {
198 SimplifiedTreeNode::from_ere_expression(expr, group_num, config)
199 }
200 EREPart::Quantified(expr, quantifier) => {
201 let (child, group_num) =
202 SimplifiedTreeNode::from_ere_expression(expr, group_num, config);
203 let longest = config.quantifiers_prefer_longest() ^ quantifier.alt;
204 let part = match &quantifier.quantifier {
205 QuantifierType::Star => child.star(longest),
206 QuantifierType::Plus => child.clone().concat(child.star(longest)),
207 QuantifierType::QuestionMark => child.optional(longest),
208 QuantifierType::Multiple(n) => child.repeat(*n as usize),
209 QuantifierType::Range(n, None) => child
210 .clone()
211 .repeat(*n as usize)
212 .concat(child.star(longest)),
213 QuantifierType::Range(n, Some(m)) => match m.checked_sub(*n) {
214 None => SimplifiedTreeNode::Never,
215 Some(0) => child.repeat(*n as usize),
216 Some(r) => child
217 .clone()
218 .repeat(*n as usize)
219 .concat(child.upto(r as usize, longest)),
220 },
221 };
222 (part, group_num)
223 }
224 EREPart::Start => (SimplifiedTreeNode::Start, group_num),
225 EREPart::End => (SimplifiedTreeNode::End, group_num),
226 };
227 }
228 fn from_ere_expression(
229 value: &EREExpression,
230 group_num: usize,
231 config: &Config,
232 ) -> (SimplifiedTreeNode, usize) {
233 return match value {
234 EREExpression::Atom(atom) => (atom.clone().into(), group_num),
235 EREExpression::Subexpression(ere) => {
236 let (capture, next_group_num) =
237 SimplifiedTreeNode::from_sub_ere(ere, group_num + 1, config);
238 (
239 SimplifiedTreeNode::Capture(capture.into(), group_num),
240 next_group_num,
241 )
242 }
243 };
244 }
245 pub fn from_ere(value: &ERE, config: &Config) -> (SimplifiedTreeNode, usize) {
247 let (root, groups) = SimplifiedTreeNode::from_sub_ere(value, 1, config);
248 return (SimplifiedTreeNode::Capture(Box::new(root), 0), groups);
249 }
250
251 pub(crate) fn from_ere_no_group0(value: &ERE, config: &Config) -> (SimplifiedTreeNode, usize) {
253 return SimplifiedTreeNode::from_sub_ere(value, 1, config);
254 }
255}
256impl From<ERE> for SimplifiedTreeNode {
257 fn from(value: ERE) -> Self {
258 return SimplifiedTreeNode::from_ere(&value, &Config::default()).0;
259 }
260}
261impl From<Atom> for SimplifiedTreeNode {
262 fn from(value: Atom) -> Self {
263 return SimplifiedTreeNode::Symbol(value);
264 }
265}