1use std::num::NonZeroUsize;
4
5use crate::{config::Config, parse_tree::*};
6
7#[derive(Debug, PartialEq, Eq, Clone)]
9pub enum SimplifiedTreeNode {
10 Empty,
12 Symbol(Atom),
13 Union(Vec<SimplifiedTreeNode>),
14 Capture(Box<SimplifiedTreeNode>, usize),
16 Concat(Vec<SimplifiedTreeNode>),
17 Repeat(Box<SimplifiedTreeNode>, NonZeroUsize),
19 UpTo(Box<SimplifiedTreeNode>, NonZeroUsize, bool),
21 Star(Box<SimplifiedTreeNode>, bool),
23 Start,
24 End,
25 Never,
26}
27impl SimplifiedTreeNode {
28 pub fn optional(self, longest: bool) -> SimplifiedTreeNode {
29 return self.upto(1, longest);
30 }
31 pub fn union(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
32 if let SimplifiedTreeNode::Union(mut u) = self {
33 u.push(other);
34 return SimplifiedTreeNode::Union(u);
35 }
36 return SimplifiedTreeNode::Union(vec![self, other]);
37 }
38 pub fn capture(self, group_num: usize) -> SimplifiedTreeNode {
39 return SimplifiedTreeNode::Capture(Box::new(self), group_num);
40 }
41 pub fn concat(self, other: SimplifiedTreeNode) -> SimplifiedTreeNode {
42 if let SimplifiedTreeNode::Concat(mut c) = self {
43 c.push(other);
44 return SimplifiedTreeNode::Concat(c);
45 }
46 return SimplifiedTreeNode::Concat(vec![self, other]);
47 }
48 pub fn repeat(self, count: usize) -> SimplifiedTreeNode {
49 return match NonZeroUsize::new(count) {
50 Some(count) => SimplifiedTreeNode::Repeat(self.into(), count),
51 None => SimplifiedTreeNode::Empty,
52 };
53 }
54 pub fn upto(self, count: usize, longest: bool) -> SimplifiedTreeNode {
55 return match NonZeroUsize::new(count) {
56 Some(count) => SimplifiedTreeNode::UpTo(self.into(), count, longest),
57 None => SimplifiedTreeNode::Empty,
58 };
59 }
60 pub fn star(self, longest: bool) -> SimplifiedTreeNode {
61 return SimplifiedTreeNode::Star(self.into(), longest);
62 }
63
64 pub fn max_bytes(&self) -> Option<usize> {
111 return match self {
112 SimplifiedTreeNode::Empty => Some(0),
113 SimplifiedTreeNode::Symbol(atom) => {
114 let range = atom.to_ranges().last()?.clone();
115 Some(range.end().len_utf8())
116 }
117 SimplifiedTreeNode::Union(nodes) if nodes.is_empty() => Some(0),
118 SimplifiedTreeNode::Union(nodes) => nodes
119 .iter()
120 .map(SimplifiedTreeNode::max_bytes)
121 .reduce(|a, b| Some(std::cmp::max(a?, b?)))?,
122 SimplifiedTreeNode::Capture(node, _) => node.max_bytes(),
123 SimplifiedTreeNode::Concat(nodes) if nodes.is_empty() => Some(0),
124 SimplifiedTreeNode::Concat(nodes) => {
125 nodes.iter().map(SimplifiedTreeNode::max_bytes).sum()
126 }
127 SimplifiedTreeNode::Repeat(node, times) => Some(node.max_bytes()? * times.get()),
128 SimplifiedTreeNode::UpTo(node, times, _) => Some(node.max_bytes()? * times.get()),
129 SimplifiedTreeNode::Star(_, _) => None,
130 SimplifiedTreeNode::Start => Some(0),
131 SimplifiedTreeNode::End => Some(0),
132 SimplifiedTreeNode::Never => Some(0),
133 };
134 }
135
136 pub fn min_bytes(&self) -> usize {
138 return match self {
139 SimplifiedTreeNode::Empty => 0,
140 SimplifiedTreeNode::Symbol(atom) => {
141 let Some(range) = atom.to_ranges().first().cloned() else {
142 return 0;
143 };
144 range.start().len_utf8()
145 }
146 SimplifiedTreeNode::Union(nodes) => nodes
147 .iter()
148 .map(SimplifiedTreeNode::min_bytes)
149 .min()
150 .unwrap_or(0),
151 SimplifiedTreeNode::Capture(node, _) => node.min_bytes(),
152 SimplifiedTreeNode::Concat(nodes) => {
153 nodes.iter().map(SimplifiedTreeNode::min_bytes).sum()
154 }
155 SimplifiedTreeNode::Repeat(node, times) => node.min_bytes() * times.get(),
156 SimplifiedTreeNode::UpTo(node, times, _) => node.min_bytes() * times.get(),
157 SimplifiedTreeNode::Star(_, _) => 0,
158 SimplifiedTreeNode::Start => 0,
159 SimplifiedTreeNode::End => 0,
160 SimplifiedTreeNode::Never => 0,
161 };
162 }
163}
164impl SimplifiedTreeNode {
165 fn from_sub_ere(
166 value: &ERE,
167 mut group_num: usize,
168 config: &Config,
169 ) -> (SimplifiedTreeNode, usize) {
170 let parts = value
171 .0
172 .iter()
173 .map(|part| {
174 let (new_node, new_group_num) =
175 SimplifiedTreeNode::from_ere_branch(&part, group_num, config);
176 group_num = new_group_num;
177 new_node
178 })
179 .collect();
180 return (SimplifiedTreeNode::Union(parts), group_num);
181 }
182 fn from_ere_branch(
183 value: &EREBranch,
184 mut group_num: usize,
185 config: &Config,
186 ) -> (SimplifiedTreeNode, usize) {
187 let parts = value
188 .0
189 .iter()
190 .map(|part| {
191 let (new_node, new_group_num) =
192 SimplifiedTreeNode::from_ere_part(&part, group_num, config);
193 group_num = new_group_num;
194 new_node
195 })
196 .collect();
197 return (SimplifiedTreeNode::Concat(parts), group_num);
198 }
199 fn from_ere_part(
200 value: &EREPart,
201 group_num: usize,
202 config: &Config,
203 ) -> (SimplifiedTreeNode, usize) {
204 return match value {
205 EREPart::Single(expr) => {
206 SimplifiedTreeNode::from_ere_expression(expr, group_num, config)
207 }
208 EREPart::Quantified(expr, quantifier) => {
209 let (child, group_num) =
210 SimplifiedTreeNode::from_ere_expression(expr, group_num, config);
211 let longest = config.quantifiers_prefer_longest() ^ quantifier.alt;
212 let part = match &quantifier.quantifier {
213 QuantifierType::Star => child.star(longest),
214 QuantifierType::Plus => child.clone().concat(child.star(longest)),
215 QuantifierType::QuestionMark => child.optional(longest),
216 QuantifierType::Multiple(n) => child.repeat(*n as usize),
217 QuantifierType::Range(n, None) => child
218 .clone()
219 .repeat(*n as usize)
220 .concat(child.star(longest)),
221 QuantifierType::Range(n, Some(m)) => match m.checked_sub(*n) {
222 None => SimplifiedTreeNode::Never,
223 Some(0) => child.repeat(*n as usize),
224 Some(r) => child
225 .clone()
226 .repeat(*n as usize)
227 .concat(child.upto(r as usize, longest)),
228 },
229 };
230 (part, group_num)
231 }
232 EREPart::Start => (SimplifiedTreeNode::Start, group_num),
233 EREPart::End => (SimplifiedTreeNode::End, group_num),
234 };
235 }
236 fn from_ere_expression(
237 value: &EREExpression,
238 group_num: usize,
239 config: &Config,
240 ) -> (SimplifiedTreeNode, usize) {
241 return match value {
242 EREExpression::Atom(atom) => (atom.clone().into(), group_num),
243 EREExpression::Subexpression(ere) => {
244 let (capture, next_group_num) =
245 SimplifiedTreeNode::from_sub_ere(ere, group_num + 1, config);
246 (
247 SimplifiedTreeNode::Capture(capture.into(), group_num),
248 next_group_num,
249 )
250 }
251 };
252 }
253 pub fn from_ere(value: &ERE, config: &Config) -> (SimplifiedTreeNode, usize) {
255 let (root, groups) = SimplifiedTreeNode::from_sub_ere(value, 1, config);
256 return (SimplifiedTreeNode::Capture(Box::new(root), 0), groups);
257 }
258
259 pub(crate) fn from_ere_no_group0(value: &ERE, config: &Config) -> (SimplifiedTreeNode, usize) {
261 return SimplifiedTreeNode::from_sub_ere(value, 1, config);
262 }
263}
264impl From<ERE> for SimplifiedTreeNode {
265 fn from(value: ERE) -> Self {
266 return SimplifiedTreeNode::from_ere(&value, &Config::default()).0;
267 }
268}
269impl From<Atom> for SimplifiedTreeNode {
270 fn from(value: Atom) -> Self {
271 return SimplifiedTreeNode::Symbol(value);
272 }
273}