1use crate::peg::grammar::PEG;
2use crate::peg::parsing::{Capture, ParserError, ParserErrorCode, ParserOutput, Token};
3use log::debug;
4use std::collections::HashSet;
5use std::fmt::{Display, Formatter};
6use std::rc::Rc;
7
8#[derive(Clone, Debug)]
10pub enum Expression {
11 Empty,
12 Any,
13 Literal(String),
14 NonTerminal(String), Range(String, String),
16 Class(HashSet<String>),
17 Group(Rc<Box<Expression>>),
18 ZeroOrMore(Rc<Box<Expression>>),
19 OneOrMore(Rc<Box<Expression>>),
20 Optional(Rc<Box<Expression>>),
21 And(Rc<Box<Expression>>),
22 Not(Rc<Box<Expression>>),
23 Choice(Vec<Expression>), Sequence(Vec<Expression>), }
26
27impl Expression {
28 pub fn into_rc_box(self) -> Rc<Box<Expression>> {
29 Rc::new(Box::new(self))
30 }
31
32 pub fn make_class(symbols: &[&str]) -> Expression {
33 let mut class: HashSet<String> = Default::default();
34 for s in symbols.iter().map(|c| c.to_string()) {
35 class.insert(s);
36 }
37
38 Expression::Class(class)
39 }
40
41 pub fn parse(&self, peg: &PEG, input: &str, cursor: usize, depth: usize) -> ParserOutput {
42 match self {
43 Expression::Empty => ParserOutput(1, cursor, Ok(vec![])),
44 Expression::Any => {
45 if cursor < input.len() {
46 ParserOutput(
47 1,
48 cursor + 1,
49 Ok(vec![Token::Terminal(Capture(cursor, cursor + 1))]),
50 )
51 } else {
52 ParserOutput(
53 1,
54 cursor,
55 Err(ParserError {
56 position: cursor,
57 expression: self.clone(),
58 error: ParserErrorCode::UnexpectedEndOfInput,
59 cause: None,
60 }),
61 )
62 }
63 }
64 Expression::Literal(pattern) => {
65 let input = &input[cursor..];
66 let len = pattern.len();
67 if input.starts_with(pattern) {
68 ParserOutput(
69 len as u32,
70 cursor + len,
71 Ok(vec![Token::Terminal(Capture(cursor, cursor + len))]),
72 )
73 } else {
74 ParserOutput(
76 len as u32,
77 cursor,
78 Err(ParserError {
79 position: cursor,
80 expression: self.clone(),
81 error: ParserErrorCode::ExpressionDoesNotMatch,
82 cause: None,
83 }),
84 )
85 }
86 }
87 Expression::NonTerminal(nt) => {
88 let rule = peg
89 .rules
90 .get(nt)
91 .expect(format!("Encountered unknown NonTerminal symbol: {}", nt).as_str());
92
93 debug!("{}parsing {nt} @ {cursor}: {rule}", "│".repeat(depth));
94
95 let ParserOutput(cost, new_cursor, res) = rule.parse(peg, input, cursor, depth + 1);
96
97 let res = match res {
98 Ok(matches) => ParserOutput(
99 cost,
100 new_cursor,
101 Ok(vec![Token::NonTerminal(
102 nt.clone(),
103 Capture(cursor, new_cursor),
104 matches,
105 )]),
106 ),
107 Err(inner) => ParserOutput(
108 cost,
109 cursor,
110 Err(ParserError {
111 position: cursor,
112 expression: self.clone(),
113 error: ParserErrorCode::NonTerminalDoesNotMatch,
114 cause: Some(Box::from(inner)),
115 }),
116 ),
117 };
118
119 debug!(
120 "{}└{} parsing {nt} @ {cursor} advanced to {new_cursor}",
121 "│".repeat(depth),
122 if res.2.is_ok() { "success" } else { "failure" }
123 );
124
125 res
126 }
127 Expression::Range(from, to) => {
128 if (from.as_str()..to.as_str()).contains(&&input[cursor..]) {
130 ParserOutput(
133 1,
134 cursor + 1,
135 Ok(vec![Token::Terminal(Capture(cursor, cursor + 1))]),
136 )
137 } else {
138 ParserOutput(
139 1,
140 cursor,
141 Err(ParserError {
142 position: cursor,
143 expression: self.clone(),
144 error: ParserErrorCode::ExpressionDoesNotMatch,
145 cause: None,
146 }),
147 )
148 }
149 }
150 Expression::Class(class) => {
151 let input = &input[cursor..];
152 let mut cost = 1;
153 for symbol in class {
154 cost += symbol.len();
155 if input.starts_with(symbol) {
156 return ParserOutput(
157 cost as u32,
158 cursor + symbol.len(),
159 Ok(vec![Token::Terminal(Capture(
160 cursor,
161 cursor + symbol.len(),
162 ))]),
163 );
164 }
165 }
166 ParserOutput(
167 cost as u32,
168 cursor,
169 Err(ParserError {
170 position: cursor,
171 expression: self.clone(),
172 error: ParserErrorCode::ExpressionDoesNotMatch,
173 cause: None,
174 }),
175 )
176 }
177 Expression::Group(expression) => {
178 let ParserOutput(cost, cursor, res) = expression.parse(peg, input, cursor, depth);
179 ParserOutput(cost + 1, cursor, res)
180 }
181 Expression::ZeroOrMore(expression) => {
182 let mut captures = vec![];
183 let mut new_cursor = cursor;
184 let mut running_cost = 1;
185 loop {
186 let ParserOutput(cost, downstream_cursor, res) =
187 expression.parse(peg, input, new_cursor, depth);
188 running_cost += cost;
189 match res {
190 Ok(mut matches) => {
191 new_cursor = downstream_cursor;
192 captures.append(&mut matches);
193 }
194 Err(_) => {
195 break ParserOutput(running_cost, new_cursor, Ok(captures));
196 }
197 }
198 }
199 }
200 Expression::OneOrMore(expression) => {
201 let desugared = Expression::Sequence(vec![
202 Expression::Group(expression.clone()),
203 Expression::ZeroOrMore(expression.clone()),
204 ]);
205 desugared.parse(peg, input, cursor, depth)
206 }
207 Expression::Optional(expression) => {
208 let ParserOutput(cost, cursor, res) = expression.parse(peg, input, cursor, depth);
209 match res {
210 Ok(matches) => ParserOutput(cost + 1, cursor, Ok(matches)),
211 Err(_) => ParserOutput(cost + 1, cursor, Ok(vec![])),
212 }
213 }
214 Expression::And(peek) => {
215 let ParserOutput(cost, _, res) = peek.parse(peg, input, cursor, depth);
216 match res {
217 Ok(_) => ParserOutput(cost + 1, cursor, Ok(vec![])),
218 Err(inner) => ParserOutput(
219 cost + 1,
220 cursor,
221 Err(ParserError {
222 position: cursor,
223 expression: self.clone(),
224 error: ParserErrorCode::ExpressionDoesNotMatch,
225 cause: Some(Box::from(inner)),
226 }),
227 ),
228 }
229 }
230 Expression::Not(peek) => {
231 let ParserOutput(cost, _, res) = peek.parse(peg, input, cursor, depth);
232 match res {
233 Ok(inner) => ParserOutput(
234 cost + 1,
235 cursor,
236 Err(ParserError {
237 position: cursor,
238 expression: self.clone(),
239 error: ParserErrorCode::NotDidMatch(inner),
240 cause: None,
241 }),
242 ),
243 Err(_) => ParserOutput(cost + 1, cursor, Ok(vec![])),
244 }
245 }
246 Expression::Choice(choices) => {
247 let mut running_cost = 1;
248 for expression in choices {
249 let ParserOutput(cost, cursor, res) =
250 expression.parse(peg, input, cursor, depth);
251 running_cost += cost;
252 if let Ok(res) = res {
253 return ParserOutput(running_cost, cursor, Ok(res));
254 }
255 }
256
257 ParserOutput(
258 running_cost,
259 cursor,
260 Err(ParserError {
261 position: cursor,
262 expression: self.clone(),
263 error: ParserErrorCode::ExpressionDoesNotMatch,
264 cause: None,
265 }),
266 )
267 }
268 Expression::Sequence(sequence) => {
269 let mut running_cost = 1;
270 let mut captures: Vec<Token> = vec![];
271 let mut new_cursor = cursor;
272
273 for expression in sequence {
274 let ParserOutput(cost, cursor, res) =
275 expression.parse(peg, input, new_cursor, depth);
276 running_cost += cost;
277 match res {
278 Ok(mut res) => {
279 captures.append(&mut res);
280 new_cursor = cursor;
281 }
282 Err(inner) => {
283 return ParserOutput(
284 running_cost,
285 cursor,
286 Err(ParserError {
287 position: cursor,
288 expression: self.clone(),
289 error: ParserErrorCode::ExpressionDoesNotMatch,
290 cause: Some(Box::from(inner)),
291 }),
292 );
293 }
294 }
295 }
296
297 ParserOutput(running_cost, new_cursor, Ok(captures))
298 }
299 }
300 }
301}
302
303fn escape_literal(literal: &String) -> String {
304 literal
305 .replace("\\", "\\\\")
306 .replace("\\\\0", "\\0")
307 .replace("\\\\1", "\\1")
308 .replace("\\\\2", "\\2")
309 .replace("\\\\3", "\\3")
310 .replace("\\\\4", "\\4")
311 .replace("\\\\5", "\\5")
312 .replace("\\\\6", "\\6")
313 .replace("\\\\7", "\\7")
314 .replace("\\\\8", "\\8")
315 .replace("\\\\9", "\\9")
316 .replace("\r", "\\r")
317 .replace("\t", "\\t")
318 .replace("\n", "\\n")
319}
320
321impl Display for Expression {
322 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
323 match self {
324 Expression::Empty => write!(f, "()"),
325 Expression::Any => write!(f, "."),
326 Expression::Literal(literal) => {
327 write!(f, "'{}'", escape_literal(literal).replace("'", "\\'"))
328 }
329 Expression::NonTerminal(token) => write!(f, "{}", token),
330 Expression::Range(a, b) => write!(f, "[{}-{}]", a, b), Expression::Class(class) => {
332 write!(
333 f,
334 "[{}]",
335 class
336 .iter()
337 .map(|c| escape_literal(c).replace("[", "\\[").replace("]", "\\]"))
338 .collect::<Vec<_>>()
339 .join("")
340 )
341 }
342 Expression::Group(group) => write!(f, "({})", group),
343 Expression::ZeroOrMore(inner) => write!(f, "{}*", inner),
344 Expression::OneOrMore(inner) => write!(f, "{}+", inner),
345 Expression::Optional(inner) => write!(f, "{}?", inner),
346 Expression::And(inner) => write!(f, "&{}", inner),
347 Expression::Not(inner) => write!(f, "!{}", inner),
348 Expression::Choice(choices) => {
349 if choices.len() > 1 {
350 write!(
351 f,
352 "({})",
353 choices
354 .iter()
355 .map(|c| format!("{}", c))
356 .collect::<Vec<_>>()
357 .join(" / ")
358 )
359 } else {
360 write!(
361 f,
362 "{}",
363 choices
364 .iter()
365 .map(|c| format!("{}", c))
366 .collect::<Vec<_>>()
367 .join(" / ")
368 )
369 }
370 }
371 Expression::Sequence(sequence) => {
372 if sequence.len() > 1 {
373 write!(
374 f,
375 "({})",
376 sequence
377 .iter()
378 .map(|c| format!("{}", c))
379 .collect::<Vec<_>>()
380 .join(" ")
381 )
382 } else {
383 write!(
384 f,
385 "{}",
386 sequence
387 .iter()
388 .map(|c| format!("{}", c))
389 .collect::<Vec<_>>()
390 .join(" ")
391 )
392 }
393 }
394 }
395 }
396}