1use std::collections::BTreeMap as Map;
2use simple_lexer_bootstrap::Token;
3
4type Result<T> = std::result::Result<T, &'static str>;
5
6#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
7pub enum ParseTree<N, T> {
8 Token { token: Token<T> },
9 Nonterminal { nonterminal: N, tokens: Vec<Token<T>>, children: Vec<ParseTree<N, T>> },
10 Ephemeral { tokens: Vec<Token<T>>, children: Vec<ParseTree<N, T>> },
11}
12
13impl<N, T> ParseTree<N, T> {
14 pub fn tokens_len(&self) -> usize {
15 match self {
16 ParseTree::Token { .. } => 1,
17 ParseTree::Nonterminal { tokens, .. } => tokens.len(),
18 ParseTree::Ephemeral { tokens, .. } => tokens.len(),
19 }
20 }
21}
22
23#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
24pub enum Expression<N, T> {
25 TokenKind { token_kind: T },
26 Nonterminal { nonterminal: N },
27 Alternation { expressions: Vec<Expression<N, T>> },
28 Concatenation { expressions: Vec<Expression<N, T>> },
29 Repetition { expression: Box<Expression<N, T>>, min: Option<u32>, max: Option<u32> },
30}
31
32impl<N: Clone + Ord, T: Clone + PartialEq> Expression<N, T> {
33 pub fn parse(&self, tokens: &[Token<T>], productions: &Map<N, Expression<N, T>>) -> Result<ParseTree<N, T>> {
34 match self {
35 Expression::TokenKind { token_kind } => {
36 if let Some(current_token) = tokens.get(0) {
37 if token_kind == current_token.kind() {
38 Ok(ParseTree::Token {
39 token: current_token.clone(),
40 })
41 } else { Err("unexpected token") }
42 } else { Err("unexpected eof") }
43 },
44 Expression::Nonterminal { nonterminal } => {
45 if let Some(expression) = productions.get(nonterminal) {
46 let parse_tree = expression.parse(tokens, productions)?;
47 match parse_tree {
48 ParseTree::Token { token } => {
49 Ok(ParseTree::Nonterminal {
50 nonterminal: nonterminal.clone(),
51 tokens: vec![token.clone()],
52 children: vec![ParseTree::Token { token }],
53 })
54 },
55 ParseTree::Nonterminal { nonterminal: child_nonterminal, tokens, children } => {
56 Ok(ParseTree::Nonterminal {
57 nonterminal: nonterminal.clone(),
58 tokens: tokens.clone(),
59 children: vec![ParseTree::Nonterminal {
60 nonterminal: child_nonterminal,
61 tokens,
62 children,
63 }],
64 })
65 },
66 ParseTree::Ephemeral { tokens, children } => {
67 Ok(ParseTree::Nonterminal {
68 nonterminal: nonterminal.clone(),
69 tokens,
70 children,
71 })
72 },
73 }
74 } else { Err("undefined nonterminal") }
75 },
76 Expression::Alternation { expressions } => {
77 let mut parse_trees = Vec::new();
78 for expression in expressions {
79 if let Ok(parse_tree) = expression.parse(tokens, productions) {
80 parse_trees.push(parse_tree);
81 }
82 }
83 if let Some(mut parse_tree) = parse_trees.pop() {
84 while let Some(current_parse_tree) = parse_trees.pop() {
85 if parse_tree.tokens_len() < current_parse_tree.tokens_len() {
86 parse_tree = current_parse_tree;
87 }
88 }
89 match parse_tree {
90 ParseTree::Token { token } => {
91 Ok(ParseTree::Ephemeral {
92 tokens: vec![token.clone()],
93 children: vec![ParseTree::Token { token }],
94 })
95 },
96 ParseTree::Nonterminal { nonterminal, tokens, children } => {
97 Ok(ParseTree::Ephemeral {
98 tokens: tokens.clone(),
99 children: vec![ParseTree::Nonterminal {
100 nonterminal,
101 tokens,
102 children,
103 }],
104 })
105 },
106 ParseTree::Ephemeral { tokens, children } => {
107 Ok(ParseTree::Ephemeral {
108 tokens,
109 children,
110 })
111 },
112 }
113 } else { Err("no match") }
114 },
115 Expression::Concatenation { expressions } => {
116 let mut offset = 0;
117 let mut matched_tokens = Vec::new();
118 let mut children = Vec::new();
119 for expression in expressions {
120 if let Ok(parse_tree) = expression.parse(&tokens[offset..], productions) {
121 offset += parse_tree.tokens_len();
122 match parse_tree {
123 ParseTree::Token { token } => {
124 matched_tokens.push(token.clone());
125 children.push(ParseTree::Token { token });
126 },
127 ParseTree::Nonterminal { nonterminal, tokens, children: child_children } => {
128 matched_tokens.extend(tokens.clone());
129 children.push(ParseTree::Nonterminal {
130 nonterminal,
131 tokens,
132 children: child_children,
133 });
134 },
135 ParseTree::Ephemeral { tokens, children: child_children } => {
136 matched_tokens.extend(tokens);
137 children.extend(child_children);
138 },
139 }
140 } else { return Err("no match"); }
141 }
142 Ok(ParseTree::Ephemeral {
143 tokens: matched_tokens,
144 children,
145 })
146 },
147 Expression::Repetition { expression, min, max } => {
148 let mut count = 0;
149 let mut offset = 0;
150 let mut matched_tokens = Vec::new();
151 let mut children = Vec::new();
152 while match max { Some(max) => count < *max, None => true } {
153 if let Ok(parse_tree) = expression.parse(&tokens[offset..], productions) {
154 count += 1;
155 offset += parse_tree.tokens_len();
156 match parse_tree {
157 ParseTree::Token { token } => {
158 matched_tokens.push(token.clone());
159 children.push(ParseTree::Token { token });
160 },
161 ParseTree::Nonterminal { nonterminal, tokens, children: child_children } => {
162 matched_tokens.extend(tokens.clone());
163 children.push(ParseTree::Nonterminal {
164 nonterminal,
165 tokens,
166 children: child_children,
167 });
168 },
169 ParseTree::Ephemeral { tokens, children: child_children } => {
170 matched_tokens.extend(tokens);
171 children.extend(child_children);
172 },
173 }
174 } else if match min { Some(min) => count >= *min, None => true } {
175 return Ok(ParseTree::Ephemeral {
176 tokens: matched_tokens,
177 children,
178 });
179 } else { return Err("partial match"); }
180 }
181 Ok(ParseTree::Ephemeral {
182 tokens: matched_tokens,
183 children,
184 })
185 },
186 }
187 }
188}
189
190#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
191pub struct Parser<N, T> {
192 productions: Map<N, Expression<N, T>>,
193 root: N,
194}
195
196impl<N, T> Parser<N, T> {
197 pub fn new(productions: Map<N, Expression<N, T>>, root: N) -> Parser<N, T> {
198 Parser { productions, root }
199 }
200}
201
202impl<N: Clone + Ord, T: Clone + PartialEq> Parser<N, T> {
203 pub fn parse(&self, tokens: &[Token<T>]) -> Result<ParseTree<N, T>> {
204 if let Some(expression) = self.productions.get(&self.root) {
205 let mut parse_tree = expression.parse(tokens, &self.productions)?;
206 if parse_tree.tokens_len() == tokens.len() {
207 parse_tree = match parse_tree {
208 ParseTree::Token { token } => {
209 ParseTree::Nonterminal {
210 nonterminal: self.root.clone(),
211 tokens: vec![token.clone()],
212 children: vec![
213 ParseTree::Token { token }
214 ]
215 }
216 },
217 ParseTree::Nonterminal { nonterminal, tokens, children } => {
218 ParseTree::Nonterminal {
219 nonterminal: self.root.clone(),
220 tokens: tokens.clone(),
221 children: vec![
222 ParseTree::Nonterminal {
223 nonterminal,
224 tokens,
225 children
226 }
227 ]
228 }
229 },
230 ParseTree::Ephemeral { tokens, children } => {
231 ParseTree::Nonterminal {
232 nonterminal: self.root.clone(),
233 tokens,
234 children,
235 }
236 },
237 };
238 Ok(parse_tree)
239 } else { Err("partial match") }
240 } else { Err("undefined root nonterminal") }
241 }
242}
243
244#[macro_export]
245macro_rules! tok {
246 ($x:expr) => {{
247 $crate::Expression::TokenKind { token_kind: $x }
248 }}
249}
250
251#[macro_export]
252macro_rules! non {
253 ($x:expr) => {{
254 $crate::Expression::Nonterminal { nonterminal: $x }
255 }}
256}
257
258#[macro_export]
259macro_rules! alt {
260 ($($x:expr),*) => {{
261 #[allow(unused_mut)]
262 let mut temp_vec = Vec::new();
263 $(temp_vec.push($x);)*
264 $crate::Expression::Alternation { expressions: temp_vec }
265 }}
266}
267
268#[macro_export]
269macro_rules! con {
270 ($($x:expr),*) => {{
271 #[allow(unused_mut)]
272 let mut temp_vec = Vec::new();
273 $(temp_vec.push($x);)*
274 $crate::Expression::Concatenation { expressions: temp_vec }
275 }}
276}
277
278#[macro_export]
279macro_rules! rep {
280 ($x:expr, $y:expr, $z:expr) => {{
281 $crate::Expression::Repetition { expression: Box::new($x), min: $y, max: $z }
282 }}
283}
284
285#[macro_export]
286macro_rules! ast { ($x:expr) => {{
288 $crate::Expression::Repetition { expression: Box::new($x), min: None, max: None }
289 }}
290}
291
292#[macro_export]
293macro_rules! plu { ($x:expr) => {{
295 $crate::Expression::Repetition { expression: Box::new($x), min: Some(1), max: None }
296 }}
297}
298
299#[macro_export]
300macro_rules! que { ($x:expr) => {{
302 $crate::Expression::Repetition { expression: Box::new($x), min: None, max: Some(1) }
303 }}
304}
305
306#[cfg(test)]
307mod tests {
308 use super::Result;
309 use simple_lexer_bootstrap::Token;
310 use crate::{
311 ParseTree,
312 Parser,
313 };
314
315 #[test]
316 fn test_expression() -> Result<()> {
317 #[allow(non_camel_case_types)]
318 #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
319 enum TokenKind {
320 PLUS_SIGN,
321 HYPHEN,
322 ASTERISK,
323 SLASH,
324 NUMBER,
325 LEFT_PARENTHESIS,
326 RIGHT_PARENTHESIS,
327 }
328 use TokenKind::*;
329 #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
330 enum Nonterminal {
331 Addition,
332 Multiplication,
333 Atom,
334 Number,
335 }
336 use Nonterminal::*;
337 let expected = ParseTree::Nonterminal {
338 nonterminal: Addition,
339 tokens: vec![
340 Token::new(NUMBER, "1"),
341 Token::new(SLASH, "/"),
342 Token::new(LEFT_PARENTHESIS, "("),
343 Token::new(NUMBER, "2"),
344 Token::new(PLUS_SIGN, "+"),
345 Token::new(HYPHEN, "-"),
346 Token::new(NUMBER, "3"),
347 Token::new(RIGHT_PARENTHESIS, ")")
348 ],
349 children: vec![
350 ParseTree::Nonterminal {
351 nonterminal: Multiplication,
352 tokens: vec![
353 Token::new(NUMBER, "1"),
354 Token::new(SLASH, "/"),
355 Token::new(LEFT_PARENTHESIS, "("),
356 Token::new(NUMBER, "2"),
357 Token::new(PLUS_SIGN, "+"),
358 Token::new(HYPHEN, "-"),
359 Token::new(NUMBER, "3"),
360 Token::new(RIGHT_PARENTHESIS, ")")
361 ],
362 children: vec![
363 ParseTree::Nonterminal {
364 nonterminal: Atom,
365 tokens: vec![Token::new(NUMBER, "1")],
366 children: vec![
367 ParseTree::Nonterminal {
368 nonterminal: Number,
369 tokens: vec![Token::new(NUMBER, "1")],
370 children: vec![
371 ParseTree::Token { token: Token::new(NUMBER, "1") }
372 ],
373 },
374 ],
375 },
376 ParseTree::Token { token: Token::new(SLASH, "/") },
377 ParseTree::Nonterminal {
378 nonterminal: Atom,
379 tokens: vec![
380 Token::new(LEFT_PARENTHESIS, "("),
381 Token::new(NUMBER, "2"),
382 Token::new(PLUS_SIGN, "+"),
383 Token::new(HYPHEN, "-"),
384 Token::new(NUMBER, "3"),
385 Token::new(RIGHT_PARENTHESIS, ")")
386 ],
387 children: vec![
388 ParseTree::Token { token: Token::new(LEFT_PARENTHESIS, "(") },
389 ParseTree::Nonterminal {
390 nonterminal: Addition,
391 tokens: vec![
392 Token::new(NUMBER, "2"),
393 Token::new(PLUS_SIGN, "+"),
394 Token::new(HYPHEN, "-"),
395 Token::new(NUMBER, "3"),
396 ],
397 children: vec![
398 ParseTree::Nonterminal {
399 nonterminal: Multiplication,
400 tokens: vec![Token::new(NUMBER, "2")],
401 children: vec![
402 ParseTree::Nonterminal {
403 nonterminal: Atom,
404 tokens: vec![Token::new(NUMBER, "2")],
405 children: vec![
406 ParseTree::Nonterminal {
407 nonterminal: Number,
408 tokens: vec![Token::new(NUMBER, "2")],
409 children: vec![
410 ParseTree::Token { token: Token::new(NUMBER, "2") }
411 ],
412 }
413 ]
414 },
415 ]
416 },
417 ParseTree::Token { token: Token::new(PLUS_SIGN, "+") },
418 ParseTree::Nonterminal {
419 nonterminal: Multiplication,
420 tokens: vec![
421 Token::new(HYPHEN, "-"),
422 Token::new(NUMBER, "3")
423 ],
424 children: vec![
425 ParseTree::Nonterminal {
426 nonterminal: Atom,
427 tokens: vec![
428 Token::new(HYPHEN, "-"),
429 Token::new(NUMBER, "3")
430 ],
431 children: vec![
432 ParseTree::Nonterminal {
433 nonterminal: Number,
434 tokens: vec![
435 Token::new(HYPHEN, "-"),
436 Token::new(NUMBER, "3")
437 ],
438 children: vec![
439 ParseTree::Token { token: Token::new(HYPHEN, "-") },
440 ParseTree::Token { token: Token::new(NUMBER, "3") },
441 ],
442 }
443 ]
444 },
445 ]
446 },
447 ]
448 },
449 ParseTree::Token { token: Token::new(RIGHT_PARENTHESIS, ")") },
450 ]
451 }
452 ],
453 }
454 ]
455 };
456 let parser = Parser::new(map![
457 Addition => con![non!(Multiplication), ast!(con![alt![tok!(PLUS_SIGN), tok!(HYPHEN)], non!(Multiplication)])],
458 Multiplication => con![non!(Atom), ast!(con![alt![tok!(ASTERISK), tok!(SLASH)], non!(Atom)])],
459 Atom => alt![non!(Number), con![tok!(LEFT_PARENTHESIS), non!(Addition), tok!(RIGHT_PARENTHESIS)]],
460 Number => con![que!(alt![tok!(PLUS_SIGN), tok!(HYPHEN)]), tok!(NUMBER)]
461 ], Addition);
462 let actual = parser.parse(&[
464 Token::new(NUMBER, "1"),
465 Token::new(SLASH, "/"),
466 Token::new(LEFT_PARENTHESIS, "("),
467 Token::new(NUMBER, "2"),
468 Token::new(PLUS_SIGN, "+"),
469 Token::new(HYPHEN, "-"),
470 Token::new(NUMBER, "3"),
471 Token::new(RIGHT_PARENTHESIS, ")")
472 ])?;
473 assert_eq!(expected, actual);
474 Ok(())
475 }
476}