lwb_parser/parser/peg/
parser_sugar.rs

1use crate::codegen_prelude::{ParsePairExpression, ParsePairSort};
2use crate::parser::peg::parse_error::PEGParseError;
3use crate::parser::peg::parser_core_ast::{CoreAst, CoreExpression, CoreSort, ParsePairRaw};
4use crate::parser::peg::parser_core_file;
5use crate::parser::peg::parser_sugar_ast::{Annotation, Expression, Sort, SyntaxFileAst};
6use crate::sources::character_class::CharacterClass;
7use crate::sources::source_file::SourceFile;
8use itertools::Itertools;
9use std::collections::HashMap;
10
11/// Parse a file by:
12/// 1. Desugaring the AST to core syntax
13/// 2. Parsing the source file using core syntax
14/// 3. Resugaring the resulting ParsePairRaw
15pub fn parse_file<'src>(
16    ast: &'src SyntaxFileAst,
17    file: &'src SourceFile,
18) -> (ParsePairSort<'src>, Vec<PEGParseError>) {
19    //Desugar
20    let core_ast = desugar_ast(ast);
21
22    //Parse
23    let (res, errs) = parser_core_file::parse_file(&core_ast, file);
24
25    //Resugar
26    let starting_sort = ast.sorts.get(&ast.starting_sort).unwrap();
27    (resugar_sort(ast, starting_sort, res), errs)
28}
29
30fn desugar_ast(ast: &SyntaxFileAst) -> CoreAst {
31    let mut sorts = HashMap::new();
32    //Insert all sorts
33    ast.sorts.values().for_each(|s| {
34        sorts.insert(&s.name[..], desugar_sort(s));
35    });
36    //If there is no layout sort, insert one
37    if !sorts.contains_key("layout") {
38        sorts.insert(
39            "layout",
40            CoreSort {
41                name: "layout",
42                expr: CoreExpression::CharacterClass(CharacterClass::Nothing),
43                annotations: vec![],
44            },
45        );
46    }
47
48    CoreAst {
49        sorts,
50        starting_sort: &ast.starting_sort,
51    }
52}
53
54fn desugar_sort(sort: &Sort) -> CoreSort {
55    CoreSort {
56        name: &sort.name,
57        expr: CoreExpression::Choice(
58            sort.constructors
59                .iter()
60                .map(|c| {
61                    let mut base = desugar_expr(&c.expression);
62                    if c.annotations.contains(&Annotation::NoLayout) {
63                        base = CoreExpression::FlagNoLayout(Box::new(base));
64                        base = CoreExpression::FlagNoErrors(
65                            Box::new(base),
66                            String::from_iter([&sort.name, ".", &c.name]),
67                        );
68                    }
69
70                    if let Some(e) = c.annotations.iter().find_map(|i| {
71                        if let Annotation::Error(e) = i {
72                            Some(e)
73                        } else {
74                            None
75                        }
76                    }) {
77                        base = CoreExpression::Error(Box::new(base), e.to_string())
78                    }
79
80                    base
81                })
82                .collect(),
83        ),
84        annotations: sort.annotations.clone(),
85    }
86}
87
88fn desugar_expr(expr: &Expression) -> CoreExpression {
89    match expr {
90        Expression::Sort(name) => CoreExpression::Name(&name[..]),
91        Expression::Sequence(constructors) => {
92            CoreExpression::Sequence(constructors.iter().map(desugar_expr).collect_vec())
93        }
94        Expression::Repeat { e: c, min, max } => CoreExpression::Repeat {
95            subexpr: Box::new(desugar_expr(c)),
96            min: *min,
97            max: *max,
98        },
99        Expression::CharacterClass(cc) => CoreExpression::CharacterClass(cc.clone()),
100        Expression::Choice(constructors) => {
101            CoreExpression::Choice(constructors.iter().map(desugar_expr).collect_vec())
102        }
103        //Literals are desugared to a sequence of character classes
104        Expression::Literal(lit) => {
105            CoreExpression::FlagNoLayout(Box::new(CoreExpression::FlagNoErrors(
106                Box::new(CoreExpression::Sequence(
107                    lit.chars()
108                        .map(|c| CoreExpression::CharacterClass(c.into()))
109                        .collect_vec(),
110                )),
111                String::from_iter(["'", lit, "'"]),
112            )))
113        }
114        Expression::Negative(_) => {
115            todo!()
116        }
117        Expression::Positive(_) => {
118            todo!()
119        }
120        Expression::Delimited {
121            e,
122            delim,
123            min,
124            max,
125            trailing,
126        } => {
127            let e = desugar_expr(e);
128            let delim = desugar_expr(delim);
129
130            let mut options = vec![];
131            //Can parse count > 0
132            if max.is_none() || max.unwrap() > 0 {
133                options.push(CoreExpression::Sequence(vec![
134                    e.clone(),
135                    CoreExpression::Repeat {
136                        subexpr: Box::new(CoreExpression::Sequence(vec![delim.clone(), e.clone()])),
137                        min: min.saturating_sub(1),
138                        max: max.map(|max| max.saturating_sub(1)),
139                    },
140                ]));
141            }
142            //Can parse count == 0
143            if *min == 0 {
144                options.push(CoreExpression::Sequence(vec![]));
145            }
146
147            let choice = CoreExpression::Choice(options);
148            if *trailing {
149                CoreExpression::Sequence(vec![
150                    choice,
151                    CoreExpression::Repeat {
152                        subexpr: Box::new(delim),
153                        min: 0,
154                        max: Some(1),
155                    },
156                ])
157            } else {
158                CoreExpression::Sequence(vec![choice])
159            }
160        }
161    }
162}
163
164fn resugar_sort<'src>(
165    ast: &'src SyntaxFileAst,
166    sort: &'src Sort,
167    pair: ParsePairRaw,
168) -> ParsePairSort<'src> {
169    match pair {
170        ParsePairRaw::Choice(_, i, subpair) => ParsePairSort {
171            sort: &sort.name[..],
172            constructor_name: &sort.constructors[i].name[..],
173            constructor_value: resugar_expr(ast, &sort.constructors[i].expression, *subpair),
174        },
175        ParsePairRaw::Error(span) => ParsePairSort {
176            sort: &sort.name[..],
177            constructor_name: "ERROR",
178            constructor_value: ParsePairExpression::Error(span),
179        },
180        _ => unreachable!(),
181    }
182}
183
184fn resugar_expr<'src>(
185    ast: &'src SyntaxFileAst,
186    sort: &'src Expression,
187    pair: ParsePairRaw,
188) -> ParsePairExpression<'src> {
189    match (sort, pair) {
190        (Expression::Sort(name), ParsePairRaw::Name(span, val)) => ParsePairExpression::Sort(
191            span,
192            Box::new(resugar_sort(ast, ast.sorts.get(name).unwrap(), *val)),
193        ),
194        (Expression::Sequence(exprs), ParsePairRaw::List(span, vals)) => ParsePairExpression::List(
195            span,
196            exprs
197                .iter()
198                .zip(vals.into_iter())
199                .map(|(e, v)| resugar_expr(ast, e, v))
200                .collect_vec(),
201        ),
202        (Expression::Repeat { e: c, .. }, ParsePairRaw::List(span, vals)) => {
203            ParsePairExpression::List(
204                span,
205                vals.into_iter()
206                    .map(|v| resugar_expr(ast, c, v))
207                    .collect_vec(),
208            )
209        }
210        (Expression::CharacterClass(_), ParsePairRaw::Empty(span)) => {
211            ParsePairExpression::Empty(span)
212        }
213        (Expression::Choice(constructors), ParsePairRaw::Choice(span, i, expr)) => {
214            ParsePairExpression::Choice(
215                span,
216                i,
217                Box::new(resugar_expr(ast, &constructors[i], *expr)),
218            )
219        }
220        (Expression::Literal(_), ParsePairRaw::List(span, _)) => ParsePairExpression::Empty(span),
221        (Expression::Delimited { e, max, .. }, ParsePairRaw::List(span, list)) => {
222            //If max is 0, empty list
223            if !max.is_none() && max.unwrap() == 0 {
224                return ParsePairExpression::List(span, vec![]);
225            };
226            //Get choice
227            let (i, choice) =
228                if let ParsePairRaw::Choice(_, i, choice) = list.into_iter().next().unwrap() {
229                    (i, choice)
230                } else {
231                    return ParsePairExpression::Error(span);
232                };
233            //If choice was not 0, empty list
234            if i != 0 {
235                return ParsePairExpression::List(span, vec![]);
236            };
237            //Find elements inside choice
238            let seq = if let ParsePairRaw::List(_, seq) = *choice {
239                seq
240            } else {
241                return ParsePairExpression::Error(span);
242            };
243
244            let mut result = vec![];
245            let mut seq_iter = seq.into_iter();
246
247            //Inside choice is first an expr, then a repeat of seq (delim, expr)
248            //We first find the first expr
249            let seq0 = seq_iter.next().unwrap();
250            result.push(resugar_expr(ast, e, seq0));
251
252            //See if the rest of the expr is present
253            let next = seq_iter.next();
254            if next.is_none() {
255                return ParsePairExpression::List(span, result);
256            }
257            //It is present, lets get the list of them
258            let seq1 = if let ParsePairRaw::List(_, list) = next.unwrap() {
259                list
260            } else {
261                return ParsePairExpression::Error(span);
262            };
263            //Map each element in the list to get the expr
264            seq1.into_iter().for_each(|pair| {
265                result.push(if let ParsePairRaw::List(span, list) = pair {
266                    if list.len() < 2 {
267                        ParsePairExpression::Error(span)
268                    } else {
269                        resugar_expr(ast, e, list.into_iter().nth(1).unwrap())
270                    }
271                } else {
272                    ParsePairExpression::Error(pair.span())
273                });
274            });
275
276            ParsePairExpression::List(span, result)
277        }
278        (_, ParsePairRaw::Error(span)) => ParsePairExpression::Error(span),
279        (_, _) => unreachable!(),
280    }
281}