pag_parser/frontend/
syntax.rs

1// Copyright (c) 2023 Paguroidea Developers
2//
3// Licensed under the Apache License, Version 2.0
4// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
5// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. All files in the project carrying such notice may not be copied,
7// modified, or distributed except according to those terms.
8
9use std::collections::HashMap;
10
11use crate::{
12    core_syntax::BindingContext,
13    core_syntax::{ParserRule, Term, TermArena, TermPtr},
14    nf::Tag,
15    span_errors,
16    type_system::{type_check, TypeError},
17    utilities::{unreachable_branch, Symbol},
18};
19
20use super::{lexical::LexerDatabase, Error, SurfaceSyntaxTree, WithSpan};
21
22pub struct Parser<'src, 'a> {
23    pub entrypoint: Symbol<'src>,
24    pub arena: &'a TermArena<'src, 'a>,
25    pub bindings: BindingContext<'src, 'a>,
26    pub symbol_table: HashMap<&'src str, WithSpan<'src, Symbol<'src>>>,
27    pub lexer_database: LexerDatabase<'src>,
28}
29
30impl<'src, 'a> Parser<'src, 'a> {
31    pub fn type_check(&self) -> Vec<TypeError<'src>> {
32        let target = unsafe { self.bindings.get(&self.entrypoint).unwrap_unchecked() };
33        type_check(&self.bindings, target.term, self.entrypoint)
34    }
35    pub fn is_active(&self, tag: &Tag<'src>) -> bool {
36        !tag.is_versioned()
37            && self
38                .bindings
39                .get(&tag.symbol())
40                .map(|x| x.active)
41                .unwrap_or(false)
42    }
43}
44
45pub fn construct_parser<'src, 'a>(
46    arena: &'a TermArena<'src, 'a>,
47    lexer_database: LexerDatabase<'src>,
48    sst: &WithSpan<'src, SurfaceSyntaxTree<'src>>,
49) -> Result<Parser<'src, 'a>, Vec<WithSpan<'src, Error<'src>>>> {
50    let mut parser = Parser {
51        entrypoint: Symbol::new(""),
52        arena,
53        bindings: HashMap::new(),
54        lexer_database,
55        symbol_table: HashMap::new(),
56    };
57    let mut errs = match construct_symbol_table(&mut parser, sst) {
58        Ok(()) => vec![],
59        Err(errs) => errs,
60    };
61    match &sst.node {
62        SurfaceSyntaxTree::Parser { entrypoint, rules } => {
63            let entrypoint = match parser.symbol_table.get(entrypoint.span.as_str()) {
64                Some(entrypoint) => entrypoint.clone(),
65                None => {
66                    errs.extend(span_errors!(
67                        UndefinedParserRuleReference,
68                        entrypoint.span,
69                        entrypoint.span.as_str(),
70                    ));
71                    return Err(errs);
72                }
73            };
74            for i in rules.iter() {
75                match &i.node {
76                    SurfaceSyntaxTree::ParserDefinition { active, name, expr } => {
77                        match construct_core_syntax_tree(&parser, expr) {
78                            Ok(expr) => {
79                                let name = unsafe {
80                                    parser
81                                        .symbol_table
82                                        .get(name.span.as_str())
83                                        .unwrap_unchecked()
84                                };
85                                parser.bindings.insert(
86                                    name.node,
87                                    ParserRule {
88                                        active: *active,
89                                        term: parser.arena.alloc(WithSpan {
90                                            span: i.span,
91                                            node: expr.node.clone(),
92                                        }),
93                                    },
94                                );
95                            }
96                            Err(e) => errs.extend(e),
97                        }
98                    }
99                    SurfaceSyntaxTree::ParserFixpoint { active, name, expr } => {
100                        match construct_core_syntax_tree(&parser, expr) {
101                            Ok(expr) => {
102                                let name = unsafe {
103                                    parser
104                                        .symbol_table
105                                        .get(name.span.as_str())
106                                        .unwrap_unchecked()
107                                };
108                                parser.bindings.insert(
109                                    name.node,
110                                    ParserRule {
111                                        active: *active,
112                                        term: parser.arena.alloc(WithSpan {
113                                            span: i.span,
114                                            node: Term::Fix(name.node, expr),
115                                        }),
116                                    },
117                                );
118                            }
119                            Err(e) => errs.extend(e),
120                        }
121                    }
122                    _ => unreachable_branch!(
123                        "parser rule should only contains definitions or fixpoints"
124                    ),
125                }
126            }
127            parser.entrypoint = entrypoint.node;
128            if !errs.is_empty() {
129                return Err(errs);
130            }
131        }
132        _ => unreachable_branch!("sst should be a parser"),
133    }
134    Ok(parser)
135}
136
137fn construct_symbol_table<'src>(
138    context: &mut Parser<'src, '_>,
139    sst: &WithSpan<'src, SurfaceSyntaxTree<'src>>,
140) -> Result<(), Vec<WithSpan<'src, Error<'src>>>> {
141    match &sst.node {
142        SurfaceSyntaxTree::Parser { rules, .. } => {
143            for rule in rules {
144                match &rule.node {
145                    SurfaceSyntaxTree::ParserFixpoint { name, .. }
146                    | SurfaceSyntaxTree::ParserDefinition { name, .. } => {
147                        if let Some(previous) = context.symbol_table.get(name.span.as_str()) {
148                            return Err(span_errors!(
149                                MultipleDefinition,
150                                name.span,
151                                name.span.as_str(),
152                                previous.span,
153                            ));
154                        } else {
155                            context.symbol_table.insert(
156                                name.span.as_str(),
157                                WithSpan {
158                                    span: name.span,
159                                    node: Symbol::new(name.span.as_str()),
160                                },
161                            );
162                        }
163                    }
164                    _ => unreachable_branch!(
165                        "parser rule should only contains definitions or fixpoints"
166                    ),
167                }
168            }
169            Ok(())
170        }
171        _ => unreachable_branch!("sst should be a parser"),
172    }
173}
174
175fn construct_core_syntax_tree<'src, 'a>(
176    translation_context: &Parser<'src, 'a>,
177    sst: &WithSpan<'src, SurfaceSyntaxTree<'src>>,
178) -> Result<TermPtr<'src, 'a>, Vec<WithSpan<'src, Error<'src>>>> {
179    match &sst.node {
180        SurfaceSyntaxTree::ParserAlternative { lhs, rhs } => {
181            let lhs = construct_core_syntax_tree(translation_context, lhs);
182            let rhs = construct_core_syntax_tree(translation_context, rhs);
183            match (lhs, rhs) {
184                (Ok(lhs), Ok(rhs)) => Ok(translation_context.arena.alloc(WithSpan {
185                    span: sst.span,
186                    node: crate::core_syntax::Term::Alternative(lhs, rhs),
187                })),
188                (Ok(_), Err(rhs)) => Err(rhs),
189                (Err(lhs), Ok(_)) => Err(lhs),
190                (Err(mut lhs), Err(mut rhs)) => {
191                    lhs.append(&mut rhs);
192                    Err(lhs)
193                }
194            }
195        }
196        SurfaceSyntaxTree::ParserSequence { lhs, rhs } => {
197            let lhs = construct_core_syntax_tree(translation_context, lhs);
198            let rhs = construct_core_syntax_tree(translation_context, rhs);
199            match (lhs, rhs) {
200                (Ok(lhs), Ok(rhs)) => Ok(translation_context.arena.alloc(WithSpan {
201                    span: sst.span,
202                    node: crate::core_syntax::Term::Sequence(lhs, rhs),
203                })),
204                (Ok(_), Err(rhs)) => Err(rhs),
205                (Err(lhs), Ok(_)) => Err(lhs),
206                (Err(mut lhs), Err(mut rhs)) => {
207                    lhs.append(&mut rhs);
208                    Err(lhs)
209                }
210            }
211        }
212        SurfaceSyntaxTree::ParserStar { inner } => {
213            let symbol = Symbol::new(sst.span.as_str());
214            let inner = construct_core_syntax_tree(translation_context, inner)?;
215            // \x . (i ~ x) | epsilon
216            let sequence = translation_context.arena.alloc(WithSpan {
217                span: sst.span,
218                node: crate::core_syntax::Term::Sequence(
219                    inner,
220                    translation_context.arena.alloc(WithSpan {
221                        span: sst.span,
222                        node: crate::core_syntax::Term::ParserRef(symbol),
223                    }),
224                ),
225            });
226            let alternative = translation_context.arena.alloc(WithSpan {
227                span: sst.span,
228                node: crate::core_syntax::Term::Alternative(
229                    sequence,
230                    translation_context.arena.alloc(WithSpan {
231                        span: sst.span,
232                        node: crate::core_syntax::Term::Epsilon,
233                    }),
234                ),
235            });
236            Ok(translation_context.arena.alloc(WithSpan {
237                span: sst.span,
238                node: crate::core_syntax::Term::Fix(symbol, alternative),
239            }))
240        }
241        SurfaceSyntaxTree::ParserPlus { inner } => {
242            let symbol = Symbol::new(sst.span.as_str());
243            let inner = construct_core_syntax_tree(translation_context, inner)?;
244            // \x . (i ~ x) | epsilon
245            let sequence = translation_context.arena.alloc(WithSpan {
246                span: sst.span,
247                node: crate::core_syntax::Term::Sequence(
248                    inner,
249                    translation_context.arena.alloc(WithSpan {
250                        span: sst.span,
251                        node: crate::core_syntax::Term::ParserRef(symbol),
252                    }),
253                ),
254            });
255            let alternative = translation_context.arena.alloc(WithSpan {
256                span: sst.span,
257                node: crate::core_syntax::Term::Alternative(
258                    sequence,
259                    translation_context.arena.alloc(WithSpan {
260                        span: sst.span,
261                        node: crate::core_syntax::Term::Epsilon,
262                    }),
263                ),
264            });
265            let fixpoint = translation_context.arena.alloc(WithSpan {
266                span: sst.span,
267                node: crate::core_syntax::Term::Fix(symbol, alternative),
268            });
269            Ok(translation_context.arena.alloc(WithSpan {
270                span: sst.span,
271                node: crate::core_syntax::Term::Sequence(inner, fixpoint),
272            }))
273        }
274        SurfaceSyntaxTree::ParserOptional { inner } => {
275            let inner = construct_core_syntax_tree(translation_context, inner)?;
276            Ok(translation_context.arena.alloc(WithSpan {
277                span: sst.span,
278                node: crate::core_syntax::Term::Alternative(
279                    inner,
280                    translation_context.arena.alloc(WithSpan {
281                        span: sst.span,
282                        node: crate::core_syntax::Term::Epsilon,
283                    }),
284                ),
285            }))
286        }
287        SurfaceSyntaxTree::Bottom => Ok(translation_context.arena.alloc(WithSpan {
288            span: sst.span,
289            node: crate::core_syntax::Term::Bottom,
290        })),
291        SurfaceSyntaxTree::Empty => Ok(translation_context.arena.alloc(WithSpan {
292            span: sst.span,
293            node: crate::core_syntax::Term::Epsilon,
294        })),
295        SurfaceSyntaxTree::ParserRuleRef { name } => {
296            let name = name.span.as_str();
297            match translation_context.symbol_table.get(name) {
298                Some(target) => Ok(translation_context.arena.alloc(WithSpan {
299                    span: sst.span,
300                    node: crate::core_syntax::Term::ParserRef(target.node),
301                })),
302                None => Err(span_errors!(UndefinedParserRuleReference, sst.span, name,)),
303            }
304        }
305        SurfaceSyntaxTree::LexicalRuleRef { name } => {
306            let name = name.span.as_str();
307            match translation_context.lexer_database.symbol_table.get(name) {
308                Some(target) => Ok(translation_context.arena.alloc(WithSpan {
309                    span: sst.span,
310                    node: crate::core_syntax::Term::LexerRef(*target),
311                })),
312                None => Err(span_errors!(UndefinedLexicalReference, sst.span, name,)),
313            }
314        }
315        _ => unreachable_branch!(
316            "core syntax tree construction should not be called on this node: {}",
317            sst.span.as_str()
318        ),
319    }
320}