pag_parser/lib.rs

// Copyright (c) 2023 Paguroidea Developers
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::ops::Range;

use ariadne::{Color, Report, ReportKind, Source};
use frontend::{lexical::LexerDatabase, GrammarDefinitionError, WithSpan};
use fusion::fusion_parser;
use proc_macro2::TokenStream;
use quote::format_ident;
use typed_arena::Arena;
use utilities::unreachable_branch;

use crate::{
    core_syntax::TermArena,
    frontend::syntax::construct_parser,
    nf::{
        fully_normalize, merge_inactive_rules, remove_unreachable_rules, semi_normalize,
        NormalForms, Tag, TagAssigner,
    },
};

pub mod core_syntax;
pub mod frontend;
mod fusion;
mod nf;
pub mod type_system;
pub mod utilities;

/// Errors that can arise while compiling a grammar definition into a parser.
pub enum Error<'src> {
    /// Syntax or format errors found while parsing the grammar definition itself.
    GrammarDefinitionError(GrammarDefinitionError<'src>),
    /// Semantic errors reported by the frontend (invalid or undefined references, duplicate rules, ...).
    FrontendErrors(Vec<WithSpan<'src, frontend::Error<'src>>>),
    /// Errors reported by the type checker (ambiguities, unguarded fixpoints, unresolved references).
    TypeErrors(Vec<type_system::TypeError<'src>>),
}

impl<'src> From<Vec<WithSpan<'src, frontend::Error<'src>>>> for Error<'src> {
    fn from(errors: Vec<WithSpan<'src, frontend::Error<'src>>>) -> Self {
        Error::FrontendErrors(errors)
    }
}

impl<'src> From<Vec<type_system::TypeError<'src>>> for Error<'src> {
    fn from(errors: Vec<type_system::TypeError<'src>>) -> Self {
        Error::TypeErrors(errors)
    }
}

impl<'src> Error<'src> {
    /// Render all diagnostics for this error and print them to stderr.
    pub fn report_stderr(&self, input_name: &str, input: &'src str) -> Result<(), std::io::Error> {
        let mut cache = (input_name, Source::from(input));
        for i in self.to_reports(input_name) {
            i.eprint(&mut cache)?;
        }
        Ok(())
    }

    /// Render all diagnostics for this error and print them to stdout.
    pub fn report_stdout(&self, input_name: &str, input: &'src str) -> Result<(), std::io::Error> {
        let mut cache = (input_name, Source::from(input));
        for i in self.to_reports(input_name) {
            i.print(&mut cache)?;
        }
        Ok(())
    }

    /// Convert this error into a list of `ariadne` reports, one per underlying diagnostic.
    pub fn to_reports<'a>(&self, input_name: &'a str) -> Vec<Report<'a, (&'a str, Range<usize>)>> {
        match self {
            Error::GrammarDefinitionError(e) => {
                vec![match e {
                    GrammarDefinitionError::SyntaxError(x) => {
                        let span = match x.location {
                            pest::error::InputLocation::Pos(pos) => pos..pos + 1,
                            pest::error::InputLocation::Span((start, end)) => start..end,
                        };
                        Report::build(ReportKind::Error, input_name, span.start)
                            .with_message("Syntax error in grammar definition")
                            .with_label(ariadne::Label::new((input_name, span))
                                .with_message(x.variant.message().to_string())
                                .with_color(Color::Red))
                            .finish()
                    },
                    GrammarDefinitionError::FormatError { span, message } => {
                        Report::build(ReportKind::Error, input_name, span.start())
                            .with_message("Format error in grammar definition")
                            .with_label(ariadne::Label::new((input_name, span.start()..span.end()))
                                .with_message(message.to_string())
                                .with_color(Color::Red))
                            .finish()
                    },
                    GrammarDefinitionError::ParserLogicError(e) => {
                        Report::build(ReportKind::Error, input_name, 0)
                            .with_message(format!("Internal logical error when parsing grammar definition: {}", e))
                            .finish()
                    },
                    GrammarDefinitionError::UnexpectedEOI(e) => {
                        Report::build(ReportKind::Error, input_name, 0)
                            .with_message(format!("Internal logical error when parsing grammar definition: pest parser failed to produce {}", e))
                            .finish()
                    },
                }]
            },
            Error::FrontendErrors(errors) => errors
                .iter()
                .map(|e| {
                    match &e.node {
                        frontend::Error::InternalLogicalError(msg) => {
                            Report::build(ReportKind::Error, input_name, e.span.start())
                                .with_message("Internal logical error encountered")
                                .with_label(ariadne::Label::new((input_name, e.span.start()..e.span.end()))
                                    .with_message(msg)
                                    .with_color(Color::Red))
                                .finish()
                        },
                        frontend::Error::MultipleDefinition(x, y) => {
                            Report::build(ReportKind::Error, input_name, e.span.start().min(y.start()))
                                .with_message(format!("Multiple definitions of {}", x))
                                .with_label(ariadne::Label::new((input_name, y.start()..y.end()))
                                    .with_message("first definition")
                                    .with_color(Color::Green))
                                .with_label(ariadne::Label::new((input_name, e.span.start()..e.span.end()))
                                    .with_message("second definition")
                                    .with_color(Color::Blue))
                                .finish()
                        },
                        frontend::Error::InvalidLexicalReference(name) => {
                            Report::build(ReportKind::Error, input_name, e.span.start())
                                .with_message("Invalid lexical reference")
                                .with_label(ariadne::Label::new((input_name, e.span.start()..e.span.end()))
                                    .with_message(format!("referencing lexical rule {} is not allowed here", name))
                                    .with_color(Color::Red))
                                .finish()
                        },
                        frontend::Error::UndefinedLexicalReference(name) => {
                            Report::build(ReportKind::Error, input_name, e.span.start())
                                .with_message("Undefined lexical reference")
                                .with_label(ariadne::Label::new((input_name, e.span.start()..e.span.end()))
                                    .with_message(format!("lexical rule {} is undefined", name))
                                    .with_color(Color::Red))
                                .finish()
                        },
                        frontend::Error::UndefinedParserRuleReference(name) => {
                            Report::build(ReportKind::Error, input_name, e.span.start())
                                .with_message("Undefined parser rule reference")
                                .with_label(ariadne::Label::new((input_name, e.span.start()..e.span.end()))
                                    .with_message(format!("parser rule {} is undefined", name))
                                    .with_color(Color::Red))
                                .finish()
                        },
                        frontend::Error::MultipleSkippingRule(name) => {
                            Report::build(ReportKind::Error, input_name, e.span.start())
                                .with_message("Skipping lexical rule is already defined")
                                .with_label(ariadne::Label::new((input_name, e.span.start()..e.span.end()))
                                    .with_message(format!("this definition conflicts with the previous definition of {name}"))
                                    .with_color(Color::Red))
                                .finish()
                        },
                        frontend::Error::NullableToken(name) => {
                            Report::build(ReportKind::Error, input_name, e.span.start())
                                .with_message("Nullable token detected")
                                .with_label(ariadne::Label::new((input_name, e.span.start()..e.span.end()))
                                    .with_message(format!("token {} is nullable", name))
                                    .with_color(Color::Red))
                                .finish()
                        },
                    }
                })
                .collect::<Vec<_>>(),
            Error::TypeErrors(errors) => errors
                .iter()
                .map(|e| {
                    match e {
                        type_system::TypeError::SequentialUniquenessViolation { lhs, rhs, total } => {
                            Report::build(ReportKind::Error, input_name, total.start())
                                .with_message("When type checking a sequence of rules, the following rules are ambiguous")
                                .with_label(ariadne::Label::new((input_name, lhs.0.start()..lhs.0.end()))
                                    .with_message(format!(
                                        "type info for left-hand side: nullable: {}, first set: {{{}}}, follow set: {{{}}}",
                                        lhs.1.nullable,
                                        lhs.1.first.iter().map(|x| x.name()).collect::<Vec<_>>().join(", "),
                                        lhs.1.follow.iter().map(|x| x.name()).collect::<Vec<_>>().join(", ")
                                    ))
                                    .with_color(Color::Green))
                                .with_label(ariadne::Label::new((input_name, rhs.0.start()..rhs.0.end()))
                                    .with_message(format!(
                                        "type info for right-hand side: nullable: {}, first set: {{{}}}, follow set: {{{}}}",
                                        rhs.1.nullable,
                                        rhs.1.first.iter().map(|x| x.name()).collect::<Vec<_>>().join(", "),
                                        rhs.1.follow.iter().map(|x| x.name()).collect::<Vec<_>>().join(", ")
                                    ))
                                    .with_color(Color::Blue))
                                .finish()
                        },
                        type_system::TypeError::DisjunctiveUniquenessViolation { lhs, rhs, total } => {
                            Report::build(ReportKind::Error, input_name, total.start())
                                .with_message("When type checking an alternation of rules, the following rules are ambiguous")
                                .with_label(ariadne::Label::new((input_name, lhs.0.start()..lhs.0.end()))
                                    .with_message(format!(
                                        "type info for left-hand side: nullable: {}, first set: {{{}}}, follow set: {{{}}}",
                                        lhs.1.nullable,
                                        lhs.1.first.iter().map(|x| x.name()).collect::<Vec<_>>().join(", "),
                                        lhs.1.follow.iter().map(|x| x.name()).collect::<Vec<_>>().join(", ")
                                    ))
                                    .with_color(Color::Green))
                                .with_label(ariadne::Label::new((input_name, rhs.0.start()..rhs.0.end()))
                                    .with_message(format!(
                                        "type info for right-hand side: nullable: {}, first set: {{{}}}, follow set: {{{}}}",
                                        rhs.1.nullable,
                                        rhs.1.first.iter().map(|x| x.name()).collect::<Vec<_>>().join(", "),
                                        rhs.1.follow.iter().map(|x| x.name()).collect::<Vec<_>>().join(", ")
                                    ))
                                    .with_color(Color::Blue))
                                .finish()
                        },
                        type_system::TypeError::UnguardedFixpoint(sym, s) => {
                            Report::build(ReportKind::Error, input_name, s.start())
                                .with_message("Unguarded fixpoint")
                                .with_label(ariadne::Label::new((input_name, s.start()..s.end()))
                                    .with_message(format!("fixpoint rule {} is not guarded -- your grammar is left-recursive", sym))
                                    .with_color(Color::Red))
                                .finish()
                        },
                        type_system::TypeError::UnresolvedReference(sym, s) => {
                            Report::build(ReportKind::Error, input_name, s.start())
                                .with_message("Unresolved reference")
                                .with_label(ariadne::Label::new((input_name, s.start()..s.end()))
                                    .with_message(format!("cannot resolve parser rule {} within this context -- did you forget to put the recursive rule into a fixpoint?", sym))
                                    .with_color(Color::Red))
                                .finish()
                        },
                    }
                })
                .collect::<Vec<_>>(),
        }
    }
}

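/// Compile a grammar definition into the token stream of a generated parser module.
///
/// The sketch below is illustrative only and is not taken from this repository: it assumes a
/// build script with a hypothetical `grammar.pag` file and writes the generated code to a
/// `parser.rs` file under `OUT_DIR`. Errors are pretty-printed via [`Error::report_stderr`].
///
/// ```ignore
/// // build.rs (hypothetical): regenerate the parser whenever the grammar changes.
/// fn main() {
///     let grammar = std::fs::read_to_string("grammar.pag").unwrap();
///     match pag_parser::generate_parser(&grammar) {
///         Ok(tokens) => {
///             let out = std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("parser.rs");
///             std::fs::write(out, tokens.to_string()).unwrap();
///         }
///         Err(err) => {
///             // Report all diagnostics against the grammar source, then fail the build.
///             err.report_stderr("grammar.pag", &grammar).unwrap();
///             std::process::exit(1);
///         }
///     }
/// }
/// ```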
pub fn generate_parser(input: &str) -> Result<TokenStream, Error> {
    let sst = frontend::parse(input)?;
    match &sst.node {
        frontend::SurfaceSyntaxTree::Grammar { lexer, parser } => {
            // Build the lexer database and reject nullable tokens early.
            let lexer_database = LexerDatabase::new(lexer)?;
            let nullable_errors = lexer_database.nullability_check();
            if !nullable_errors.is_empty() {
                return Err(Error::FrontendErrors(nullable_errors));
            }
            // Construct the parser terms and run the type checker.
            let term_arena = TermArena::new();
            let parser = construct_parser(&term_arena, lexer_database, parser)?;
            let type_errs = parser.type_check();
            if !type_errs.is_empty() {
                return Err(Error::TypeErrors(type_errs));
            }
            // Bring every parser rule into normal form, then simplify the result.
            let nf_arena = Arena::new();
            let mut nfs = NormalForms::new();
            let mut assigner = TagAssigner::new();
            for (i, rule) in parser.bindings.iter() {
                semi_normalize(
                    &rule.term.node,
                    Tag::new(*i),
                    &nf_arena,
                    &mut nfs,
                    &mut assigner,
                    &parser,
                );
            }
            fully_normalize(&nf_arena, &mut nfs);
            merge_inactive_rules(&mut nfs, &parser, &nf_arena);
            remove_unreachable_rules(&mut nfs, &parser);
            // Fuse the normalized rules with the lexer into the generated parsing routines.
            let parser_routines = fusion_parser(&nfs, &parser);
            let entrypoint = format_ident!("parse_{}", parser.entrypoint.name());
            Ok(quote::quote! {
                #![allow(
                    non_snake_case,
                    dead_code,
                    non_camel_case_types,
                    unused_variables,
                    unused_mut,
                    unreachable_patterns,
                    unreachable_code,
                    clippy::identity_op,
                    clippy::single_match,
                    clippy::never_loop,
                    clippy::match_single_binding,
                )]
                #parser_routines
                pub fn parse(input: &str) -> Result<ParserTree, Error> {
                    #entrypoint(input, 0)
                }
            })
        }
        _ => unreachable_branch!("the entrypoint of sst can only be Grammar"),
    }
}
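
// A minimal sketch of consuming the generated code, assuming the hypothetical build script shown
// in the documentation of `generate_parser` above (the `parser.rs` file name and `OUT_DIR` layout
// are assumptions, not something this crate mandates):
//
//     mod generated {
//         include!(concat!(env!("OUT_DIR"), "/parser.rs"));
//     }
//
//     fn try_parse(src: &str) -> bool {
//         // The emitted module exposes `parse(input)`, which forwards to the entrypoint rule.
//         generated::parse(src).is_ok()
//     }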