1#[cfg(test)]
2mod tests;
3
4mod lexer;
5use std::rc::Rc;
6
7use lexer::Lexer;
8pub use lexer::Token;
9use miette::{Diagnostic, SourceSpan};
10use petr_ast::{Ast, Comment, ExprId, List, Module};
11use petr_utils::{IndexMap, SourceId, Span, SpannedItem, SymbolId, SymbolInterner};
12use thiserror::Error;
13
14#[derive(Error, Debug, PartialEq)]
15pub struct ParseError {
16 kind: ParseErrorKind,
17 help: Option<String>,
18}
19
20impl From<ParseErrorKind> for ParseError {
21 fn from(kind: ParseErrorKind) -> Self {
22 Self { kind, help: None }
23 }
24}
25
26impl ParseError {
27 pub fn with_help(
28 mut self,
29 help: Option<impl Into<String>>,
30 ) -> Self {
31 self.help = help.map(Into::into);
32 self
33 }
34}
35
36impl Diagnostic for ParseError {
37 fn help<'a>(&'a self) -> Option<Box<dyn std::fmt::Display + 'a>> {
38 self.help.as_ref().map(|x| -> Box<dyn std::fmt::Display> { Box::new(x) })
39 }
40
41 fn code<'a>(&'a self) -> Option<Box<dyn std::fmt::Display + 'a>> {
42 self.kind.code()
43 }
44
45 fn severity(&self) -> Option<miette::Severity> {
46 self.kind.severity()
47 }
48
49 fn url<'a>(&'a self) -> Option<Box<dyn std::fmt::Display + 'a>> {
50 self.kind.url()
51 }
52
53 fn source_code(&self) -> Option<&dyn miette::SourceCode> {
54 self.kind.source_code()
55 }
56
57 fn labels(&self) -> Option<Box<dyn Iterator<Item = miette::LabeledSpan> + '_>> {
58 self.kind.labels()
59 }
60
61 fn related<'a>(&'a self) -> Option<Box<dyn Iterator<Item = &'a dyn Diagnostic> + 'a>> {
62 self.kind.related()
63 }
64
65 fn diagnostic_source(&self) -> Option<&dyn Diagnostic> {
66 self.kind.diagnostic_source()
67 }
68}
69
70impl std::fmt::Display for ParseError {
71 fn fmt(
72 &self,
73 f: &mut std::fmt::Formatter<'_>,
74 ) -> std::fmt::Result {
75 write!(f, "{}", self.kind)
76 }
77}
78
79#[derive(Error, Debug, Diagnostic, PartialEq)]
80pub enum ParseErrorKind {
81 #[error("Unmatched parenthesis")]
82 UnmatchedParenthesis,
83 #[error("Expected identifier, found {0}")]
84 ExpectedIdentifier(String),
85 #[error("Expected token {0}, found {1}")]
86 ExpectedToken(Token, Token),
87 #[error("Expected one of tokens {}; found {1}", format_toks(.0))]
88 ExpectedOneOf(Vec<Token>, Token),
89 #[error("Internal error in parser. Please file an issue on GitHub. {0}")]
90 InternalError(String),
91 #[error("File name could not be converted to module name. petr source names should be a valid identifier.")]
92 #[help(
93 "Identifiers cannot begin with numbers, contain spaces, or contain symbols other than an underscore or hyphen.\
94 Hyphens in file names are transformed into underscores."
95 )]
96 InvalidIdentifier(String),
97 #[error("Internal error in parser. Please file an issue on GitHub. A span was joined with a span from another file.")]
98 InternalSpanError(#[label] SourceSpan, #[label] SourceSpan),
99}
100
101impl ParseErrorKind {
102 pub fn into_err(self) -> ParseError {
103 self.into()
104 }
105}
106
107fn format_toks(toks: &[Token]) -> String {
108 let mut buf = toks.iter().take(toks.len() - 1).map(|t| format!("{}", t)).collect::<Vec<_>>().join(", ");
109 match toks.len() {
110 2 => {
111 buf.push_str(&format!(" or {}", toks.last().unwrap()));
112 },
113 x if x > 2 => {
114 buf.push_str(&format!(", or {}", toks.last().unwrap()));
115 },
116 _ => (),
117 }
118 buf
119}
120
121type Result<T> = std::result::Result<T, ParseError>;
122
123pub struct Parser {
124 interner: SymbolInterner,
125 expr_id_assigner: usize,
128 lexer: Lexer,
129 errors: Vec<SpannedItem<ParseError>>,
130 comments: Vec<SpannedItem<Comment>>,
131 peek: Option<SpannedItem<Token>>,
132 source_map: IndexMap<SourceId, (&'static str, &'static str)>,
134 help: Vec<String>,
135}
136
137impl Parser {
138 pub fn push_error(
139 &mut self,
140 err: SpannedItem<ParseErrorKind>,
141 ) {
142 if self.help.is_empty() {
143 return self.errors.push(err.map(|err| err.into_err()));
144 }
145 let mut help_text = Vec::with_capacity(self.help.len());
146 for (indentation, help) in self.help.iter().enumerate() {
147 let is_last = indentation == self.help.len() - 1;
148 let text = format!(
149 "{}{}{}{help}",
150 " ".repeat(indentation),
151 if indentation == 0 { "" } else { "↪ " },
152 if is_last { "expected " } else { "while parsing " }
153 );
154 help_text.push(text);
155 }
156 let err = err.map(|err| err.into_err().with_help(Some(help_text.join("\n"))));
157 self.errors.push(err);
158 }
159
160 pub fn slice(&self) -> &str {
161 self.lexer.slice()
162 }
163
164 pub fn intern(
165 &mut self,
166 internee: Rc<str>,
167 ) -> SymbolId {
168 self.interner.insert(internee)
169 }
170
171 pub fn span(&self) -> Span {
172 self.lexer.span()
173 }
174
175 pub fn peek(&mut self) -> SpannedItem<Token> {
176 if let Some(ref peek) = self.peek {
177 *peek
178 } else {
179 let item = self.advance();
180 self.peek = Some(item);
181 item
182 }
183 }
184
185 pub fn new_with_existing_interner_and_source_map(
186 sources: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
187 interner: SymbolInterner,
188 mut source_map: IndexMap<SourceId, (&'static str, &'static str)>,
189 ) -> Self {
190 let sources = sources
193 .into_iter()
194 .map(|(name, source)| -> (&'static str, &'static str) {
195 let name = name.into();
196 let source = source.into();
197 (Box::leak(name.into_boxed_str()), Box::leak(source.into_boxed_str()))
198 })
199 .collect::<Vec<_>>();
200 let sources_for_lexer = sources.iter().map(|(_, source)| *source);
201
202 let lexer = Lexer::new_with_offset_into_sources(sources_for_lexer, source_map.len());
203
204 for (name, source) in sources.into_iter() {
205 source_map.insert((name, source));
206 }
207
208 Self {
209 interner,
211 lexer,
212 errors: Default::default(),
213 comments: Default::default(),
214 peek: None,
215 source_map,
216 help: Default::default(),
217 expr_id_assigner: 0,
218 }
219 }
220
221 pub fn new_expr_id(&mut self) -> ExprId {
222 let id = self.expr_id_assigner;
223 self.expr_id_assigner += 1;
224 ExprId(id)
225 }
226
227 pub fn new(sources: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>) -> Self {
228 Self::new_with_existing_interner_and_source_map(sources, Default::default(), Default::default())
229 }
230
231 pub fn drain_comments(&mut self) -> Vec<Comment> {
232 self.comments.drain(..).map(|spanned_item| spanned_item.into_item()).collect()
233 }
234
235 #[allow(clippy::type_complexity)]
237 pub fn into_result(
238 mut self
239 ) -> (
240 Ast,
241 Vec<SpannedItem<ParseError>>,
242 SymbolInterner,
243 IndexMap<SourceId, (&'static str, &'static str)>,
244 ) {
245 let nodes: Vec<Module> = self.many::<Module>();
246 (Ast::new(nodes), self.errors, self.interner, self.source_map)
248 }
249
250 pub fn interner(&self) -> &SymbolInterner {
251 &self.interner
252 }
253
254 pub fn many<P: Parse>(&mut self) -> Vec<P> {
255 let mut buf = Vec::new();
256 loop {
257 if *self.peek().item() == Token::Eof {
258 break;
259 }
260 if let Some(parsed_item) = P::parse(self) {
261 buf.push(parsed_item);
262 } else {
263 break;
264 }
265 }
266 buf
267 }
268
269 pub fn sequence_zero_or_more<P: Parse>(
273 &mut self,
274 separator: Token,
275 ) -> Option<Vec<P>> {
276 self.with_help(format!("while parsing {separator} separated sequence"), |p| {
277 let mut buf = vec![];
278 loop {
279 let item = if buf.is_empty() {
282 let res = p.with_backtrack(|p| P::parse(p));
283 match res {
284 Ok(item) => Some(item),
285 Err(_) => {
286 break;
288 },
289 }
290 } else {
291 P::parse(p)
294 };
295 match item {
296 Some(item) => buf.push(item),
297 None => {
298 break;
299 },
300 }
301 if *p.peek().item() == separator {
302 p.advance();
303 } else {
304 break;
305 }
306 }
307
308 Some(buf)
309 })
310 }
311
312 pub fn sequence_one_or_more<P: Parse>(
317 &mut self,
318 separator: Token,
319 ) -> Option<Vec<P>> {
320 self.sequence(separator)
321 }
322
323 pub fn sequence<P: Parse>(
327 &mut self,
328 separator: Token,
329 ) -> Option<Vec<P>> {
330 let mut buf = vec![];
331 let errs = loop {
332 let item = self.with_backtrack(|p| P::parse(p));
333 match item {
334 Ok(item) => buf.push(item),
335 Err(errs) => {
336 break errs;
337 },
338 }
339 if *self.peek().item() == separator {
340 self.advance();
341 } else {
342 break vec![];
343 }
344 };
345 if buf.is_empty() {
346 for err in errs {
347 self.errors.push(err)
348 }
349 None
350 } else {
351 Some(buf)
352 }
353 }
354
355 pub fn advance(&mut self) -> SpannedItem<Token> {
356 if let Some(tok) = self.peek.take() {
357 return tok;
358 }
359 let next_tok = self.lexer.advance();
360 match *next_tok.item() {
361 Token::Newline => self.advance(),
362 Token::Comment => {
363 if let Some(comment) = self.parse::<SpannedItem<Comment>>() {
364 self.comments.push(comment);
365 }
366 self.advance()
367 },
368 _ => next_tok,
369 }
370 }
371
372 pub fn try_token(
374 &mut self,
375 tok: Token,
376 ) -> Option<SpannedItem<Token>> {
377 let peeked_token = self.peek();
378 if *peeked_token.item() == tok {
379 Some(self.advance())
380 } else {
381 None
382 }
383 }
384
385 pub fn token(
386 &mut self,
387 tok: Token,
388 ) -> Option<SpannedItem<Token>> {
389 self.with_help(format!("while parsing token {tok}"), |p| {
390 let peeked_token = p.peek();
391 if *peeked_token.item() == tok {
392 Some(p.advance())
393 } else {
394 let span = p.lexer.span();
395 p.push_error(span.with_item(ParseErrorKind::ExpectedToken(tok, *peeked_token.item())));
396 None
397 }
398 })
399 }
400
401 pub fn parse<P: Parse>(&mut self) -> Option<P> {
402 P::parse(self)
403 }
404
405 pub fn one_of<const N: usize>(
406 &mut self,
407 toks: [Token; N],
408 ) -> Option<SpannedItem<Token>> {
409 match self.peek().item() {
410 tok if toks.contains(tok) => self.token(*tok),
411 tok => {
412 let span = self.lexer.span();
413 if N == 1 {
414 self.push_error(span.with_item(ParseErrorKind::ExpectedToken(toks[0], *tok)));
415 } else {
416 self.push_error(span.with_item(ParseErrorKind::ExpectedOneOf(toks.to_vec(), *tok)));
417 }
418 None
419 },
420 }
421 }
422
423 pub fn errors(&self) -> &[SpannedItem<ParseError>] {
424 &self.errors
425 }
426
427 pub fn with_help<F, T>(
428 &mut self,
429 help_text: impl Into<String>,
430 f: F,
431 ) -> T
432 where
433 F: Fn(&mut Parser) -> T,
434 {
435 self.push_help(help_text);
436 let res = f(self);
437 self.pop_help();
438 res
439 }
440
441 fn push_help(
442 &mut self,
443 arg: impl Into<String>,
444 ) {
445 self.help.push(arg.into())
446 }
447
448 fn pop_help(&mut self) {
449 let _ = self.help.pop();
450 }
451
452 pub fn with_backtrack<F, T>(
458 &mut self,
459 f: F,
460 ) -> std::result::Result<T, Vec<SpannedItem<ParseError>>>
461 where
462 F: Fn(&mut Parser) -> Option<T>,
463 {
464 let checkpoint = self.checkpoint();
465 let res = f(self);
466 match res {
467 Some(res) => Ok(res),
468 None => Err(self.restore_checkpoint(checkpoint)),
469 }
470 }
471
472 fn checkpoint(&self) -> Checkpoint {
473 Checkpoint {
474 errors: self.errors.len(),
475 lexer: self.lexer.clone(),
476 peek: self.peek,
477 }
478 }
479
480 fn restore_checkpoint(
481 &mut self,
482 checkpoint: Checkpoint,
483 ) -> Vec<SpannedItem<ParseError>> {
484 self.lexer = checkpoint.lexer;
485 self.peek = checkpoint.peek;
486 self.errors.split_off(checkpoint.errors)
487 }
488
489 pub fn source_map(&self) -> &IndexMap<SourceId, (&'static str, &'static str)> {
490 &self.source_map
491 }
492}
493
494struct Checkpoint {
495 errors: usize,
496 lexer: Lexer,
497 peek: Option<SpannedItem<Token>>,
498}
499
500pub trait Parse: Sized {
501 fn parse(p: &mut Parser) -> Option<Self>;
502}
503
504impl<T> Parse for SpannedItem<T>
505where
506 T: Parse,
507{
508 fn parse(p: &mut Parser) -> Option<Self> {
509 let before_span = p.lexer.span();
510 let result = T::parse(p)?;
511
512 let mut after_span = p.lexer.span();
513 if after_span.source() != before_span.source() && after_span.span().offset() == 0 {
514 after_span = before_span.extend(p.source_map.get(before_span.source()).1.len());
526 } else if after_span.source() != before_span.source() {
527 let span = before_span.with_item(ParseErrorKind::InternalSpanError(after_span.span(), before_span.span()));
528 p.push_error(span);
529 return None;
530 }
531
532 Some(before_span.hi_to_hi(after_span).with_item(result))
534 }
535}
536
537impl Parse for List {
538 fn parse(p: &mut Parser) -> Option<Self> {
539 p.try_token(Token::OpenBracket)?;
540 let elements = p.sequence(Token::Comma)?;
541 p.token(Token::CloseBracket)?;
542 Some(List {
543 elements: elements.into_boxed_slice(),
544 })
545 }
546}