mech_syntax/
parser.rs

1// Parser
2// ========
3
4/// Sections:
5///   1. Prelude
6///   2. Parser combinators
7///   3. Recovery functions
8///   4. Public interface
9///   5. Error reporting
10
11// 1. Prelude
12// ------------
13
14use crate::*;
15use crate::functions::function_define;
16
17use mech_core::nodes::*;
18use mech_core::nodes::{SectionElement, MechString, Table};
19
20#[cfg(not(feature = "no-std"))] use core::fmt;
21#[cfg(feature = "no-std")] use alloc::fmt;
22#[cfg(feature = "no-std")] use alloc::string::String;
23#[cfg(feature = "no-std")] use alloc::vec::Vec;
24use nom::{
25  IResult,
26  branch::alt,
27  sequence::{tuple as nom_tuple, preceded},
28  combinator::{opt, eof, cut, peek},
29  multi::{many1, many_till, many0, separated_list1, separated_list0},
30  Err,
31  Err::Failure
32};
33
34use std::collections::HashMap;
35use colored::*;
36
37//use crate::*;
38use crate::{
39  TextFormatter,
40  ParseError,
41  ParseString,
42  ParseErrorDetail,
43  graphemes,
44  ParseResult,
45};
46
47// 2. Parser combinators
48// -----------------------
49
50/// Convert output of any parser into ParserNode::Null.
51/// Useful for working with `alt` combinator and error recovery functions.
52pub fn null<'a, F, O>(mut parser: F) ->
53  impl FnMut(ParseString<'a>) -> ParseResult<()>
54where
55  F: FnMut(ParseString<'a>) -> ParseResult<O>
56{
57  move |input: ParseString| match parser(input) {
58    Ok((remaining, _)) => Ok((remaining, ())),
59    Err(Err::Error(e)) => Err(Err::Error(e)),
60    Err(Err::Failure(e)) => Err(Err::Failure(e)),
61    x => panic!("Err::Incomplete is not supported"),
62  }
63}
64
65/// For parser p, run p and also output the range that p has matched
66/// upon success.
67pub fn range<'a, F, O>(mut parser: F) ->
68  impl FnMut(ParseString<'a>) -> ParseResult<(O, SourceRange)>
69where
70  F: FnMut(ParseString<'a>) -> ParseResult<O>
71{
72  move |input: ParseString| {
73    let start = input.loc();
74    match parser(input) {
75      Ok((remaining, o)) => {
76        let rng = SourceRange { start, end: remaining.loc(), };
77        Ok((remaining, (o, rng)))
78      },
79      Err(e) => Err(e),
80    }
81  }
82}
83
/// `label!(parser, msg)` or `label!(parser, msg, rng1, rng2, ...)`.
///
/// Expands to a call to `label_without_recovery` with a `ParseErrorDetail`
/// whose `message` is `msg` and whose `annotation_rngs` holds the optional
/// trailing range expressions (empty when none are given).
#[macro_export]
macro_rules! label {
  ($parser:expr, $msg:expr $(, $rngs:expr)*) => {
    (label_without_recovery($parser, ParseErrorDetail {
      message: $msg,
      annotation_rngs: vec![$($rngs),*],
    }))
  };
}
98
/// `labelr!(parser, recovery_fn, msg)` or
/// `labelr!(parser, recovery_fn, msg, rng1, rng2, ...)`.
///
/// Expands to a call to `label_with_recovery` with a `ParseErrorDetail`
/// whose `message` is `msg` and whose `annotation_rngs` holds the optional
/// trailing range expressions (empty when none are given).
#[macro_export]
macro_rules! labelr {
  ($parser:expr, $recovery_fn:expr, $msg:expr $(, $rngs:expr)*) => {
    (label_with_recovery($parser, $recovery_fn, ParseErrorDetail {
      message: $msg,
      annotation_rngs: vec![$($rngs),*],
    }))
  };
}
113
114/// Label without recovery function. Upgrade Err::Error to Err:Failure
115/// and override its context information.
116pub fn label_without_recovery<'a, F, O>(
117  mut parser: F,
118  error_detail: ParseErrorDetail,
119) ->
120  impl FnMut(ParseString<'a>) -> ParseResult<O>
121where
122  F: FnMut(ParseString<'a>) -> ParseResult<O>
123{
124  move |mut input: ParseString| {
125    let start = input.loc();
126    match parser(input) {
127      Err(Err::Error(mut e)) => {
128        e.cause_range = SourceRange { start, end: e.cause_range.end };
129        e.error_detail = error_detail.clone();
130        Err(Err::Failure(e))
131      }
132      x => x,
133    }
134  }
135}
136
137/// Label with recovery function. In addition to upgrading errors, the
138/// error is logged and recovery function will be run as an attempt to
139/// synchronize parser state.
140pub fn label_with_recovery<'a, F, O>(
141  mut parser: F,
142  mut recovery_fn: fn(ParseString<'a>) -> ParseResult<O>,
143  error_detail: ParseErrorDetail,
144) ->
145  impl FnMut(ParseString<'a>) -> ParseResult<O>
146where
147  F: FnMut(ParseString<'a>) -> ParseResult<O>
148{
149  move |mut input: ParseString| {
150    let start = input.loc();
151    match parser(input) {
152      Err(Err::Error(mut e)) => {
153        e.cause_range = SourceRange { start, end: e.cause_range.end };
154        e.error_detail = error_detail.clone();
155        e.log();
156        recovery_fn(e.remaining_input)
157      }
158      Err(Err::Failure(mut e)) => {
159        e.cause_range = SourceRange { start, end: e.cause_range.end };
160        //e.error_detail = error_detail.clone();
161        e.log();
162        recovery_fn(e.remaining_input)
163      },
164      x => x,
165    }
166  }
167}
168
169/// For parser p, return the `!!p` peek parsing expression.
170pub fn is<'a, F, O>(mut parser: F) ->
171  impl FnMut(ParseString<'a>) -> ParseResult<O>
172where
173  F: FnMut(ParseString<'a>) -> ParseResult<O>
174{
175  move |input: ParseString| {
176    let input_clone = input.clone();
177    match parser(input_clone) {
178      Ok((_, o)) => Ok((input, o)),
179      _ => Err(Err::Error(ParseError::new(input, "Unexpected character"))),
180    }
181  }
182}
183
184/// For parser p, return the `!p` peek parsing expression.
185pub fn is_not<'a, F, E>(mut parser: F) ->
186  impl FnMut(ParseString<'a>) -> ParseResult<()>
187where
188  F: FnMut(ParseString<'a>) -> ParseResult<E>
189{
190  move |input: ParseString| {
191    let input_clone = input.clone();
192    match parser(input_clone) {
193      Err(Err::Failure(_)) |
194      Err(Err::Error(_)) => Ok((input, ())),
195      _ => Err(Err::Error(ParseError::new(input, "Unexpected character")))
196    }
197  }
198}
199
200/// Return a terminal parsing expression that consumes `tag` from input.
201pub fn tag(tag: &'static str) -> impl Fn(ParseString) -> ParseResult<String> {
202  move |mut input: ParseString| {
203    if input.is_empty() {
204      return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")));
205    }
206    if let Some(matched) = input.consume_tag(tag) {
207      Ok((input, matched))
208    } else {
209      Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
210    }
211  }
212}
213
214// 3. Recovery functions
215// -----------------------
216
217// skip_till_eol := (!new_line, any)* ;
218pub fn skip_till_eol(input: ParseString) -> ParseResult<Token> {
219  let (input, matched) = many0(nom_tuple((
220    is_not(new_line),
221    any_token,
222  )))(input)?;
223  let mut matched: Vec<Token> = matched.into_iter().map(|(_, t)| t).collect(); 
224  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default()); 
225  Ok((input, tkn))
226}
227
228// skip_past_eol := skip_till_eol, new_line ;
229pub fn skip_past_eol(input: ParseString) -> ParseResult<Token> {
230  let (input, matched) = skip_till_eol(input)?;
231  let (input, nl) = new_line(input)?;
232  let matched = Token::merge_tokens(&mut vec![matched, nl]).unwrap_or(Token::default());
233  Ok((input, matched))
234}
235
236// skip-till-end-of-statement := *((!new-line, !";"), any) ;
237pub fn skip_till_end_of_statement(input: ParseString) -> ParseResult<Token> {
238  // If empty, return
239  if input.is_empty() {
240      return Ok((input, Token::default()));
241  }
242
243  // Consume until either newline or ;
244  let (input, matched) = many0(nom_tuple((
245      // is_not matches any char NOT in the set
246      is_not(alt((
247          new_line,
248          semicolon,
249      ))),
250      any_token,
251  )))(input)?;
252
253  let mut matched: Vec<Token> = matched.into_iter().map(|(_, t)| t).collect();
254  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default());
255
256  Ok((input, tkn))
257}
258
259// skip_till_section_element := skip_past_eol, (!section_element, skip_past_eol)* ;
260pub fn skip_till_section_element(input: ParseString) -> ParseResult<Token> {
261  if input.is_empty() {
262    return Ok((input, Token::default()));
263  }
264  let (input, matched) = skip_past_eol(input)?;
265  let (input, matched2) = many0(nom_tuple((
266    is_not(section_element),
267    skip_past_eol,
268  )))(input)?;
269  let mut matched: Vec<Token> = vec![matched];
270  matched.extend(matched2.into_iter().map(|(_, t)| t));
271  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default());
272  Ok((input, tkn))
273}
274
275pub fn skip_till_paragraph_element(input: ParseString) -> ParseResult<Token> {
276  // if it's empty, return
277  if input.is_empty() {
278    return Ok((input, Token::default()));
279  }
280  // Otherwise, consume tokens until we reach a paragraph element
281  let (input, matched) = many0(nom_tuple((
282    is_not(paragraph_element),
283    any_token,
284  )))(input)?;
285  let mut matched: Vec<Token> = matched.into_iter().map(|(_, t)| t).collect(); 
286  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default());
287  Ok((input, tkn))
288}
289
290// skip_spaces := space* ;
291pub fn skip_spaces(input: ParseString) -> ParseResult<()> {
292  let (input, _) = many0(space)(input)?;
293  Ok((input, ()))
294}
295
296// skip_nil := ;
297pub fn skip_nil(input: ParseString) -> ParseResult<()> {
298  Ok((input, ()))
299}
300
301// skip_empty_mech_directive := ;
302pub fn skip_empty_mech_directive(input: ParseString) -> ParseResult<String> {
303  Ok((input, String::from("mech:")))
304}
305
306// recovery function for Recoverable nodes with customizable skip function
307pub fn recover<T: Recoverable, F>(input: ParseString, skip_fn: F) -> ParseResult<T>
308where
309  F: Fn(ParseString) -> ParseResult<Token>,
310{
311  let start = input.loc();
312  let (input, matched) = skip_fn(input)?;
313  let end = input.loc();
314  Ok((input, T::error_placeholder(matched, SourceRange { start, end })))
315}
316
317// 4. Public interface
318// ---------------------
319
320// mech_code_alt := fsm_specification | fsm_implementation | function_define | statement | expression | comment ;
321pub fn mech_code_alt(input: ParseString) -> ParseResult<MechCode> {
322  let (input, _) = whitespace0(input)?;
323  let parsers: Vec<(&str, Box<dyn Fn(ParseString) -> ParseResult<MechCode>>)> = vec![
324    // ("fsm_specification", Box::new(|i| fsm_specification(i).map(|(i, v)| (i, MechCode::FsmSpecification(v))))),
325    // ("fsm_implementation", Box::new(|i| fsm_implementation(i).map(|(i, v)| (i, MechCode::FsmImplementation(v))))),
326    // ("function_define", Box::new(|i| function_define(i).map(|(i, v)| (i, MechCode::FunctionDefine(v))))),
327    ("statement",   Box::new(|i| statement(i).map(|(i, v)| (i, MechCode::Statement(v))))),
328    ("expression",  Box::new(|i| expression(i).map(|(i, v)| (i, MechCode::Expression(v))))),
329    ("comment",     Box::new(|i| comment(i).map(|(i, v)| (i, MechCode::Comment(v))))),
330  ];
331  match alt_best(input, &parsers) {
332    Ok((input, code)) => {
333      return Ok((input, code));
334    }
335    Err(e) => {
336      return Err(e);
337    }
338  };
339
340}
341
342/// code-terminal := *space-tab, ?(?semicolon, *space-tab, comment), (new-line | ";" | eof), *whitespace ;
343pub fn code_terminal(input: ParseString) -> ParseResult<Option<Comment>> {
344  let (input, _) = many0(space_tab)(input)?;
345  let (input, cmmnt) = opt(tuple((opt(semicolon), many0(space_tab), comment)))(input)?;
346  let (input, _) = alt((null(new_line), null(semicolon), null(eof)))(input)?;
347  let (input, _) = whitespace0(input)?;
348  let cmmt = match cmmnt {
349    Some((_, _, cmnt)) => Some(cmnt),
350    None => None,
351  };
352  Ok((input, cmmt))
353}
354
355// mech-code-block := +(mech-code, code-terminal) ;
356pub fn mech_code(input: ParseString) -> ParseResult<Vec<(MechCode,Option<Comment>)>> {
357  let mut output = vec![];
358  let mut new_input = input.clone();
359  loop {
360
361    if peek(not_mech_code)(new_input.clone()).is_ok() {
362      if output.len() > 0 {
363        return Ok((new_input, output));
364      } else {
365        let e = ParseError::new(new_input, "Unexpected character");
366        return Err(Err::Error(e));
367      }
368    }
369
370    let start = new_input.loc();
371    let start_cursor = new_input.cursor;
372    let (input, code) = match mech_code_alt(new_input.clone()) {
373      Err(Err::Error(mut e)) => {
374        // if the error is just "Unexpected character", we will just fail.
375        if e.error_detail.message == "Unexpected character" {
376          if output.len() > 0 {
377            return Ok((new_input, output));
378          } else {
379            return Err(Err::Error(e));
380          }
381        } else {
382          e.cause_range = SourceRange { start, end: e.cause_range.end };
383          e.log();
384          // skip till the end of the statement
385          let (input, skipped) = skip_till_end_of_statement(e.remaining_input)?;
386          // get tokens from start_cursor to input.cursor
387          let skipped_input = input.slice(start_cursor, input.cursor);
388          let skipped_token = Token {
389            kind: TokenKind::Error,
390            chars: skipped_input.chars().collect(),
391            src_range: SourceRange { start, end: input.loc() },
392          };
393          let mech_error = MechCode::Error(skipped_token, e.cause_range);
394          (input, mech_error)
395        }
396      }
397      Err(Err::Failure(mut e)) => {
398        // Check if this thing matches a section element:
399        match subtitle(new_input.clone()) {
400          Ok((_, _)) => {
401            // if it does, and we have already parsed something, return what we have.
402            if output.len() > 0 {
403              return Ok((new_input, output));
404            } else {
405              return Err(Err::Failure(e));
406            }
407          }
408          Err(_) => { /* continue with error recovery */ }
409        }
410        e.cause_range = SourceRange { start, end: e.cause_range.end };
411        e.log();
412        // skip till the end of the statement
413        let (input, skipped) = skip_till_end_of_statement(e.remaining_input)?;
414        // get tokens from start_cursor to input.cursor
415        let skipped_input = input.slice(start_cursor, input.cursor);
416        let skipped_token = Token {
417          kind: TokenKind::Error,
418          chars: skipped_input.chars().collect(),
419          src_range: SourceRange { start, end: input.loc() },
420        };
421        let mech_error = MechCode::Error(skipped_token, e.cause_range);
422        (input, mech_error)
423      },
424      Ok(x) => x,
425      _ => unreachable!(),
426    };
427    let (input, cmmt) = match code_terminal(input) {
428      Ok((input, cmmt)) => (input, cmmt),
429      Err(e) => {
430        // if we didn't parse a terminal, just return what we've got so far.
431        if output.len() > 0 {
432          return Ok((new_input, output));
433        }
434        // otherwise, return the error.
435        return Err(e);
436      }
437    };
438    output.push((code, cmmt));
439    new_input = input;
440    if new_input.is_empty() {
441      break;
442    }
443  }
444  Ok((new_input, output))
445}
446
447// program := ws0, ?title, body, ws0 ;
448pub fn program(input: ParseString) -> ParseResult<Program> {
449  let msg = "Expects program body";
450  let (input, _) = whitespace0(input)?;
451  let (input, title) = opt(title)(input)?;
452  //let (input, body) = labelr!(body, skip_nil, msg)(input)?;
453  let (input, body) = body(input)?;
454  //println!("Parsed program body: {:#?}", body);
455  let (input, _) = whitespace0(input)?;
456  Ok((input, Program{title, body}))
457}
458
459// parse_mech := program | statement ;
460pub fn parse_mech(input: ParseString) -> ParseResult<Program> {
461  //let (input, mech) = alt((program, statement))(input)?;
462  //Ok((input, ParserNode::Root { children: vec![mech] }))
463  let (input, mech) = program(input)?;
464  Ok((input, mech))
465}
466
467// 5. Error Reporting
468// --------------------
469
470/// Print formatted error message.
471pub fn print_err_report(text: &str, report: &ParserErrorReport) {
472  let msg = TextFormatter::new(text).format_error(report);
473  println!("{}", msg);
474}
475
476pub fn parse_grammar(text: &str) -> MResult<Grammar> {
477  // remove all whitespace from the input string
478  let text_no_Ws = &text.replace(" ", "").replace("\n", "").replace("\r", "").replace("\t", "");
479  let graphemes = graphemes::init_source(text_no_Ws);
480  let mut result_node = None;
481  let mut error_log: Vec<(SourceRange, ParseErrorDetail)> = vec![];
482
483  // Do parse
484  let remaining: ParseString = match grammar(ParseString::new(&graphemes)) {
485    // Got a parse tree, however there may be errors
486    Ok((mut remaining_input, parse_tree)) => {
487      error_log.append(&mut remaining_input.error_log);
488      result_node = Some(parse_tree);
489      remaining_input
490    },
491    // Parsing failed and could not be recovered. No parse tree was created in this case
492    Err(err) => {
493      match err {
494        Err::Error(mut e) | Err::Failure(mut e) => {
495          error_log.append(&mut e.remaining_input.error_log);
496          error_log.push((e.cause_range, e.error_detail));
497          e.remaining_input
498        },
499        Err::Incomplete(_) => panic!("nom::Err::Incomplete is not supported!"),
500      }
501    },
502  };
503  // Check if all inputs were parsed
504  if remaining.len() != 0 {
505    let e = ParseError::new(remaining, "Inputs since here are not parsed");
506    error_log.push((e.cause_range, e.error_detail));
507  }
508
509  // Construct result
510  if error_log.is_empty() {
511    Ok(result_node.unwrap())
512  } else {
513    let report: Vec<ParserErrorContext> = error_log.into_iter().map(|e| ParserErrorContext {
514      cause_rng: e.0,
515      err_message: String::from(e.1.message),
516      annotation_rngs: e.1.annotation_rngs,
517    }).collect();
518    Err(MechError2::new(
519      ParserErrorReport(text.to_string(), report),
520      None
521    ))
522  }
523}
524
525
526pub fn parse(text: &str) -> MResult<Program> {
527  let graphemes = graphemes::init_source(text);
528  let mut result_node = None;
529  let mut error_log: Vec<(SourceRange, ParseErrorDetail)> = vec![];
530
531  // Do parse
532  let remaining: ParseString = match parse_mech(ParseString::new(&graphemes)) {
533    // Got a parse tree, however there may be errors
534    Ok((mut remaining_input, parse_tree)) => {
535      error_log.append(&mut remaining_input.error_log);
536      result_node = Some(parse_tree);
537      remaining_input
538    },
539    // Parsing failed and could not be recovered. No parse tree was created in this case
540    Err(err) => {
541      match err {
542        Err::Error(mut e) | Err::Failure(mut e) => {
543          error_log.append(&mut e.remaining_input.error_log);
544          error_log.push((e.cause_range, e.error_detail));
545          e.remaining_input
546        },
547        Err::Incomplete(_) => panic!("nom::Err::Incomplete is not supported!"),
548      }
549    },
550  };
551
552  // Check if all inputs were parsed
553  if remaining.len() != 0 {
554    let e = ParseError::new(remaining, "Inputs since here are not parsed");
555    error_log.push((e.cause_range, e.error_detail));
556  }
557
558  // Construct result
559  if error_log.is_empty() {
560    Ok(result_node.unwrap())
561  } else {
562    let report: Vec<ParserErrorContext> = error_log.into_iter().map(|e| ParserErrorContext {
563      cause_rng: e.0,
564      err_message: String::from(e.1.message),
565      annotation_rngs: e.1.annotation_rngs,
566    }).collect();
567    Err(MechError2::new(
568      ParserErrorReport(text.to_string(), report),
569      None
570    ).with_compiler_loc())
571  }
572}