Skip to main content

mech_syntax/
parser.rs

1// Parser
2// ========
3
4/// Sections:
5///   1. Prelude
6///   2. Parser combinators
7///   3. Recovery functions
8///   4. Public interface
9///   5. Error reporting
10
11// 1. Prelude
12// ------------
13
14use crate::*;
15use crate::functions::function_define;
16
17use mech_core::nodes::*;
18use mech_core::nodes::{SectionElement, MechString, Table};
19
20#[cfg(not(feature = "no-std"))] use core::fmt;
21#[cfg(feature = "no-std")] use alloc::fmt;
22#[cfg(feature = "no-std")] use alloc::string::String;
23#[cfg(feature = "no-std")] use alloc::vec::Vec;
24use nom::{
25  IResult,
26  branch::alt,
27  sequence::{tuple as nom_tuple, preceded},
28  combinator::{opt, eof, cut, peek},
29  multi::{many1, many_till, many0, separated_list1, separated_list0},
30  Err,
31  Err::Failure
32};
33
34use std::collections::HashMap;
35use colored::*;
36
37//use crate::*;
38use crate::{
39  TextFormatter,
40  ParseError,
41  ParseString,
42  ParseErrorDetail,
43  graphemes,
44  ParseResult,
45};
46
47// 2. Parser combinators
48// -----------------------
49
50/// Convert output of any parser into ParserNode::Null.
51/// Useful for working with `alt` combinator and error recovery functions.
52pub fn null<'a, F, O>(mut parser: F) ->
53  impl FnMut(ParseString<'a>) -> ParseResult<()>
54where
55  F: FnMut(ParseString<'a>) -> ParseResult<O>
56{
57  move |input: ParseString| match parser(input) {
58    Ok((remaining, _)) => Ok((remaining, ())),
59    Err(Err::Error(e)) => Err(Err::Error(e)),
60    Err(Err::Failure(e)) => Err(Err::Failure(e)),
61    x => panic!("Err::Incomplete is not supported"),
62  }
63}
64
65/// For parser p, run p and also output the range that p has matched
66/// upon success.
67pub fn range<'a, F, O>(mut parser: F) ->
68  impl FnMut(ParseString<'a>) -> ParseResult<(O, SourceRange)>
69where
70  F: FnMut(ParseString<'a>) -> ParseResult<O>
71{
72  move |input: ParseString| {
73    let start = input.loc();
74    match parser(input) {
75      Ok((remaining, o)) => {
76        let rng = SourceRange { start, end: remaining.loc(), };
77        Ok((remaining, (o, rng)))
78      },
79      Err(e) => Err(e),
80    }
81  }
82}
83
/// Wrap `$parser` with `label_without_recovery`, attaching `$msg` as the
/// error message and any trailing expressions as annotation ranges.
///
/// Accepted forms:
///   - `label!(parser, msg)`
///   - `label!(parser, msg, rng1, rng2, ...)`
#[macro_export]
macro_rules! label {
  ($parser:expr, $msg:expr $(, $rngs:expr)*) => {
    (label_without_recovery($parser, ParseErrorDetail {
      message: $msg,
      annotation_rngs: vec![$($rngs),*],
    }))
  };
}
98
/// Wrap `$parser` with `label_with_recovery`, using `$recovery_fn` to
/// resynchronize after an error, `$msg` as the error message and any trailing
/// expressions as annotation ranges.
///
/// Accepted forms:
///   - `labelr!(parser, recovery_fn, msg)`
///   - `labelr!(parser, recovery_fn, msg, rng1, rng2, ...)`
#[macro_export]
macro_rules! labelr {
  ($parser:expr, $recovery_fn:expr, $msg:expr $(, $rngs:expr)*) => {
    (label_with_recovery($parser, $recovery_fn, ParseErrorDetail {
      message: $msg,
      annotation_rngs: vec![$($rngs),*],
    }))
  };
}
113
114/// Label without recovery function. Upgrade Err::Error to Err:Failure
115/// and override its context information.
116pub fn label_without_recovery<'a, F, O>(
117  mut parser: F,
118  error_detail: ParseErrorDetail,
119) ->
120  impl FnMut(ParseString<'a>) -> ParseResult<O>
121where
122  F: FnMut(ParseString<'a>) -> ParseResult<O>
123{
124  move |mut input: ParseString| {
125    let start = input.loc();
126    match parser(input) {
127      Err(Err::Error(mut e)) => {
128        e.cause_range = SourceRange { start, end: e.cause_range.end };
129        e.error_detail = error_detail.clone();
130        Err(Err::Failure(e))
131      }
132      x => x,
133    }
134  }
135}
136
137/// Label with recovery function. In addition to upgrading errors, the
138/// error is logged and recovery function will be run as an attempt to
139/// synchronize parser state.
140pub fn label_with_recovery<'a, F, O>(
141  mut parser: F,
142  mut recovery_fn: fn(ParseString<'a>) -> ParseResult<O>,
143  error_detail: ParseErrorDetail,
144) ->
145  impl FnMut(ParseString<'a>) -> ParseResult<O>
146where
147  F: FnMut(ParseString<'a>) -> ParseResult<O>
148{
149  move |mut input: ParseString| {
150    let start = input.loc();
151    match parser(input) {
152      Err(Err::Error(mut e)) => {
153        e.cause_range = SourceRange { start, end: e.cause_range.end };
154        e.error_detail = error_detail.clone();
155        e.log();
156        recovery_fn(e.remaining_input)
157      }
158      Err(Err::Failure(mut e)) => {
159        e.cause_range = SourceRange { start, end: e.cause_range.end };
160        //e.error_detail = error_detail.clone();
161        e.log();
162        recovery_fn(e.remaining_input)
163      },
164      x => x,
165    }
166  }
167}
168
169/// For parser p, return the `!!p` peek parsing expression.
170pub fn is<'a, F, O>(mut parser: F) ->
171  impl FnMut(ParseString<'a>) -> ParseResult<O>
172where
173  F: FnMut(ParseString<'a>) -> ParseResult<O>
174{
175  move |input: ParseString| {
176    let input_clone = input.clone();
177    match parser(input_clone) {
178      Ok((_, o)) => Ok((input, o)),
179      _ => Err(Err::Error(ParseError::new(input, "Unexpected character"))),
180    }
181  }
182}
183
184/// For parser p, return the `!p` peek parsing expression.
185pub fn is_not<'a, F, E>(mut parser: F) ->
186  impl FnMut(ParseString<'a>) -> ParseResult<()>
187where
188  F: FnMut(ParseString<'a>) -> ParseResult<E>
189{
190  move |input: ParseString| {
191    let input_clone = input.clone();
192    match parser(input_clone) {
193      Err(Err::Failure(_)) |
194      Err(Err::Error(_)) => Ok((input, ())),
195      _ => Err(Err::Error(ParseError::new(input, "Unexpected character")))
196    }
197  }
198}
199
200/// Return a terminal parsing expression that consumes `tag` from input.
201pub fn tag(tag: &'static str) -> impl Fn(ParseString) -> ParseResult<String> {
202  move |mut input: ParseString| {
203    if input.is_empty() {
204      return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")));
205    }
206    if let Some(matched) = input.consume_tag(tag) {
207      Ok((input, matched))
208    } else {
209      Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
210    }
211  }
212}
213
214// 3. Recovery functions
215// -----------------------
216
217// skip_till_eol := (!new_line, any)* ;
218pub fn skip_till_eol(input: ParseString) -> ParseResult<Token> {
219  let (input, matched) = many0(nom_tuple((
220    is_not(new_line),
221    any_token,
222  )))(input)?;
223  let mut matched: Vec<Token> = matched.into_iter().map(|(_, t)| t).collect(); 
224  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default()); 
225  Ok((input, tkn))
226}
227
228// skip_past_eol := skip_till_eol, new_line ;
229pub fn skip_past_eol(input: ParseString) -> ParseResult<Token> {
230  let (input, matched) = skip_till_eol(input)?;
231  let (input, nl) = new_line(input)?;
232  let matched = Token::merge_tokens(&mut vec![matched, nl]).unwrap_or(Token::default());
233  Ok((input, matched))
234}
235
236// skip-till-end-of-statement := *((!new-line, !";"), any) ;
237pub fn skip_till_end_of_statement(input: ParseString) -> ParseResult<Token> {
238  // If empty, return
239  if input.is_empty() {
240      return Ok((input, Token::default()));
241  }
242
243  // Consume until either newline or ;
244  let (input, matched) = many0(nom_tuple((
245      // is_not matches any char NOT in the set
246      is_not(alt((
247          new_line,
248          semicolon,
249          mika_section_close,
250      ))),
251      any_token,
252  )))(input)?;
253
254  let mut matched: Vec<Token> = matched.into_iter().map(|(_, t)| t).collect();
255  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default());
256
257  Ok((input, tkn))
258}
259
260// skip_till_section_element := skip_past_eol, (!section_element, skip_past_eol)* ;
261pub fn skip_till_section_element(input: ParseString) -> ParseResult<Token> {
262  if input.is_empty() {
263    return Ok((input, Token::default()));
264  }
265  let (input, matched) = skip_past_eol(input)?;
266  let (input, matched2) = many0(nom_tuple((
267    is_not(section_element),
268    skip_past_eol,
269  )))(input)?;
270  let mut matched: Vec<Token> = vec![matched];
271  matched.extend(matched2.into_iter().map(|(_, t)| t));
272  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default());
273  Ok((input, tkn))
274}
275
276pub fn skip_till_paragraph_element(input: ParseString) -> ParseResult<Token> {
277  // if it's empty, return
278  if input.is_empty() {
279    return Ok((input, Token::default()));
280  }
281  // Otherwise, consume tokens until we reach a paragraph element
282  let (input, matched) = many0(nom_tuple((
283    is_not(paragraph_element),
284    any_token,
285  )))(input)?;
286  let mut matched: Vec<Token> = matched.into_iter().map(|(_, t)| t).collect(); 
287  let tkn = Token::merge_tokens(&mut matched).unwrap_or(Token::default());
288  Ok((input, tkn))
289}
290
291// skip_spaces := space* ;
292pub fn skip_spaces(input: ParseString) -> ParseResult<()> {
293  let (input, _) = many0(space)(input)?;
294  Ok((input, ()))
295}
296
297// skip_nil := ;
298pub fn skip_nil(input: ParseString) -> ParseResult<()> {
299  Ok((input, ()))
300}
301
302// skip_empty_mech_directive := ;
303pub fn skip_empty_mech_directive(input: ParseString) -> ParseResult<String> {
304  Ok((input, String::from("mech:")))
305}
306
307// recovery function for Recoverable nodes with customizable skip function
308pub fn recover<T: Recoverable, F>(input: ParseString, skip_fn: F) -> ParseResult<T>
309where
310  F: Fn(ParseString) -> ParseResult<Token>,
311{
312  let start = input.loc();
313  let (input, matched) = skip_fn(input)?;
314  let end = input.loc();
315  Ok((input, T::error_placeholder(matched, SourceRange { start, end })))
316}
317
318// 4. Public interface
319// ---------------------
320
321// mech_code_alt := fsm_specification | fsm_implementation | function_define | statement | expression | comment ;
322pub fn mech_code_alt(input: ParseString) -> ParseResult<MechCode> {
323  let (input, _) = whitespace0(input)?;
324  let parsers: Vec<(&str, Box<dyn Fn(ParseString) -> ParseResult<MechCode>>)> = vec![
325    // ("fsm_specification", Box::new(|i| fsm_specification(i).map(|(i, v)| (i, MechCode::FsmSpecification(v))))),
326    // ("fsm_implementation", Box::new(|i| fsm_implementation(i).map(|(i, v)| (i, MechCode::FsmImplementation(v))))),
327    ("function_define", Box::new(|i| function_define(i).map(|(i, v)| (i, MechCode::FunctionDefine(v))))),
328    ("statement",   Box::new(|i| statement(i).map(|(i, v)| (i, MechCode::Statement(v))))),
329    ("expression",  Box::new(|i| expression(i).map(|(i, v)| (i, MechCode::Expression(v))))),
330    ("comment",     Box::new(|i| comment(i).map(|(i, v)| (i, MechCode::Comment(v))))),
331  ];
332  match alt_best(input, &parsers) {
333    Ok((input, code)) => {
334      return Ok((input, code));
335    }
336    Err(e) => {
337      return Err(e);
338    }
339  };
340
341}
342
343/// code-terminal := *space-tab, ?(?semicolon, *space-tab, comment), (new-line | ";" | eof), *whitespace ;
344pub fn code_terminal(input: ParseString) -> ParseResult<Option<Comment>> {
345  let (input, _) = many0(space_tab)(input)?;
346  let (input, cmmnt) = opt(tuple((opt(semicolon), many0(space_tab), comment)))(input)?;
347  let (input, _) = alt((null(new_line), null(semicolon), null(eof), null(peek(mika_section_close))))(input)?;
348  let (input, _) = whitespace0(input)?;
349  let cmmt = match cmmnt {
350    Some((_, _, cmnt)) => Some(cmnt),
351    None => None,
352  };
353  Ok((input, cmmt))
354}
355
356// mech-code-block := +(mech-code, code-terminal) ;
357pub fn mech_code(input: ParseString) -> ParseResult<Vec<(MechCode,Option<Comment>)>> {
358  let mut output = vec![];
359  let mut new_input = input.clone();
360  loop {
361
362    if peek(not_mech_code)(new_input.clone()).is_ok() {
363      if output.len() > 0 {
364        return Ok((new_input, output));
365      } else {
366        let e = ParseError::new(new_input, "Unexpected character");
367        return Err(Err::Error(e));
368      }
369    }
370
371    let start = new_input.loc();
372    let start_cursor = new_input.cursor;
373    let (input, code) = match mech_code_alt(new_input.clone()) {
374      Err(Err::Error(mut e)) => {
375        // if the error is just "Unexpected character", we will just fail.
376        if e.error_detail.message == "Unexpected character" {
377          if output.len() > 0 {
378            return Ok((new_input, output));
379          } else {
380            return Err(Err::Error(e));
381          }
382        } else {
383          e.cause_range = SourceRange { start, end: e.cause_range.end };
384          e.log();
385          // skip till the end of the statement
386          let (input, skipped) = skip_till_end_of_statement(e.remaining_input)?;
387          // get tokens from start_cursor to input.cursor
388          let skipped_input = input.slice(start_cursor, input.cursor);
389          let skipped_token = Token {
390            kind: TokenKind::Error,
391            chars: skipped_input.chars().collect(),
392            src_range: SourceRange { start, end: input.loc() },
393          };
394          let mech_error = MechCode::Error(skipped_token, e.cause_range);
395          (input, mech_error)
396        }
397      }
398      Err(Err::Failure(mut e)) => {
399        // Check if this thing matches a section element:
400        match subtitle(new_input.clone()) {
401          Ok((_, _)) => {
402            // if it does, and we have already parsed something, return what we have.
403            if output.len() > 0 {
404              return Ok((new_input, output));
405            } else {
406              return Err(Err::Failure(e));
407            }
408          }
409          Err(_) => { /* continue with error recovery */ }
410        }
411        e.cause_range = SourceRange { start, end: e.cause_range.end };
412        e.log();
413        // skip till the end of the statement
414        let (input, skipped) = skip_till_end_of_statement(e.remaining_input)?;
415        // get tokens from start_cursor to input.cursor
416        let skipped_input = input.slice(start_cursor, input.cursor);
417        let skipped_token = Token {
418          kind: TokenKind::Error,
419          chars: skipped_input.chars().collect(),
420          src_range: SourceRange { start, end: input.loc() },
421        };
422        let mech_error = MechCode::Error(skipped_token, e.cause_range);
423        (input, mech_error)
424      },
425      Ok(x) => x,
426      _ => unreachable!(),
427    };
428    let (input, cmmt) = match code_terminal(input) {
429      Ok((input, cmmt)) => (input, cmmt),
430      Err(e) => {
431        // if we didn't parse a terminal, just return what we've got so far.
432        if output.len() > 0 {
433          return Ok((new_input, output));
434        }
435        // otherwise, return the error.
436        return Err(e);
437      }
438    };
439    output.push((code, cmmt));
440    new_input = input;
441    if new_input.is_empty() {
442      break;
443    }
444  }
445  Ok((new_input, output))
446}
447
448// program := ws0, ?title, body, ws0 ;
449pub fn program(input: ParseString) -> ParseResult<Program> {
450  let msg = "Expects program body";
451  let (input, _) = whitespace0(input)?;
452  let (input, title) = opt(title)(input)?;
453  //let (input, body) = labelr!(body, skip_nil, msg)(input)?;
454  let (input, body) = body(input)?;
455  //println!("Parsed program body: {:#?}", body);
456  let (input, _) = whitespace0(input)?;
457  Ok((input, Program{title, body}))
458}
459
460// parse_mech := program | statement ;
461pub fn parse_mech(input: ParseString) -> ParseResult<Program> {
462  //let (input, mech) = alt((program, statement))(input)?;
463  //Ok((input, ParserNode::Root { children: vec![mech] }))
464  let (input, mech) = program(input)?;
465  Ok((input, mech))
466}
467
468// 5. Error Reporting
469// --------------------
470
471/// Print formatted error message.
472pub fn print_err_report(text: &str, report: &ParserErrorReport) {
473  let msg = TextFormatter::new(text).format_error(report);
474  println!("{}", msg);
475}
476
477pub fn parse_grammar(text: &str) -> MResult<Grammar> {
478  // remove all whitespace from the input string
479  let text_no_Ws = &text.replace(" ", "").replace("\n", "").replace("\r", "").replace("\t", "");
480  let graphemes = graphemes::init_source(text_no_Ws);
481  let mut result_node = None;
482  let mut error_log: Vec<(SourceRange, ParseErrorDetail)> = vec![];
483
484  // Do parse
485  let remaining: ParseString = match grammar(ParseString::new(&graphemes)) {
486    // Got a parse tree, however there may be errors
487    Ok((mut remaining_input, parse_tree)) => {
488      error_log.append(&mut remaining_input.error_log);
489      result_node = Some(parse_tree);
490      remaining_input
491    },
492    // Parsing failed and could not be recovered. No parse tree was created in this case
493    Err(err) => {
494      match err {
495        Err::Error(mut e) | Err::Failure(mut e) => {
496          error_log.append(&mut e.remaining_input.error_log);
497          error_log.push((e.cause_range, e.error_detail));
498          e.remaining_input
499        },
500        Err::Incomplete(_) => panic!("nom::Err::Incomplete is not supported!"),
501      }
502    },
503  };
504  // Check if all inputs were parsed
505  if remaining.len() != 0 {
506    let e = ParseError::new(remaining, "Inputs since here are not parsed");
507    error_log.push((e.cause_range, e.error_detail));
508  }
509
510  // Construct result
511  if error_log.is_empty() {
512    Ok(result_node.unwrap())
513  } else {
514    let report: Vec<ParserErrorContext> = error_log.into_iter().map(|e| ParserErrorContext {
515      cause_rng: e.0,
516      err_message: String::from(e.1.message),
517      annotation_rngs: e.1.annotation_rngs,
518    }).collect();
519    Err(MechError::new(
520      ParserErrorReport(text.to_string(), report),
521      None
522    ))
523  }
524}
525
526
527pub fn parse(text: &str) -> MResult<Program> {
528  let graphemes = graphemes::init_source(text);
529  let mut result_node = None;
530  let mut error_log: Vec<(SourceRange, ParseErrorDetail)> = vec![];
531
532  // Do parse
533  let remaining: ParseString = match parse_mech(ParseString::new(&graphemes)) {
534    // Got a parse tree, however there may be errors
535    Ok((mut remaining_input, parse_tree)) => {
536      error_log.append(&mut remaining_input.error_log);
537      result_node = Some(parse_tree);
538      remaining_input
539    },
540    // Parsing failed and could not be recovered. No parse tree was created in this case
541    Err(err) => {
542      match err {
543        Err::Error(mut e) | Err::Failure(mut e) => {
544          error_log.append(&mut e.remaining_input.error_log);
545          error_log.push((e.cause_range, e.error_detail));
546          e.remaining_input
547        },
548        Err::Incomplete(_) => panic!("nom::Err::Incomplete is not supported!"),
549      }
550    },
551  };
552
553  // Check if all inputs were parsed
554  if remaining.len() != 0 {
555    let e = ParseError::new(remaining, "Inputs since here are not parsed");
556    error_log.push((e.cause_range, e.error_detail));
557  }
558
559  // Construct result
560  if error_log.is_empty() {
561    Ok(result_node.unwrap())
562  } else {
563    let report: Vec<ParserErrorContext> = error_log.into_iter().map(|e| ParserErrorContext {
564      cause_rng: e.0,
565      err_message: String::from(e.1.message),
566      annotation_rngs: e.1.annotation_rngs,
567    }).collect();
568    Err(MechError::new(
569      ParserErrorReport(text.to_string(), report),
570      None
571    ).with_compiler_loc())
572  }
573}