mech_syntax/
lib.rs

1// # Syntax
2
3#![cfg_attr(feature = "no-std", no_std)]
4#![cfg_attr(feature = "no-std", alloc)]
5#![feature(extract_if)]
6#![feature(get_mut_unchecked)]
7#![allow(dead_code)]
8#![allow(warnings)]
9#![feature(step_trait)]
10
11extern crate mech_core;
12#[cfg(feature="no-std")] #[macro_use] extern crate alloc;
13#[cfg(not(feature = "no-std"))] extern crate core;
14extern crate hashbrown;
15extern crate nom;
16extern crate nom_unicode;
17#[macro_use]
18extern crate lazy_static;
19extern crate nalgebra as na;
20extern crate tabled;
21extern crate libm;
22
23use mech_core::*;
24use mech_core::nodes::*;
25use std::cell::RefCell;
26use std::rc::Rc;
27
28#[cfg(not(feature = "no-std"))] use core::fmt;
29#[cfg(feature = "no-std")] use alloc::fmt;
30#[cfg(feature = "no-std")] use alloc::string::String;
31#[cfg(feature = "no-std")] use alloc::vec::Vec;
32use nom::{
33  IResult,
34  branch::alt,
35  sequence::tuple,
36  combinator::{opt, eof},
37  multi::{many1, many_till, many0, separated_list1},
38  Err,
39};
40
41use std::collections::HashMap;
42use colored::*;
43
44pub mod mechdown;
45pub mod expressions;
46pub mod statements;
47pub mod structures;
48pub mod base;
49pub mod parser;
50pub mod formatter;
51pub mod grammar;
52
53pub use crate::parser::*;
54pub use crate::mechdown::*;
55pub use crate::expressions::*;
56pub use crate::statements::*;
57pub use crate::structures::*;
58pub use crate::base::*;
59pub use crate::formatter::*;
60pub use crate::grammar::*;
61
62
63/// Unicode grapheme group utilities.
64/// Current implementation does not guarantee correct behavior for
65/// all possible unicode characters.
66pub mod graphemes {
67  use unicode_segmentation::UnicodeSegmentation;
68
69  /// Obtain unicode grapheme groups from input source, then make sure
70  /// it ends with new_line.  Many functions in the parser assume input
71  /// ends with new_line.
72  pub fn init_source(text: &str) -> Vec<&str> {
73    let mut graphemes = UnicodeSegmentation::graphemes(text, true).collect::<Vec<&str>>();
74    graphemes.push("\n");
75    graphemes
76  }
77
78  pub fn init_tag(tag: &str) -> Vec<&str> {
79    UnicodeSegmentation::graphemes(tag, true).collect::<Vec<&str>>()
80  }
81
82  pub fn is_new_line(grapheme: &str) -> bool {
83    match grapheme {
84      "\r" | "\n" | "\r\n" => true,
85      _ => false,
86    }
87  }
88
89  pub fn is_numeric(grapheme: &str) -> bool {
90    grapheme.chars().next().unwrap().is_numeric()
91  }
92
93  pub fn is_alpha(grapheme: &str) -> bool {
94    grapheme.chars().next().unwrap().is_alphabetic()
95  }
96
97  pub fn is_emoji(grapheme: &str) -> bool {
98    let ch = grapheme.chars().next().unwrap();
99    !(ch.is_alphanumeric() || ch.is_ascii())
100  }
101
102  pub fn width(grapheme: &str) -> usize {
103    // TODO: uniode width?
104    let ch = grapheme.chars().next().unwrap();
105    if ch == '\t' {
106      1
107    } else if ch.is_control() {
108      0
109    } else {
110      1
111    }
112  }
113}
114
115/// Alias for the nom `IResult` specialised to this crate's parser types:
/// the input is a `ParseString`, the output is `O`, and the error is a
/// `ParseError`.
116pub type ParseResult<'a, O> = IResult<ParseString<'a>, O, ParseError<'a>>;
117
118/// The input type for nom parsers. Instead of holding the actual input
119/// string, this struct only holds a reference to the grapheme list of that
120/// string, so cloning it does not copy the source text.
///
/// Note that a clone still copies `error_log`.
121#[derive(Clone, Debug)]
122pub struct ParseString<'a> {
123  /// Source code, split into graphemes
124  pub graphemes: &'a Vec<&'a str>,
125  /// Error report, a list of (error_location, error_detail)
126  pub error_log: Vec<(SourceRange, ParseErrorDetail)>,
127  /// Index into `graphemes` of the next grapheme to consume
128  pub cursor: usize,
129  /// Source location (row, col) of the grapheme pointed to by `cursor`
130  pub location: SourceLocation,
131}
132
133impl<'a> ParseString<'a> {
134  /// Must always point a an actual string
135  pub fn new(graphemes: &'a Vec<&'a str>) -> Self {
136    ParseString {
137      graphemes,
138      error_log: vec![],
139      cursor: 0,
140      location: SourceLocation { row: 1, col: 1 },
141    }
142  }
143
144  pub fn current(&self) -> Option<&str> {
145    self.graphemes.get(self.cursor).copied()
146  }
147
148  /// If current location matches the tag, consume the matched string.
149  fn consume_tag(&mut self, tag: &str) -> Option<String> {
150    if self.is_empty() {
151      return None;
152    }
153    let current = self.graphemes[self.cursor];
154
155    let gs = graphemes::init_tag(tag); 
156    let gs_len = gs.len();
157
158    // Must have enough remaining characters
159    if self.len() < gs_len {
160      return None;
161    }
162
163    // Try to match the tag
164    let mut tmp_location = self.location;
165    for i in 0..gs_len {
166      let c = self.cursor + i;
167      let g = self.graphemes[c];
168      if g != gs[i] {
169        return None;
170      }
171      if graphemes::is_new_line(g) {
172        if !self.is_last_grapheme(c) {
173          tmp_location.row += 1;
174          tmp_location.col = 1;
175        }
176      } else {
177        tmp_location.col += graphemes::width(g);
178      }
179    }
180    // Tag matched, commit change
181    self.cursor += gs_len;
182    self.location = tmp_location;
183    Some(tag.to_string())
184  }
185
186  /// Mutate self by consuming one grapheme
187  fn consume_one(&mut self) -> Option<String> {
188    if self.is_empty() {
189      return None;
190    }
191    let g = self.graphemes[self.cursor];
192    if graphemes::is_new_line(g) {
193      if !self.is_last_grapheme(self.cursor) {
194        self.location.row += 1;
195        self.location.col = 1;
196      }
197    } else {
198      self.location.col += graphemes::width(g);
199    }
200    self.cursor += 1;
201    Some(g.to_string())
202  }
203
204
205  /// If current location matches any emoji, consume the matched string.
206  fn consume_emoji(&mut self) -> Option<String> {
207    if self.is_empty() {
208      return None;
209    }
210    let g = self.graphemes[self.cursor];
211    
212    if graphemes::is_emoji(g) {
213      self.cursor += 1;
214      self.location.col += graphemes::width(g);
215      Some(g.to_string())
216    } else {
217      None
218    }
219  }
220
221  /// If current location matches any alpha char, consume the matched string.
222  fn consume_alpha(&mut self) -> Option<String> {
223    if self.is_empty() {
224      return None;
225    }
226    let g = self.graphemes[self.cursor];
227    if graphemes::is_alpha(g) {
228      self.cursor += 1;
229      self.location.col += graphemes::width(g);
230      Some(g.to_string())
231    } else {
232      None
233    }
234  }
235
236  /// If current location matches any digit, consume the matched string.
237  fn consume_digit(&mut self) -> Option<String> {
238    if self.is_empty() {
239      return None;
240    }
241    let g = self.graphemes[self.cursor];
242    if graphemes::is_numeric(g) {
243      self.cursor += 1;
244      self.location.col += graphemes::width(g);
245      Some(g.to_string())
246    } else {
247      None
248    }
249  }
250
251  /// Get cursor's location in source code
252  fn loc(&self) -> SourceLocation {
253    self.location
254  }
255
256  /// Test whether the grapheme pointed by cursor is the last grapheme
257  fn is_last_grapheme(&self, c: usize) -> bool {
258    (self.graphemes.len() - 1 - c) == 0
259  }
260
261  /// Get remaining (unparsed) length
262  pub fn len(&self) -> usize {
263    self.graphemes.len() - self.cursor
264  }
265  
266  pub fn is_empty(&self) -> bool {
267    self.len() == 0
268  }
269
270  /// For debug purpose
271  fn output(&self) {
272              
273    println!("───────────────────{}", self.len());
274    for i in self.cursor..self.graphemes.len() {
275      print!("{}", self.graphemes[i]);
276    }
277    println!();
278    println!("───────────────────");
279  }
280}
281
282/// Required by nom
283impl<'a> nom::InputLength for ParseString<'a> {
284  fn input_len(&self) -> usize {
285    self.len()
286  }
287}
288
289/// The part of an error's context that is independent of its cause location.
290#[derive(Clone, Debug)]
291pub struct ParseErrorDetail {
  /// Static message text describing the error
292  pub message: &'static str,
  /// Source ranges attached to this error as annotations
293  pub annotation_rngs: Vec<SourceRange>,
294}
295
296/// The error type for the nom parser, which carries the full error context
297/// (location + detail) and owns the input ParseString.
298///
299/// Eventually the error context is logged and ownership is moved out.
300#[derive(Clone, Debug)]
301pub struct ParseError<'a> {
302  /// Cause range is defined as [start, end), where `start` points at the first
303  /// character that's caught by a label, and `end` points at the character
304  /// following the one that didn't match.
305  ///
306  /// Example:
307  ///   index:  1234567
308  ///   input:  abcdefg
309  ///   error:   ~~~^
310  ///   range:   |   |
311  ///           [2,  5)
312  ///
313  pub cause_range: SourceRange,
314  /// Holds ownership of the input ParseString
315  pub remaining_input: ParseString<'a>,
316  /// Detailed information about this error
317  pub error_detail: ParseErrorDetail,
318}
319
320impl<'a> ParseError<'a> {
321  /// Create a new error at current location of the input, with given message
322  /// and empty annotations.  Ownership of the input is also passed into this
323  /// error object.
324  pub fn new(input: ParseString<'a>, msg: &'static str) -> Self {
325    let start = input.loc();
326    let mut end = start;
327    end.col += 1;
328    ParseError {
329      cause_range: SourceRange { start, end },
330      remaining_input: input,
331      error_detail: ParseErrorDetail {
332        message: msg,
333        annotation_rngs: vec![],
334      }
335    }
336  }
337
338  /// Add self to the error log of input string.
339  fn log(&mut self) {
340    self.remaining_input.error_log.push((self.cause_range, self.error_detail.clone()));
341  }
342}
343
344/// Required by nom
345impl<'a> nom::error::ParseError<ParseString<'a>> for ParseError<'a> {
346  /// Not used, unless we have logical error
347  fn from_error_kind(input: ParseString<'a>,
348                      _kind: nom::error::ErrorKind) -> Self {
349    ParseError::new(input, "Unexpected error")
350  }
351
352  /// Probably not used
353  fn append(_input: ParseString<'a>,
354            _kind: nom::error::ErrorKind,
355            other: Self) -> Self {
356    other
357  }
358
359  /// Barely used, but we do want to keep the error with larger depth.
360  fn or(self, other: Self) -> Self {
361    let self_start = self.cause_range.start;
362    let other_start = other.cause_range.start;
363    if self_start > other_start {
364      self
365    } else {
366      other
367    }
368  }
369}
370
371
372
373/// This struct is responsible for analysing text, interpreting indices
374/// and ranges, and producing formatted messages.
375pub struct TextFormatter<'a> {
  /// Graphemes of the source text, as produced by `graphemes::init_source`
  /// (so a trailing new line is always present)
376  graphemes: Vec<&'a str>,
  /// For each line, the index in `graphemes` of its first grapheme
377  line_beginnings: Vec<usize>,
  /// Number of graphemes; used as the end index of the last line
378  end_index: usize,
379}
380
381impl<'a> TextFormatter<'a> {
382  pub fn new(text: &'a str) -> Self {
383    let graphemes = graphemes::init_source(text);
384    let mut line_beginnings = vec![0];
385    for i in 0..graphemes.len() {
386      if graphemes::is_new_line(graphemes[i]) {
387        line_beginnings.push(i + 1);
388      }
389    }
390    line_beginnings.pop();
391    TextFormatter {
392      end_index: graphemes.len(),
393      graphemes,
394      line_beginnings,
395    }
396  }
397
398  // Index interpreter
399
400  fn get_line_range(&self, linenum: usize) -> Option<(usize, usize)> {
401    let line_index = linenum - 1;
402    if line_index >= self.line_beginnings.len() {
403      return None;
404    }
405    if linenum == self.line_beginnings.len() {  // asking for the last line
406      return Some((self.line_beginnings[line_index], self.end_index));
407    }
408    Some((self.line_beginnings[line_index], self.line_beginnings[linenum]))
409  }
410
411  fn get_text_by_linenum(&self, linenum: usize) -> String {
412    let (start, end) = match self.get_line_range(linenum) {
413      Some(v) => v,
414      None => return "\n".to_string(),
415    };
416    let mut s = self.graphemes[start..end].iter().map(|s| *s).collect::<String>();
417    if !s.ends_with("\n") {
418      s.push('\n');
419    }
420    s
421  }
422
423  fn get_textlen_by_linenum(&self, linenum: usize) -> usize {
424    let (start, end) = match self.get_line_range(linenum) {
425      Some(v) => v,
426      None => return 1,
427    };
428    let mut len = 0;
429    for i in start..end {
430      len += graphemes::width(self.graphemes[i]);
431    }
432    len + 1
433  }
434
435  // FormattedString printer
436
437  fn heading_color(s: &str) -> String {
438    s.truecolor(246, 192, 78).bold().to_string()
439  }
440
441  fn location_color(s: &str) -> String {
442    s.truecolor(0,187,204).bold().to_string()
443  }
444
445  fn linenum_color(s: &str) -> String {
446    s.truecolor(0,187,204).bold().to_string()
447  }
448
449  fn text_color(s: &str) -> String {
450    s.to_string()
451  }
452
453  fn annotation_color(s: &str) -> String {
454    s.truecolor(102,51,153).bold().to_string()
455  }
456
457  fn error_color(s: &str) -> String {
458    s.truecolor(170,51,85).bold().to_string()
459  }
460
461  fn ending_color(s: &str) -> String {
462    s.truecolor(246, 192, 78).bold().to_string()
463  }
464
465  fn err_heading(index: usize) -> String {
466    let n = index + 1;
467    let d = "────────────────────────";
468    let s = format!("{} syntax error #{} {}\n", d, n, d);
469    Self::heading_color(&s)
470  }
471
472  fn err_location(&self, ctx: &ParserErrorContext) -> String {
473    let err_end = ctx.cause_rng.end;
474    // error range will not ends at first column, so `minus 1` here is safe
475    let (row, col) = (err_end.row, err_end.col - 1);
476    let s = format!("@location:{}:{}\n", row, col);
477    Self::location_color(&s)
478  }
479
480  fn err_context(&self, ctx: &ParserErrorContext) -> String {
481    let mut result = String::new();
482
483    let mut annotation_rngs = ctx.annotation_rngs.clone();
484    annotation_rngs.push(ctx.cause_rng);
485
486    // the lines to print (1-indexed)
487    let mut lines_to_print: Vec<usize> = vec![];
488    for rng in &annotation_rngs {
489      let r1 = rng.start.row;
490      // if range ends at first column, it doesn't reach that row
491      let r2 = if rng.end.col == 1 {
492        usize::max(rng.start.row, rng.end.row - 1)
493      } else {
494        rng.end.row
495      };
496      for i in r1..=r2 {
497        lines_to_print.push(i);
498      }
499    }
500    lines_to_print.sort();
501    lines_to_print.dedup();
502
503    // the annotations on each line
504    // <linenum, Vec<(start_col, rng_len, is_major, is_cause)>>
505    let mut range_table: HashMap<usize, Vec<(usize, usize, bool, bool)>> = HashMap::new();
506    for linenum in &lines_to_print {
507      range_table.insert(*linenum, vec![]);
508    }
509    let n = annotation_rngs.len() - 1;  // if i == n, it's the last rng, i.e. the cause rng
510    for (i, rng) in annotation_rngs.iter().enumerate() {
511      // c2 might be 0
512      let (r1, c1) = (rng.start.row, rng.start.col);
513      let (r2, c2) = (rng.end.row, rng.end.col - 1);
514      if r1 == r2 {  // the entire range is on one line
515        if c2 >= c1 {  // and the range has non-zero length
516          range_table.get_mut(&r1).unwrap().push((c1, c2 - c1 + 1, true, i == n));
517        }
518      } else {  // the range spans over multiple lines
519        range_table.get_mut(&r1).unwrap().push((c1, usize::MAX, i != n, i == n));
520        for r in r1+1..r2 {
521          range_table.get_mut(&r).unwrap().push((1, usize::MAX, false, i == n));
522        }
523        if c2 != 0 {  // only add the last line if it hfnas non-zero length
524          range_table.get_mut(&r2).unwrap().push((1, c2, i == n, i == n));
525        }
526      }
527    }
528
529    // other data for printing
530    let dots = "...";
531    let indentation = " ";
532    let vert_split1 = " │";
533    let vert_split2 = "  ";
534    let arrow = "^";
535    let tilde = "~";
536    let lines_str: Vec<String> = lines_to_print.iter().map(|i| i.to_string()).collect();
537    let row_str_len = usize::max(lines_str.last().unwrap().len(), dots.len());
538
539    // print source code
540    for i in 0..lines_to_print.len() {
541      // [... | ]
542      if i != 0 && (lines_to_print[i] - lines_to_print[i-1] != 1) {
543        result.push_str(indentation);
544        for _ in 3..row_str_len { result.push(' '); }
545        result.push_str(&Self::linenum_color(dots));
546        result.push_str(&Self::linenum_color(vert_split1));
547        result.push('\n');
548      }
549
550      // [    | ]
551      result.push_str(indentation);
552      for _ in 0..row_str_len { result.push(' '); }
553      result.push_str(&Self::linenum_color(vert_split1));
554      result.push('\n');
555
556      // [row |  program text...]
557      let text = self.get_text_by_linenum(lines_to_print[i]);
558      result.push_str(indentation);
559      for _ in 0..row_str_len-lines_str[i].len() { result.push(' '); }
560      result.push_str(&Self::linenum_color(&lines_str[i]));
561      result.push_str(&Self::linenum_color(vert_split1));
562      result.push_str(&Self::text_color(&text));
563
564      // [    |    ^~~~]
565      result.push_str(indentation);
566      for _ in 0..row_str_len { result.push(' '); }
567      result.push_str(&Self::linenum_color(vert_split1));
568      let mut curr_col = 1;
569      let line_len = self.get_textlen_by_linenum(lines_to_print[i]);
570      let rngs = range_table.get(&lines_to_print[i]).unwrap();
571      for (start, len, major, cause) in rngs {
572        let max_len = usize::max(1, usize::min(*len, line_len - curr_col + 1));
573        for _ in curr_col..*start { result.push(' '); }
574        if *cause {
575          for _ in 0..max_len-1 {
576            result.push_str(&Self::error_color(tilde));
577          }
578          if *major {
579            result.push_str(&Self::error_color(arrow));
580          } else {
581            result.push_str(&Self::error_color(tilde));
582          }
583        } else {
584          if *major {
585            result.push_str(&Self::annotation_color(arrow));
586          } else {
587            result.push_str(&Self::annotation_color(tilde));
588          }
589          for _ in 0..max_len-1 {
590            result.push_str(&Self::annotation_color(tilde));
591          }
592        }
593        curr_col = start + max_len;
594      }
595      result.push('\n');
596    }
597
598    // print error message;
599    // error range never ends at first column, so it's safe to `minus 1` here
600    let cause_col = ctx.cause_rng.end.col - 1;
601    result.push_str(indentation);
602    for _ in 0..row_str_len { result.push(' '); }
603    result.push_str(vert_split2);
604    for _ in 0..cause_col-1 { result.push(' '); }
605    result.push_str(&Self::error_color(&ctx.err_message));
606    result.push('\n');
607
608    result
609  }
610
611  fn err_ending(d: usize) -> String {
612    let s = format!("... and {} other error{} not shown\n", d, if d == 1 {""} else {"s"});
613    Self::heading_color(&s)
614  }
615
616  /// Get formatted error message.
617  pub fn format_error(&self, errors: &ParserErrorReport) -> String {
618    let n = usize::min(errors.len(), 10);
619    let mut result = String::new();
620    result.push('\n');
621    for i in 0..n {
622      let ctx = &errors[i];
623      result.push_str(&Self::err_heading(i));
624      result.push_str(&self.err_location(ctx));
625      result.push_str(&self.err_context(ctx));
626      result.push_str("\n\n");
627    }
628    let d = errors.len() - n;
629    if d != 0 {
630      result.push_str(&Self::err_ending(d));
631    }
632    result
633  }
634}