mech_syntax/
lib.rs

1// # Syntax
2
3#![cfg_attr(feature = "no-std", no_std)]
4#![cfg_attr(feature = "no-std", alloc)]
5#![feature(extract_if)]
6#![feature(get_mut_unchecked)]
7#![allow(dead_code)]
8#![allow(warnings)]
9#![feature(step_trait)]
10
11extern crate mech_core;
12#[cfg(feature="no-std")] #[macro_use] extern crate alloc;
13#[cfg(not(feature = "no-std"))] extern crate core;
14extern crate hashbrown;
15extern crate nom;
16extern crate nom_unicode;
17#[macro_use]
18extern crate lazy_static;
19extern crate nalgebra as na;
20extern crate tabled;
21extern crate libm;
22
23use mech_core::*;
24use mech_core::nodes::*;
25use std::cell::RefCell;
26use std::rc::Rc;
27
28#[cfg(not(feature = "no-std"))] use core::fmt;
29#[cfg(feature = "no-std")] use alloc::fmt;
30#[cfg(feature = "no-std")] use alloc::string::String;
31#[cfg(feature = "no-std")] use alloc::vec::Vec;
32use nom::{
33  IResult,
34  branch::alt,
35  sequence::tuple,
36  combinator::{opt, eof},
37  multi::{many1, many_till, many0, separated_list1},
38  Err,
39};
40
41use std::collections::HashMap;
42use colored::*;
43
44pub mod mechdown;
45pub mod expressions;
46pub mod statements;
47pub mod structures;
48pub mod base;
49pub mod parser;
50pub mod formatter;
51pub mod grammar;
52
53pub use crate::parser::*;
54pub use crate::mechdown::*;
55pub use crate::expressions::*;
56pub use crate::statements::*;
57pub use crate::structures::*;
58pub use crate::base::*;
59pub use crate::formatter::*;
60pub use crate::grammar::*;
61
62
63/// Unicode grapheme group utilities.
64/// Current implementation does not guarantee correct behavior for
65/// all possible unicode characters.
66pub mod graphemes {
67  use unicode_segmentation::UnicodeSegmentation;
68
69  /// Obtain unicode grapheme groups from input source, then make sure
70  /// it ends with new_line.  Many functions in the parser assume input
71  /// ends with new_line.
72  pub fn init_source(text: &str) -> Vec<&str> {
73    let mut graphemes = UnicodeSegmentation::graphemes(text, true).collect::<Vec<&str>>();
74    graphemes.push("\n");
75    graphemes
76  }
77
78  pub fn init_tag(tag: &str) -> Vec<&str> {
79    UnicodeSegmentation::graphemes(tag, true).collect::<Vec<&str>>()
80  }
81
82  pub fn is_new_line(grapheme: &str) -> bool {
83    match grapheme {
84      "\r" | "\n" | "\r\n" => true,
85      _ => false,
86    }
87  }
88
89  pub fn is_numeric(grapheme: &str) -> bool {
90    grapheme.chars().next().unwrap().is_numeric()
91  }
92
93  pub fn is_alpha(grapheme: &str) -> bool {
94    grapheme.chars().next().unwrap().is_alphabetic()
95  }
96
97  pub fn is_emoji(grapheme: &str) -> bool {
98    let ch = grapheme.chars().next().unwrap();
99    !(ch.is_alphanumeric() || ch.is_ascii())
100  }
101
102  pub fn width(grapheme: &str) -> usize {
103    // TODO: uniode width?
104    let ch = grapheme.chars().next().unwrap();
105    if ch == '\t' {
106      1
107    } else if ch.is_control() {
108      0
109    } else {
110      1
111    }
112  }
113}
114
/// Alias for nom's `IResult` with the input fixed to `ParseString` and the
/// error fixed to `ParseError`; `O` is the parser's output type.
pub type ParseResult<'a, O> = IResult<ParseString<'a>, O, ParseError<'a>>;
117
/// The input type for nom parsers. Instead of holding the actual input
/// string, this struct only holds a reference to the grapheme list, so
/// cloning it does not copy the source text.  Note that `error_log` is an
/// owned `Vec` and is cloned together with the struct.
#[derive(Clone, Debug)]
pub struct ParseString<'a> {
  /// Source code, as a borrowed list of grapheme clusters
  pub graphemes: &'a Vec<&'a str>,
  /// Error report, a list of (error_location, error_context)
  pub error_log: Vec<(SourceRange, ParseErrorDetail)>,
  /// Index into `graphemes` of the next grapheme to consume
  pub cursor: usize,
  /// Location of the grapheme pointed at by `cursor`
  pub location: SourceLocation,
}
132
133impl<'a> ParseString<'a> {
134  /// Must always point a an actual string
135  pub fn new(graphemes: &'a Vec<&'a str>) -> Self {
136    ParseString {
137      graphemes,
138      error_log: vec![],
139      cursor: 0,
140      location: SourceLocation { row: 1, col: 1 },
141    }
142  }
143
144  pub fn peek(&self, n: usize) -> Option<&str> {
145    self.graphemes.get(self.cursor + n).copied()
146  }
147
148  pub fn current(&self) -> Option<&str> {
149    self.graphemes.get(self.cursor).copied()
150  }
151
152  pub fn next(&self) -> Option<&str> {
153    self.graphemes.get(self.cursor + 1).copied()
154  }
155
156  /// If current location matches the tag, consume the matched string.
157  fn consume_tag(&mut self, tag: &str) -> Option<String> {
158    if self.is_empty() {
159      return None;
160    }
161    let current = self.graphemes[self.cursor];
162
163    let gs = graphemes::init_tag(tag); 
164    let gs_len = gs.len();
165
166    // Must have enough remaining characters
167    if self.len() < gs_len {
168      return None;
169    }
170
171    // Try to match the tag
172    let mut tmp_location = self.location;
173    for i in 0..gs_len {
174      let c = self.cursor + i;
175      let g = self.graphemes[c];
176      if g != gs[i] {
177        return None;
178      }
179      if graphemes::is_new_line(g) {
180        if !self.is_last_grapheme(c) {
181          tmp_location.row += 1;
182          tmp_location.col = 1;
183        }
184      } else {
185        tmp_location.col += graphemes::width(g);
186      }
187    }
188    // Tag matched, commit change
189    self.cursor += gs_len;
190    self.location = tmp_location;
191    Some(tag.to_string())
192  }
193
194  /// Mutate self by consuming one grapheme
195  fn consume_one(&mut self) -> Option<String> {
196    if self.is_empty() {
197      return None;
198    }
199    let g = self.graphemes[self.cursor];
200    if graphemes::is_new_line(g) {
201      if !self.is_last_grapheme(self.cursor) {
202        self.location.row += 1;
203        self.location.col = 1;
204      }
205    } else {
206      self.location.col += graphemes::width(g);
207    }
208    self.cursor += 1;
209    Some(g.to_string())
210  }
211
212
213  /// If current location matches any emoji, consume the matched string.
214  fn consume_emoji(&mut self) -> Option<String> {
215    if self.is_empty() {
216      return None;
217    }
218    let g = self.graphemes[self.cursor];
219    
220    if graphemes::is_emoji(g) {
221      self.cursor += 1;
222      self.location.col += graphemes::width(g);
223      Some(g.to_string())
224    } else {
225      None
226    }
227  }
228
229  /// If current location matches any alpha char, consume the matched string.
230  fn consume_alpha(&mut self) -> Option<String> {
231    if self.is_empty() {
232      return None;
233    }
234    let g = self.graphemes[self.cursor];
235    if graphemes::is_alpha(g) {
236      self.cursor += 1;
237      self.location.col += graphemes::width(g);
238      Some(g.to_string())
239    } else {
240      None
241    }
242  }
243
244  /// If current location matches any digit, consume the matched string.
245  fn consume_digit(&mut self) -> Option<String> {
246    if self.is_empty() {
247      return None;
248    }
249    let g = self.graphemes[self.cursor];
250    if graphemes::is_numeric(g) {
251      self.cursor += 1;
252      self.location.col += graphemes::width(g);
253      Some(g.to_string())
254    } else {
255      None
256    }
257  }
258
  /// Get the cursor's location in the source code.
  ///
  /// Returns a copy of the `location` field.
  fn loc(&self) -> SourceLocation {
    self.location
  }
263
264  /// Test whether the grapheme pointed by cursor is the last grapheme
265  fn is_last_grapheme(&self, c: usize) -> bool {
266    (self.graphemes.len() - 1 - c) == 0
267  }
268
269  /// Get remaining (unparsed) length
270  pub fn len(&self) -> usize {
271    self.graphemes.len() - self.cursor
272  }
273  
  /// Whether no unparsed graphemes remain, i.e. `len()` is 0.
  pub fn is_empty(&self) -> bool {
    self.len() == 0
  }
277
278  /// For debug purpose
279  fn output(&self) {
280              
281    println!("───────────────────{}", self.len());
282    for i in self.cursor..self.graphemes.len() {
283      print!("{}", self.graphemes[i]);
284    }
285    println!();
286    println!("───────────────────");
287  }
288}
289
/// Required by nom: reports the input length as the number of graphemes
/// not yet consumed, as returned by `ParseString::len`.
impl<'a> nom::InputLength for ParseString<'a> {
  fn input_len(&self) -> usize {
    self.len()
  }
}
296
/// The part of an error's context that is independent of its cause location.
#[derive(Clone, Debug)]
pub struct ParseErrorDetail {
  /// Static, human-readable description of the error
  pub message: &'static str,
  /// Additional source ranges attached to the error as annotations
  pub annotation_rngs: Vec<SourceRange>,
}
303
/// The error type for the nom parser, which handles full error context
/// (location + detail) and ownership of the input ParseString.
///
/// Eventually error context will be logged and ownership will be moved out.
#[derive(Clone, Debug)]
pub struct ParseError<'a> {
  /// Cause range is defined as [start, end), where `start` points at the first
  /// character that's caught by a label, and `end` points at the character
  /// right after the one that didn't match.
  ///
  /// Example:
  ///   index:  1234567
  ///   input:  abcdefg
  ///   error:   ~~~^
  ///   range:   |   |
  ///           [2,  5)
  ///
  pub cause_range: SourceRange,
  /// Holds ownership of the input ParseString
  pub remaining_input: ParseString<'a>,
  /// Detailed information about this error
  pub error_detail: ParseErrorDetail,
}
327
328impl<'a> ParseError<'a> {
329  /// Create a new error at current location of the input, with given message
330  /// and empty annotations.  Ownership of the input is also passed into this
331  /// error object.
332  pub fn new(input: ParseString<'a>, msg: &'static str) -> Self {
333    let start = input.loc();
334    let mut end = start;
335    end.col += 1;
336    ParseError {
337      cause_range: SourceRange { start, end },
338      remaining_input: input,
339      error_detail: ParseErrorDetail {
340        message: msg,
341        annotation_rngs: vec![],
342      }
343    }
344  }
345
346  /// Add self to the error log of input string.
347  fn log(&mut self) {
348    self.remaining_input.error_log.push((self.cause_range, self.error_detail.clone()));
349  }
350}
351
352/// Required by nom
353impl<'a> nom::error::ParseError<ParseString<'a>> for ParseError<'a> {
354  /// Not used, unless we have logical error
355  fn from_error_kind(input: ParseString<'a>,
356                      _kind: nom::error::ErrorKind) -> Self {
357    ParseError::new(input, "Unexpected error")
358  }
359
360  /// Probably not used
361  fn append(_input: ParseString<'a>,
362            _kind: nom::error::ErrorKind,
363            other: Self) -> Self {
364    other
365  }
366
367  /// Barely used, but we do want to keep the error with larger depth.
368  fn or(self, other: Self) -> Self {
369    let self_start = self.cause_range.start;
370    let other_start = other.cause_range.start;
371    if self_start > other_start {
372      self
373    } else {
374      other
375    }
376  }
377}
378
/// This struct is responsible for analysing text, interpreting indices
/// and ranges, and producing formatted messages.
pub struct TextFormatter<'a> {
  /// Grapheme clusters of the source text, as produced by
  /// `graphemes::init_source` (so a trailing "\n" is always present)
  graphemes: Vec<&'a str>,
  /// For each line, the index into `graphemes` of its first grapheme
  line_beginnings: Vec<usize>,
  /// Number of graphemes; used as the exclusive end of the last line
  end_index: usize,
}
386
387impl<'a> TextFormatter<'a> {
388  pub fn new(text: &'a str) -> Self {
389    let graphemes = graphemes::init_source(text);
390    let mut line_beginnings = vec![0];
391    for i in 0..graphemes.len() {
392      if graphemes::is_new_line(graphemes[i]) {
393        line_beginnings.push(i + 1);
394      }
395    }
396    line_beginnings.pop();
397    TextFormatter {
398      end_index: graphemes.len(),
399      graphemes,
400      line_beginnings,
401    }
402  }
403
404  // Index interpreter
405
406  fn get_line_range(&self, linenum: usize) -> Option<(usize, usize)> {
407    let line_index = linenum - 1;
408    if line_index >= self.line_beginnings.len() {
409      return None;
410    }
411    if linenum == self.line_beginnings.len() {  // asking for the last line
412      return Some((self.line_beginnings[line_index], self.end_index));
413    }
414    Some((self.line_beginnings[line_index], self.line_beginnings[linenum]))
415  }
416
417  fn get_text_by_linenum(&self, linenum: usize) -> String {
418    let (start, end) = match self.get_line_range(linenum) {
419      Some(v) => v,
420      None => return "\n".to_string(),
421    };
422    let mut s = self.graphemes[start..end].iter().map(|s| *s).collect::<String>();
423    if !s.ends_with("\n") {
424      s.push('\n');
425    }
426    s
427  }
428
429  fn get_textlen_by_linenum(&self, linenum: usize) -> usize {
430    let (start, end) = match self.get_line_range(linenum) {
431      Some(v) => v,
432      None => return 1,
433    };
434    let mut len = 0;
435    for i in start..end {
436      len += graphemes::width(self.graphemes[i]);
437    }
438    len + 1
439  }
440
441  // FormattedString printer
442
443  fn heading_color(s: &str) -> String {
444    s.truecolor(246, 192, 78).bold().to_string()
445  }
446
447  fn location_color(s: &str) -> String {
448    s.truecolor(0,187,204).bold().to_string()
449  }
450
451  fn linenum_color(s: &str) -> String {
452    s.truecolor(0,187,204).bold().to_string()
453  }
454
455  fn text_color(s: &str) -> String {
456    s.to_string()
457  }
458
459  fn annotation_color(s: &str) -> String {
460    s.truecolor(102,51,153).bold().to_string()
461  }
462
463  fn error_color(s: &str) -> String {
464    s.truecolor(170,51,85).bold().to_string()
465  }
466
467  fn ending_color(s: &str) -> String {
468    s.truecolor(246, 192, 78).bold().to_string()
469  }
470
471  fn err_heading(index: usize) -> String {
472    let n = index + 1;
473    let d = "────────────────────────";
474    let s = format!("{} syntax error #{} {}\n", d, n, d);
475    Self::heading_color(&s)
476  }
477
478  fn err_location(&self, ctx: &ParserErrorContext) -> String {
479    let err_end = ctx.cause_rng.end;
480    // error range will not ends at first column, so `minus 1` here is safe
481    let (row, col) = (err_end.row, err_end.col - 1);
482    let s = format!("@location:{}:{}\n", row, col);
483    Self::location_color(&s)
484  }
485
486  fn err_context(&self, ctx: &ParserErrorContext) -> String {
487    let mut result = String::new();
488
489    let mut annotation_rngs = ctx.annotation_rngs.clone();
490    annotation_rngs.push(ctx.cause_rng);
491
492    // the lines to print (1-indexed)
493    let mut lines_to_print: Vec<usize> = vec![];
494    for rng in &annotation_rngs {
495      let r1 = rng.start.row;
496      // if range ends at first column, it doesn't reach that row
497      let r2 = if rng.end.col == 1 {
498        usize::max(rng.start.row, rng.end.row - 1)
499      } else {
500        rng.end.row
501      };
502      for i in r1..=r2 {
503        lines_to_print.push(i);
504      }
505    }
506    lines_to_print.sort();
507    lines_to_print.dedup();
508
509    // the annotations on each line
510    // <linenum, Vec<(start_col, rng_len, is_major, is_cause)>>
511    let mut range_table: HashMap<usize, Vec<(usize, usize, bool, bool)>> = HashMap::new();
512    for linenum in &lines_to_print {
513      range_table.insert(*linenum, vec![]);
514    }
515    let n = annotation_rngs.len() - 1;  // if i == n, it's the last rng, i.e. the cause rng
516    for (i, rng) in annotation_rngs.iter().enumerate() {
517      // c2 might be 0
518      let (r1, c1) = (rng.start.row, rng.start.col);
519      let (r2, c2) = (rng.end.row, rng.end.col - 1);
520      if r1 == r2 {  // the entire range is on one line
521        if c2 >= c1 {  // and the range has non-zero length
522          range_table.get_mut(&r1).unwrap().push((c1, c2 - c1 + 1, true, i == n));
523        }
524      } else {  // the range spans over multiple lines
525        range_table.get_mut(&r1).unwrap().push((c1, usize::MAX, i != n, i == n));
526        for r in r1+1..r2 {
527          range_table.get_mut(&r).unwrap().push((1, usize::MAX, false, i == n));
528        }
529        if c2 != 0 {  // only add the last line if it hfnas non-zero length
530          range_table.get_mut(&r2).unwrap().push((1, c2, i == n, i == n));
531        }
532      }
533    }
534
535    // other data for printing
536    let dots = "...";
537    let indentation = " ";
538    let vert_split1 = " │";
539    let vert_split2 = "  ";
540    let arrow = "^";
541    let tilde = "~";
542    let lines_str: Vec<String> = lines_to_print.iter().map(|i| i.to_string()).collect();
543    let row_str_len = usize::max(lines_str.last().unwrap().len(), dots.len());
544
545    // print source code
546    for i in 0..lines_to_print.len() {
547      // [... | ]
548      if i != 0 && (lines_to_print[i] - lines_to_print[i-1] != 1) {
549        result.push_str(indentation);
550        for _ in 3..row_str_len { result.push(' '); }
551        result.push_str(&Self::linenum_color(dots));
552        result.push_str(&Self::linenum_color(vert_split1));
553        result.push('\n');
554      }
555
556      // [    | ]
557      result.push_str(indentation);
558      for _ in 0..row_str_len { result.push(' '); }
559      result.push_str(&Self::linenum_color(vert_split1));
560      result.push('\n');
561
562      // [row |  program text...]
563      let text = self.get_text_by_linenum(lines_to_print[i]);
564      result.push_str(indentation);
565      for _ in 0..row_str_len-lines_str[i].len() { result.push(' '); }
566      result.push_str(&Self::linenum_color(&lines_str[i]));
567      result.push_str(&Self::linenum_color(vert_split1));
568      result.push_str(&Self::text_color(&text));
569
570      // [    |    ^~~~]
571      result.push_str(indentation);
572      for _ in 0..row_str_len { result.push(' '); }
573      result.push_str(&Self::linenum_color(vert_split1));
574      let mut curr_col = 1;
575      let line_len = self.get_textlen_by_linenum(lines_to_print[i]);
576      let rngs = range_table.get(&lines_to_print[i]).unwrap();
577      for (start, len, major, cause) in rngs {
578        let max_len = usize::max(1, usize::min(*len, line_len - curr_col + 1));
579        for _ in curr_col..*start { result.push(' '); }
580        if *cause {
581          for _ in 0..max_len-1 {
582            result.push_str(&Self::error_color(tilde));
583          }
584          if *major {
585            result.push_str(&Self::error_color(arrow));
586          } else {
587            result.push_str(&Self::error_color(tilde));
588          }
589        } else {
590          if *major {
591            result.push_str(&Self::annotation_color(arrow));
592          } else {
593            result.push_str(&Self::annotation_color(tilde));
594          }
595          for _ in 0..max_len-1 {
596            result.push_str(&Self::annotation_color(tilde));
597          }
598        }
599        curr_col = start + max_len;
600      }
601      result.push('\n');
602    }
603
604    // print error message;
605    // error range never ends at first column, so it's safe to `minus 1` here
606    let cause_col = ctx.cause_rng.end.col - 1;
607    result.push_str(indentation);
608    for _ in 0..row_str_len { result.push(' '); }
609    result.push_str(vert_split2);
610    for _ in 0..cause_col-1 { result.push(' '); }
611    result.push_str(&Self::error_color(&ctx.err_message));
612    result.push('\n');
613
614    result
615  }
616
617  fn err_ending(d: usize) -> String {
618    let s = format!("... and {} other error{} not shown\n", d, if d == 1 {""} else {"s"});
619    Self::heading_color(&s)
620  }
621
622  /// Get formatted error message.
623  pub fn format_error(&self, errors: &ParserErrorReport) -> String {
624    let n = usize::min(errors.len(), 10);
625    let mut result = String::new();
626    result.push('\n');
627    for i in 0..n {
628      let ctx = &errors[i];
629      result.push_str(&Self::err_heading(i));
630      result.push_str(&self.err_location(ctx));
631      result.push_str(&self.err_context(ctx));
632      result.push_str("\n\n");
633    }
634    let d = errors.len() - n;
635    if d != 0 {
636      result.push_str(&Self::err_ending(d));
637    }
638    result
639  }
640}