mech_syntax/
lib.rs

1// # Syntax
2
3#![cfg_attr(feature = "no-std", no_std)]
4#![cfg_attr(feature = "no-std", alloc)]
5#![feature(extract_if)]
6#![feature(get_mut_unchecked)]
7#![allow(dead_code)]
8#![allow(warnings)]
9#![feature(step_trait)]
10
11extern crate mech_core;
12#[cfg(feature="no-std")] #[macro_use] extern crate alloc;
13#[cfg(not(feature = "no-std"))] extern crate core;
14extern crate hashbrown;
15extern crate nom;
16extern crate nom_unicode;
17#[macro_use]
18extern crate lazy_static;
19extern crate nalgebra as na;
20extern crate tabled;
21extern crate libm;
22
23use mech_core::*;
24use mech_core::nodes::*;
25use std::cell::RefCell;
26use std::rc::Rc;
27
28#[cfg(not(feature = "no-std"))] use core::fmt;
29#[cfg(feature = "no-std")] use alloc::fmt;
30#[cfg(feature = "no-std")] use alloc::string::String;
31#[cfg(feature = "no-std")] use alloc::vec::Vec;
32use nom::{
33  IResult,
34  branch::alt,
35  sequence::tuple,
36  combinator::{opt, eof},
37  multi::{many1, many_till, many0, separated_list1},
38  Err,
39};
40
41use std::collections::HashMap;
42use colored::*;
43
44pub mod mechdown;
45pub mod expressions;
46pub mod statements;
47pub mod structures;
48pub mod base;
49pub mod parser;
50pub mod formatter;
51
52pub use crate::parser::*;
53pub use crate::mechdown::*;
54pub use crate::expressions::*;
55pub use crate::statements::*;
56pub use crate::structures::*;
57pub use crate::base::*;
58pub use crate::formatter::*;
59
60
61/// Unicode grapheme group utilities.
62/// Current implementation does not guarantee correct behavior for
63/// all possible unicode characters.
64pub mod graphemes {
65  use unicode_segmentation::UnicodeSegmentation;
66
67  /// Obtain unicode grapheme groups from input source, then make sure
68  /// it ends with new_line.  Many functions in the parser assume input
69  /// ends with new_line.
70  pub fn init_source(text: &str) -> Vec<&str> {
71    let mut graphemes = UnicodeSegmentation::graphemes(text, true).collect::<Vec<&str>>();
72    graphemes.push("\n");
73    graphemes
74  }
75
76  pub fn init_tag(tag: &str) -> Vec<&str> {
77    UnicodeSegmentation::graphemes(tag, true).collect::<Vec<&str>>()
78  }
79
80  pub fn is_new_line(grapheme: &str) -> bool {
81    match grapheme {
82      "\r" | "\n" | "\r\n" => true,
83      _ => false,
84    }
85  }
86
87  pub fn is_numeric(grapheme: &str) -> bool {
88    grapheme.chars().next().unwrap().is_numeric()
89  }
90
91  pub fn is_alpha(grapheme: &str) -> bool {
92    grapheme.chars().next().unwrap().is_alphabetic()
93  }
94
95  pub fn is_emoji(grapheme: &str) -> bool {
96    let ch = grapheme.chars().next().unwrap();
97    !(ch.is_alphanumeric() || ch.is_ascii())
98  }
99
100  pub fn width(grapheme: &str) -> usize {
101    // TODO: uniode width?
102    let ch = grapheme.chars().next().unwrap();
103    if ch == '\t' {
104      1
105    } else if ch.is_control() {
106      0
107    } else {
108      1
109    }
110  }
111}
112
/// Shorthand for the nom result type used in this file: the input is a
/// `ParseString`, the successful output is `O`, and failures are reported
/// as `ParseError`.
pub type ParseResult<'a, O> = IResult<ParseString<'a>, O, ParseError<'a>>;
115
/// The input type for nom parsers. Instead of holding the actual input
/// string, this struct only holds a reference to the grapheme list of that
/// string so that it can be cloned at much lower cost.  Note that the
/// error log is an owned `Vec` and is copied along with every clone.
#[derive(Clone, Debug)]
pub struct ParseString<'a> {
  /// Source code, as a borrowed list of graphemes
  pub graphemes: &'a Vec<&'a str>,
  /// Error report, a list of (error_location, error_context)
  pub error_log: Vec<(SourceRange, ParseErrorDetail)>,
  /// Index into `graphemes` of the next grapheme to consume
  pub cursor: usize,
  /// Row/column location of the grapheme pointed at by `cursor`
  /// (a freshly created `ParseString` starts at row 1, column 1)
  pub location: SourceLocation,
}
130
131impl<'a> ParseString<'a> {
132  /// Must always point a an actual string
133  pub fn new(graphemes: &'a Vec<&'a str>) -> Self {
134    ParseString {
135      graphemes,
136      error_log: vec![],
137      cursor: 0,
138      location: SourceLocation { row: 1, col: 1 },
139    }
140  }
141
142  pub fn current(&self) -> Option<&str> {
143    self.graphemes.get(self.cursor).copied()
144  }
145
146  /// If current location matches the tag, consume the matched string.
147  fn consume_tag(&mut self, tag: &str) -> Option<String> {
148    if self.is_empty() {
149      return None;
150    }
151    let current = self.graphemes[self.cursor];
152
153    let gs = graphemes::init_tag(tag); 
154    let gs_len = gs.len();
155
156    // Must have enough remaining characters
157    if self.len() < gs_len {
158      return None;
159    }
160
161    // Try to match the tag
162    let mut tmp_location = self.location;
163    for i in 0..gs_len {
164      let c = self.cursor + i;
165      let g = self.graphemes[c];
166      if g != gs[i] {
167        return None;
168      }
169      if graphemes::is_new_line(g) {
170        if !self.is_last_grapheme(c) {
171          tmp_location.row += 1;
172          tmp_location.col = 1;
173        }
174      } else {
175        tmp_location.col += graphemes::width(g);
176      }
177    }
178    // Tag matched, commit change
179    self.cursor += gs_len;
180    self.location = tmp_location;
181    Some(tag.to_string())
182  }
183
184  /// Mutate self by consuming one grapheme
185  fn consume_one(&mut self) -> Option<String> {
186    if self.is_empty() {
187      return None;
188    }
189    let g = self.graphemes[self.cursor];
190    if graphemes::is_new_line(g) {
191      if !self.is_last_grapheme(self.cursor) {
192        self.location.row += 1;
193        self.location.col = 1;
194      }
195    } else {
196      self.location.col += graphemes::width(g);
197    }
198    self.cursor += 1;
199    Some(g.to_string())
200  }
201
202
203  /// If current location matches any emoji, consume the matched string.
204  fn consume_emoji(&mut self) -> Option<String> {
205    if self.is_empty() {
206      return None;
207    }
208    let g = self.graphemes[self.cursor];
209    
210    if graphemes::is_emoji(g) {
211      self.cursor += 1;
212      self.location.col += graphemes::width(g);
213      Some(g.to_string())
214    } else {
215      None
216    }
217  }
218
219  /// If current location matches any alpha char, consume the matched string.
220  fn consume_alpha(&mut self) -> Option<String> {
221    if self.is_empty() {
222      return None;
223    }
224    let g = self.graphemes[self.cursor];
225    if graphemes::is_alpha(g) {
226      self.cursor += 1;
227      self.location.col += graphemes::width(g);
228      Some(g.to_string())
229    } else {
230      None
231    }
232  }
233
234  /// If current location matches any digit, consume the matched string.
235  fn consume_digit(&mut self) -> Option<String> {
236    if self.is_empty() {
237      return None;
238    }
239    let g = self.graphemes[self.cursor];
240    if graphemes::is_numeric(g) {
241      self.cursor += 1;
242      self.location.col += graphemes::width(g);
243      Some(g.to_string())
244    } else {
245      None
246    }
247  }
248
249  /// Get cursor's location in source code
250  fn loc(&self) -> SourceLocation {
251    self.location
252  }
253
254  /// Test whether the grapheme pointed by cursor is the last grapheme
255  fn is_last_grapheme(&self, c: usize) -> bool {
256    (self.graphemes.len() - 1 - c) == 0
257  }
258
259  /// Get remaining (unparsed) length
260  pub fn len(&self) -> usize {
261    self.graphemes.len() - self.cursor
262  }
263  
264  pub fn is_empty(&self) -> bool {
265    self.len() == 0
266  }
267
268  /// For debug purpose
269  fn output(&self) {
270              
271    println!("───────────────────{}", self.len());
272    for i in self.cursor..self.graphemes.len() {
273      print!("{}", self.graphemes[i]);
274    }
275    println!();
276    println!("───────────────────");
277  }
278}
279
280/// Required by nom
281impl<'a> nom::InputLength for ParseString<'a> {
282  fn input_len(&self) -> usize {
283    self.len()
284  }
285}
286
/// The part of an error context that is independent of the location
/// where the error was caused.
#[derive(Clone, Debug)]
pub struct ParseErrorDetail {
  /// Static error message text
  pub message: &'static str,
  /// Additional source ranges attached to this error as annotations
  pub annotation_rngs: Vec<SourceRange>,
}
293
/// The error type for the nom parser, which handles full error context
/// (location + detail) and ownership of the input ParseString.
///
/// Eventually error context will be logged and ownership will be moved out.
#[derive(Clone, Debug)]
pub struct ParseError<'a> {
  /// Cause range is defined as [start, end), where `start` points at the first
  /// character that's caught by a label, and `end` points one past the
  /// character that didn't match.
  ///
  /// Example:
  ///   index:  1234567
  ///   input:  abcdefg
  ///   error:   ~~~^
  ///   range:   |   |
  ///           [2,  5)
  ///
  pub cause_range: SourceRange,
  /// Hold ownership to the input ParseString
  pub remaining_input: ParseString<'a>,
  /// Detailed information about this error
  pub error_detail: ParseErrorDetail,
}
317
318impl<'a> ParseError<'a> {
319  /// Create a new error at current location of the input, with given message
320  /// and empty annotations.  Ownership of the input is also passed into this
321  /// error object.
322  pub fn new(input: ParseString<'a>, msg: &'static str) -> Self {
323    let start = input.loc();
324    let mut end = start;
325    end.col += 1;
326    ParseError {
327      cause_range: SourceRange { start, end },
328      remaining_input: input,
329      error_detail: ParseErrorDetail {
330        message: msg,
331        annotation_rngs: vec![],
332      }
333    }
334  }
335
336  /// Add self to the error log of input string.
337  fn log(&mut self) {
338    self.remaining_input.error_log.push((self.cause_range, self.error_detail.clone()));
339  }
340}
341
342/// Required by nom
343impl<'a> nom::error::ParseError<ParseString<'a>> for ParseError<'a> {
344  /// Not used, unless we have logical error
345  fn from_error_kind(input: ParseString<'a>,
346                      _kind: nom::error::ErrorKind) -> Self {
347    ParseError::new(input, "Unexpected error")
348  }
349
350  /// Probably not used
351  fn append(_input: ParseString<'a>,
352            _kind: nom::error::ErrorKind,
353            other: Self) -> Self {
354    other
355  }
356
357  /// Barely used, but we do want to keep the error with larger depth.
358  fn or(self, other: Self) -> Self {
359    let self_start = self.cause_range.start;
360    let other_start = other.cause_range.start;
361    if self_start > other_start {
362      self
363    } else {
364      other
365    }
366  }
367}
368
369
370
/// This struct is responsible for analysing text, interpreting indices
/// and ranges, and producing formatted messages.
pub struct TextFormatter<'a> {
  /// Graphemes of the source text
  graphemes: Vec<&'a str>,
  /// For each line, the index into `graphemes` of its first grapheme
  line_beginnings: Vec<usize>,
  /// One past the index of the last grapheme; used as the end of the last line
  end_index: usize,
}
378
379impl<'a> TextFormatter<'a> {
380  pub fn new(text: &'a str) -> Self {
381    let graphemes = graphemes::init_source(text);
382    let mut line_beginnings = vec![0];
383    for i in 0..graphemes.len() {
384      if graphemes::is_new_line(graphemes[i]) {
385        line_beginnings.push(i + 1);
386      }
387    }
388    line_beginnings.pop();
389    TextFormatter {
390      end_index: graphemes.len(),
391      graphemes,
392      line_beginnings,
393    }
394  }
395
396  // Index interpreter
397
398  fn get_line_range(&self, linenum: usize) -> Option<(usize, usize)> {
399    let line_index = linenum - 1;
400    if line_index >= self.line_beginnings.len() {
401      return None;
402    }
403    if linenum == self.line_beginnings.len() {  // asking for the last line
404      return Some((self.line_beginnings[line_index], self.end_index));
405    }
406    Some((self.line_beginnings[line_index], self.line_beginnings[linenum]))
407  }
408
409  fn get_text_by_linenum(&self, linenum: usize) -> String {
410    let (start, end) = match self.get_line_range(linenum) {
411      Some(v) => v,
412      None => return "\n".to_string(),
413    };
414    let mut s = self.graphemes[start..end].iter().map(|s| *s).collect::<String>();
415    if !s.ends_with("\n") {
416      s.push('\n');
417    }
418    s
419  }
420
421  fn get_textlen_by_linenum(&self, linenum: usize) -> usize {
422    let (start, end) = match self.get_line_range(linenum) {
423      Some(v) => v,
424      None => return 1,
425    };
426    let mut len = 0;
427    for i in start..end {
428      len += graphemes::width(self.graphemes[i]);
429    }
430    len + 1
431  }
432
433  // FormattedString printer
434
435  fn heading_color(s: &str) -> String {
436    s.truecolor(246, 192, 78).bold().to_string()
437  }
438
439  fn location_color(s: &str) -> String {
440    s.truecolor(0,187,204).bold().to_string()
441  }
442
443  fn linenum_color(s: &str) -> String {
444    s.truecolor(0,187,204).bold().to_string()
445  }
446
447  fn text_color(s: &str) -> String {
448    s.to_string()
449  }
450
451  fn annotation_color(s: &str) -> String {
452    s.truecolor(102,51,153).bold().to_string()
453  }
454
455  fn error_color(s: &str) -> String {
456    s.truecolor(170,51,85).bold().to_string()
457  }
458
459  fn ending_color(s: &str) -> String {
460    s.truecolor(246, 192, 78).bold().to_string()
461  }
462
463  fn err_heading(index: usize) -> String {
464    let n = index + 1;
465    let d = "────────────────────────";
466    let s = format!("{} syntax error #{} {}\n", d, n, d);
467    Self::heading_color(&s)
468  }
469
470  fn err_location(&self, ctx: &ParserErrorContext) -> String {
471    let err_end = ctx.cause_rng.end;
472    // error range will not ends at first column, so `minus 1` here is safe
473    let (row, col) = (err_end.row, err_end.col - 1);
474    let s = format!("@location:{}:{}\n", row, col);
475    Self::location_color(&s)
476  }
477
478  fn err_context(&self, ctx: &ParserErrorContext) -> String {
479    let mut result = String::new();
480
481    let mut annotation_rngs = ctx.annotation_rngs.clone();
482    annotation_rngs.push(ctx.cause_rng);
483
484    // the lines to print (1-indexed)
485    let mut lines_to_print: Vec<usize> = vec![];
486    for rng in &annotation_rngs {
487      let r1 = rng.start.row;
488      // if range ends at first column, it doesn't reach that row
489      let r2 = if rng.end.col == 1 {
490        usize::max(rng.start.row, rng.end.row - 1)
491      } else {
492        rng.end.row
493      };
494      for i in r1..=r2 {
495        lines_to_print.push(i);
496      }
497    }
498    lines_to_print.sort();
499    lines_to_print.dedup();
500
501    // the annotations on each line
502    // <linenum, Vec<(start_col, rng_len, is_major, is_cause)>>
503    let mut range_table: HashMap<usize, Vec<(usize, usize, bool, bool)>> = HashMap::new();
504    for linenum in &lines_to_print {
505      range_table.insert(*linenum, vec![]);
506    }
507    let n = annotation_rngs.len() - 1;  // if i == n, it's the last rng, i.e. the cause rng
508    for (i, rng) in annotation_rngs.iter().enumerate() {
509      // c2 might be 0
510      let (r1, c1) = (rng.start.row, rng.start.col);
511      let (r2, c2) = (rng.end.row, rng.end.col - 1);
512      if r1 == r2 {  // the entire range is on one line
513        if c2 >= c1 {  // and the range has non-zero length
514          range_table.get_mut(&r1).unwrap().push((c1, c2 - c1 + 1, true, i == n));
515        }
516      } else {  // the range spans over multiple lines
517        range_table.get_mut(&r1).unwrap().push((c1, usize::MAX, i != n, i == n));
518        for r in r1+1..r2 {
519          range_table.get_mut(&r).unwrap().push((1, usize::MAX, false, i == n));
520        }
521        if c2 != 0 {  // only add the last line if it hfnas non-zero length
522          range_table.get_mut(&r2).unwrap().push((1, c2, i == n, i == n));
523        }
524      }
525    }
526
527    // other data for printing
528    let dots = "...";
529    let indentation = " ";
530    let vert_split1 = " │";
531    let vert_split2 = "  ";
532    let arrow = "^";
533    let tilde = "~";
534    let lines_str: Vec<String> = lines_to_print.iter().map(|i| i.to_string()).collect();
535    let row_str_len = usize::max(lines_str.last().unwrap().len(), dots.len());
536
537    // print source code
538    for i in 0..lines_to_print.len() {
539      // [... | ]
540      if i != 0 && (lines_to_print[i] - lines_to_print[i-1] != 1) {
541        result.push_str(indentation);
542        for _ in 3..row_str_len { result.push(' '); }
543        result.push_str(&Self::linenum_color(dots));
544        result.push_str(&Self::linenum_color(vert_split1));
545        result.push('\n');
546      }
547
548      // [    | ]
549      result.push_str(indentation);
550      for _ in 0..row_str_len { result.push(' '); }
551      result.push_str(&Self::linenum_color(vert_split1));
552      result.push('\n');
553
554      // [row |  program text...]
555      let text = self.get_text_by_linenum(lines_to_print[i]);
556      result.push_str(indentation);
557      for _ in 0..row_str_len-lines_str[i].len() { result.push(' '); }
558      result.push_str(&Self::linenum_color(&lines_str[i]));
559      result.push_str(&Self::linenum_color(vert_split1));
560      result.push_str(&Self::text_color(&text));
561
562      // [    |    ^~~~]
563      result.push_str(indentation);
564      for _ in 0..row_str_len { result.push(' '); }
565      result.push_str(&Self::linenum_color(vert_split1));
566      let mut curr_col = 1;
567      let line_len = self.get_textlen_by_linenum(lines_to_print[i]);
568      let rngs = range_table.get(&lines_to_print[i]).unwrap();
569      for (start, len, major, cause) in rngs {
570        let max_len = usize::max(1, usize::min(*len, line_len - curr_col + 1));
571        for _ in curr_col..*start { result.push(' '); }
572        if *cause {
573          for _ in 0..max_len-1 {
574            result.push_str(&Self::error_color(tilde));
575          }
576          if *major {
577            result.push_str(&Self::error_color(arrow));
578          } else {
579            result.push_str(&Self::error_color(tilde));
580          }
581        } else {
582          if *major {
583            result.push_str(&Self::annotation_color(arrow));
584          } else {
585            result.push_str(&Self::annotation_color(tilde));
586          }
587          for _ in 0..max_len-1 {
588            result.push_str(&Self::annotation_color(tilde));
589          }
590        }
591        curr_col = start + max_len;
592      }
593      result.push('\n');
594    }
595
596    // print error message;
597    // error range never ends at first column, so it's safe to `minus 1` here
598    let cause_col = ctx.cause_rng.end.col - 1;
599    result.push_str(indentation);
600    for _ in 0..row_str_len { result.push(' '); }
601    result.push_str(vert_split2);
602    for _ in 0..cause_col-1 { result.push(' '); }
603    result.push_str(&Self::error_color(&ctx.err_message));
604    result.push('\n');
605
606    result
607  }
608
609  fn err_ending(d: usize) -> String {
610    let s = format!("... and {} other error{} not shown\n", d, if d == 1 {""} else {"s"});
611    Self::heading_color(&s)
612  }
613
614  /// Get formatted error message.
615  pub fn format_error(&self, errors: &ParserErrorReport) -> String {
616    let n = usize::min(errors.len(), 10);
617    let mut result = String::new();
618    result.push('\n');
619    for i in 0..n {
620      let ctx = &errors[i];
621      result.push_str(&Self::err_heading(i));
622      result.push_str(&self.err_location(ctx));
623      result.push_str(&self.err_context(ctx));
624      result.push_str("\n\n");
625    }
626    let d = errors.len() - n;
627    if d != 0 {
628      result.push_str(&Self::err_ending(d));
629    }
630    result
631  }
632}