mech_syntax/
lib.rs

1// # Syntax
2
3#![cfg_attr(feature = "no-std", no_std)]
4#![cfg_attr(feature = "no-std", alloc)]
5#![feature(extract_if)]
6#![feature(get_mut_unchecked)]
7#![allow(dead_code)]
8#![allow(warnings)]
9#![feature(step_trait)]
10
11extern crate mech_core;
12#[cfg(feature="no-std")] #[macro_use] extern crate alloc;
13#[cfg(not(feature = "no-std"))] extern crate core;
14extern crate hashbrown;
15extern crate nom;
16extern crate nom_unicode;
17extern crate nalgebra as na;
18extern crate tabled;
19extern crate libm;
20
21use mech_core::*;
22use mech_core::nodes::*;
23use std::cell::RefCell;
24use std::rc::Rc;
25
26#[cfg(not(feature = "no-std"))] use core::fmt;
27#[cfg(feature = "no-std")] use alloc::fmt;
28#[cfg(feature = "no-std")] use alloc::string::String;
29#[cfg(feature = "no-std")] use alloc::vec::Vec;
30use nom::{
31  IResult,
32  branch::alt,
33  sequence::tuple,
34  combinator::{opt, eof},
35  multi::{many1, many_till, many0, separated_list1},
36  Err,
37};
38
39use std::collections::HashMap;
40use colored::*;
41
42pub mod mechdown;
43pub mod expressions;
44pub mod statements;
45pub mod structures;
46pub mod base;
47pub mod parser;
48pub mod formatter;
49pub mod grammar;
50pub mod literals;
51pub mod state_machines;
52pub mod functions;
53
54pub use crate::parser::*;
55pub use crate::mechdown::*;
56pub use crate::expressions::*;
57pub use crate::statements::*;
58pub use crate::structures::*;
59pub use crate::base::*;
60pub use crate::formatter::*;
61pub use crate::grammar::*;
62pub use crate::literals::*;
63pub use crate::state_machines::*;
64pub use crate::functions::*;
65
66
67/// Unicode grapheme group utilities.
68/// Current implementation does not guarantee correct behavior for
69/// all possible unicode characters.
70pub mod graphemes {
71  use unicode_segmentation::UnicodeSegmentation;
72
73  /// Obtain unicode grapheme groups from input source, then make sure
74  /// it ends with new_line.  Many functions in the parser assume input
75  /// ends with new_line.
76  pub fn init_source(text: &str) -> Vec<&str> {
77    let mut graphemes = UnicodeSegmentation::graphemes(text, true).collect::<Vec<&str>>();
78    graphemes.push("\n");
79    graphemes
80  }
81
82  pub fn init_tag(tag: &str) -> Vec<&str> {
83    UnicodeSegmentation::graphemes(tag, true).collect::<Vec<&str>>()
84  }
85
86  pub fn is_new_line(grapheme: &str) -> bool {
87    match grapheme {
88      "\r" | "\n" | "\r\n" => true,
89      _ => false,
90    }
91  }
92
93  pub fn is_numeric(grapheme: &str) -> bool {
94    grapheme.chars().next().unwrap().is_numeric()
95  }
96
97  pub fn is_alpha(grapheme: &str) -> bool {
98    grapheme.chars().next().unwrap().is_alphabetic()
99  }
100
101  pub fn is_emoji(grapheme: &str) -> bool {
102    let ch = grapheme.chars().next().unwrap();
103    !(ch.is_alphanumeric() || ch.is_ascii())
104  }
105
106  pub fn width(grapheme: &str) -> usize {
107    // TODO: uniode width?
108    let ch = grapheme.chars().next().unwrap();
109    if ch == '\t' {
110      1
111    } else if ch.is_control() {
112      0
113    } else {
114      1
115    }
116  }
117}
118
119/// Just alias
120pub type ParseResult<'a, O> = IResult<ParseString<'a>, O, ParseError<'a>>;
121
122/// The input type for nom parsers. Instead of holding the actual input
123/// string, this struct only holds a reference to that string so that it
124/// can be cloned at much lower cost.
125#[derive(Clone, Debug)]
126pub struct ParseString<'a> {
127  /// Source code
128  pub graphemes: &'a Vec<&'a str>,
129  /// Error report, a list of (error_location, error_context)
130  pub error_log: Vec<(SourceRange, ParseErrorDetail)>,
131  /// Point at the next grapheme to consume
132  pub cursor: usize,
133  /// Location of the grapheme pointed by cursor
134  pub location: SourceLocation,
135}
136
137impl<'a> ParseString<'a> {
138  /// Must always point a an actual string
139  pub fn new(graphemes: &'a Vec<&'a str>) -> Self {
140    ParseString {
141      graphemes,
142      error_log: vec![],
143      cursor: 0,
144      location: SourceLocation { row: 1, col: 1 },
145    }
146  }
147
148  pub fn peek(&self, n: usize) -> Option<&str> {
149    self.graphemes.get(self.cursor + n).copied()
150  }
151
152  pub fn current(&self) -> Option<&str> {
153    self.graphemes.get(self.cursor).copied()
154  }
155
156  pub fn next(&self) -> Option<&str> {
157    self.graphemes.get(self.cursor + 1).copied()
158  }
159
160  /// If current location matches the tag, consume the matched string.
161  fn consume_tag(&mut self, tag: &str) -> Option<String> {
162    if self.is_empty() {
163      return None;
164    }
165    let current = self.graphemes[self.cursor];
166
167    let gs = graphemes::init_tag(tag); 
168    let gs_len = gs.len();
169
170    // Must have enough remaining characters
171    if self.len() < gs_len {
172      return None;
173    }
174
175    // Try to match the tag
176    let mut tmp_location = self.location;
177    for i in 0..gs_len {
178      let c = self.cursor + i;
179      let g = self.graphemes[c];
180      if g != gs[i] {
181        return None;
182      }
183      if graphemes::is_new_line(g) {
184        if !self.is_last_grapheme(c) {
185          tmp_location.row += 1;
186          tmp_location.col = 1;
187        }
188      } else {
189        tmp_location.col += graphemes::width(g);
190      }
191    }
192    // Tag matched, commit change
193    self.cursor += gs_len;
194    self.location = tmp_location;
195    Some(tag.to_string())
196  }
197
198  /// Mutate self by consuming one grapheme
199  fn consume_one(&mut self) -> Option<String> {
200    if self.is_empty() {
201      return None;
202    }
203    let g = self.graphemes[self.cursor];
204    if graphemes::is_new_line(g) {
205      if !self.is_last_grapheme(self.cursor) {
206        self.location.row += 1;
207        self.location.col = 1;
208      }
209    } else {
210      self.location.col += graphemes::width(g);
211    }
212    self.cursor += 1;
213    Some(g.to_string())
214  }
215
216
217  /// If current location matches any emoji, consume the matched string.
218  fn consume_emoji(&mut self) -> Option<String> {
219    if self.is_empty() {
220      return None;
221    }
222    let g = self.graphemes[self.cursor];
223    
224    if graphemes::is_emoji(g) {
225      self.cursor += 1;
226      self.location.col += graphemes::width(g);
227      Some(g.to_string())
228    } else {
229      None
230    }
231  }
232
233  /// If current location matches any alpha char, consume the matched string.
234  fn consume_alpha(&mut self) -> Option<String> {
235    if self.is_empty() {
236      return None;
237    }
238    let g = self.graphemes[self.cursor];
239    if graphemes::is_alpha(g) {
240      self.cursor += 1;
241      self.location.col += graphemes::width(g);
242      Some(g.to_string())
243    } else {
244      None
245    }
246  }
247
248  /// If current location matches any digit, consume the matched string.
249  fn consume_digit(&mut self) -> Option<String> {
250    if self.is_empty() {
251      return None;
252    }
253    let g = self.graphemes[self.cursor];
254    if graphemes::is_numeric(g) {
255      self.cursor += 1;
256      self.location.col += graphemes::width(g);
257      Some(g.to_string())
258    } else {
259      None
260    }
261  }
262
263  /// Get cursor's location in source code
264  fn loc(&self) -> SourceLocation {
265    self.location
266  }
267
268  /// Test whether the grapheme pointed by cursor is the last grapheme
269  fn is_last_grapheme(&self, c: usize) -> bool {
270    (self.graphemes.len() - 1 - c) == 0
271  }
272
273  /// Get remaining (unparsed) length
274  pub fn len(&self) -> usize {
275    self.graphemes.len() - self.cursor
276  }
277  
278  pub fn is_empty(&self) -> bool {
279    self.len() == 0
280  }
281
282  /// For debug purpose
283  fn output(&self) {
284              
285    println!("───────────────────{}", self.len());
286    for i in self.cursor..self.graphemes.len() {
287      print!("{}", self.graphemes[i]);
288    }
289    println!();
290    println!("───────────────────");
291  }
292}
293
294/// Required by nom
295impl<'a> nom::InputLength for ParseString<'a> {
296  fn input_len(&self) -> usize {
297    self.len()
298  }
299}
300
301/// The part of error context that's independent to its cause location.
302#[derive(Clone, Debug)]
303pub struct ParseErrorDetail {
304  pub message: &'static str,
305  pub annotation_rngs: Vec<SourceRange>,
306}
307
308/// The error type for the nom parser, which handles full error context
309/// (location + detail) and ownership of the input ParseString.
310///
311/// Eventually error context will be logged and ownership will be moved out.
312#[derive(Clone, Debug)]
313pub struct ParseError<'a> {
314  /// Cause range is defined as [start, end), where `start` points at the first
315  /// character that's catched by a label, and `end` points at the next 
316  /// character of the character that didn't match.
317  ///
318  /// Example:
319  ///   index:  1234567
320  ///   input:  abcdefg
321  ///   error:   ~~~^
322  ///   range:   |   |
323  ///           [2,  5)
324  ///
325  pub cause_range: SourceRange,
326  /// Hold ownership to the input ParseString
327  pub remaining_input: ParseString<'a>,
328  /// Detailed information about this error
329  pub error_detail: ParseErrorDetail,
330}
331
332impl<'a> ParseError<'a> {
333  /// Create a new error at current location of the input, with given message
334  /// and empty annotations.  Ownership of the input is also passed into this
335  /// error object.
336  pub fn new(input: ParseString<'a>, msg: &'static str) -> Self {
337    let start = input.loc();
338    let mut end = start;
339    end.col += 1;
340    ParseError {
341      cause_range: SourceRange { start, end },
342      remaining_input: input,
343      error_detail: ParseErrorDetail {
344        message: msg,
345        annotation_rngs: vec![],
346      }
347    }
348  }
349
350  /// Add self to the error log of input string.
351  fn log(&mut self) {
352    self.remaining_input.error_log.push((self.cause_range, self.error_detail.clone()));
353  }
354}
355
356/// Required by nom
357impl<'a> nom::error::ParseError<ParseString<'a>> for ParseError<'a> {
358  /// Not used, unless we have logical error
359  fn from_error_kind(input: ParseString<'a>,
360                      _kind: nom::error::ErrorKind) -> Self {
361    ParseError::new(input, "Unexpected error")
362  }
363
364  /// Probably not used
365  fn append(_input: ParseString<'a>,
366            _kind: nom::error::ErrorKind,
367            other: Self) -> Self {
368    other
369  }
370
371  /// Barely used, but we do want to keep the error with larger depth.
372  fn or(self, other: Self) -> Self {
373    let self_start = self.cause_range.start;
374    let other_start = other.cause_range.start;
375    if self_start > other_start {
376      self
377    } else {
378      other
379    }
380  }
381}
382
/// Analyses source text, interprets line numbers and ranges, and produces
/// formatted error messages.
pub struct TextFormatter<'a> {
  /// Graphemes of the source text (with the trailing new line added by
  /// `graphemes::init_source`)
  graphemes: Vec<&'a str>,
  /// Grapheme index at which each line starts
  line_beginnings: Vec<usize>,
  /// Total number of graphemes, i.e. the exclusive end of the last line
  end_index: usize,
}
390
391impl<'a> TextFormatter<'a> {
392  pub fn new(text: &'a str) -> Self {
393    let graphemes = graphemes::init_source(text);
394    let mut line_beginnings = vec![0];
395    for i in 0..graphemes.len() {
396      if graphemes::is_new_line(graphemes[i]) {
397        line_beginnings.push(i + 1);
398      }
399    }
400    line_beginnings.pop();
401    TextFormatter {
402      end_index: graphemes.len(),
403      graphemes,
404      line_beginnings,
405    }
406  }
407
408  // Index interpreter
409
410  fn get_line_range(&self, linenum: usize) -> Option<(usize, usize)> {
411    let line_index = linenum - 1;
412    if line_index >= self.line_beginnings.len() {
413      return None;
414    }
415    if linenum == self.line_beginnings.len() {  // asking for the last line
416      return Some((self.line_beginnings[line_index], self.end_index));
417    }
418    Some((self.line_beginnings[line_index], self.line_beginnings[linenum]))
419  }
420
421  fn get_text_by_linenum(&self, linenum: usize) -> String {
422    let (start, end) = match self.get_line_range(linenum) {
423      Some(v) => v,
424      None => return "\n".to_string(),
425    };
426    let mut s = self.graphemes[start..end].iter().map(|s| *s).collect::<String>();
427    if !s.ends_with("\n") {
428      s.push('\n');
429    }
430    s
431  }
432
433  fn get_textlen_by_linenum(&self, linenum: usize) -> usize {
434    let (start, end) = match self.get_line_range(linenum) {
435      Some(v) => v,
436      None => return 1,
437    };
438    let mut len = 0;
439    for i in start..end {
440      len += graphemes::width(self.graphemes[i]);
441    }
442    len + 1
443  }
444
445  // FormattedString printer
446
447  fn heading_color(s: &str) -> String {
448    s.truecolor(246, 192, 78).bold().to_string()
449  }
450
451  fn location_color(s: &str) -> String {
452    s.truecolor(0,187,204).bold().to_string()
453  }
454
455  fn linenum_color(s: &str) -> String {
456    s.truecolor(0,187,204).bold().to_string()
457  }
458
459  fn text_color(s: &str) -> String {
460    s.to_string()
461  }
462
463  fn annotation_color(s: &str) -> String {
464    s.truecolor(102,51,153).bold().to_string()
465  }
466
467  fn error_color(s: &str) -> String {
468    s.truecolor(170,51,85).bold().to_string()
469  }
470
471  fn ending_color(s: &str) -> String {
472    s.truecolor(246, 192, 78).bold().to_string()
473  }
474
475  fn err_heading(index: usize) -> String {
476    let n = index + 1;
477    let d = "────────────────────────";
478    let s = format!("{} syntax error #{} {}\n", d, n, d);
479    Self::heading_color(&s)
480  }
481
482  fn err_location(&self, ctx: &ParserErrorContext) -> String {
483    let err_end = ctx.cause_rng.end;
484    // error range will not ends at first column, so `minus 1` here is safe
485    let (row, col) = (err_end.row, err_end.col - 1);
486    let s = format!("@location:{}:{}\n", row, col);
487    Self::location_color(&s)
488  }
489
490  fn err_context(&self, ctx: &ParserErrorContext) -> String {
491    let mut result = String::new();
492
493    let mut annotation_rngs = ctx.annotation_rngs.clone();
494    annotation_rngs.push(ctx.cause_rng);
495
496    // the lines to print (1-indexed)
497    let mut lines_to_print: Vec<usize> = vec![];
498    for rng in &annotation_rngs {
499      let r1 = rng.start.row;
500      // if range ends at first column, it doesn't reach that row
501      let r2 = if rng.end.col == 1 {
502        usize::max(rng.start.row, rng.end.row - 1)
503      } else {
504        rng.end.row
505      };
506      for i in r1..=r2 {
507        lines_to_print.push(i);
508      }
509    }
510    lines_to_print.sort();
511    lines_to_print.dedup();
512
513    // the annotations on each line
514    // <linenum, Vec<(start_col, rng_len, is_major, is_cause)>>
515    let mut range_table: HashMap<usize, Vec<(usize, usize, bool, bool)>> = HashMap::new();
516    for linenum in &lines_to_print {
517      range_table.insert(*linenum, vec![]);
518    }
519    let n = annotation_rngs.len() - 1;  // if i == n, it's the last rng, i.e. the cause rng
520    for (i, rng) in annotation_rngs.iter().enumerate() {
521      // c2 might be 0
522      let (r1, c1) = (rng.start.row, rng.start.col);
523      let (r2, c2) = (rng.end.row, rng.end.col - 1);
524      if r1 == r2 {  // the entire range is on one line
525        if c2 >= c1 {  // and the range has non-zero length
526          range_table.get_mut(&r1).unwrap().push((c1, c2 - c1 + 1, true, i == n));
527        }
528      } else {  // the range spans over multiple lines
529        range_table.get_mut(&r1).unwrap().push((c1, usize::MAX, i != n, i == n));
530        for r in r1+1..r2 {
531          range_table.get_mut(&r).unwrap().push((1, usize::MAX, false, i == n));
532        }
533        if c2 != 0 {  // only add the last line if it hfnas non-zero length
534          range_table.get_mut(&r2).unwrap().push((1, c2, i == n, i == n));
535        }
536      }
537    }
538
539    // other data for printing
540    let dots = "...";
541    let indentation = " ";
542    let vert_split1 = " │";
543    let vert_split2 = "  ";
544    let arrow = "^";
545    let tilde = "~";
546    let lines_str: Vec<String> = lines_to_print.iter().map(|i| i.to_string()).collect();
547    let row_str_len = usize::max(lines_str.last().unwrap().len(), dots.len());
548
549    // print source code
550    for i in 0..lines_to_print.len() {
551      // [... | ]
552      if i != 0 && (lines_to_print[i] - lines_to_print[i-1] != 1) {
553        result.push_str(indentation);
554        for _ in 3..row_str_len { result.push(' '); }
555        result.push_str(&Self::linenum_color(dots));
556        result.push_str(&Self::linenum_color(vert_split1));
557        result.push('\n');
558      }
559
560      // [    | ]
561      result.push_str(indentation);
562      for _ in 0..row_str_len { result.push(' '); }
563      result.push_str(&Self::linenum_color(vert_split1));
564      result.push('\n');
565
566      // [row |  program text...]
567      let text = self.get_text_by_linenum(lines_to_print[i]);
568      result.push_str(indentation);
569      for _ in 0..row_str_len-lines_str[i].len() { result.push(' '); }
570      result.push_str(&Self::linenum_color(&lines_str[i]));
571      result.push_str(&Self::linenum_color(vert_split1));
572      result.push_str(&Self::text_color(&text));
573
574      // [    |    ^~~~]
575      result.push_str(indentation);
576      for _ in 0..row_str_len { result.push(' '); }
577      result.push_str(&Self::linenum_color(vert_split1));
578      let mut curr_col = 1;
579      let line_len = self.get_textlen_by_linenum(lines_to_print[i]);
580      let rngs = range_table.get(&lines_to_print[i]).unwrap();
581      for (start, len, major, cause) in rngs {
582        let max_len = usize::max(1, usize::min(*len, line_len - curr_col + 1));
583        for _ in curr_col..*start { result.push(' '); }
584        if *cause {
585          for _ in 0..max_len-1 {
586            result.push_str(&Self::error_color(tilde));
587          }
588          if *major {
589            result.push_str(&Self::error_color(arrow));
590          } else {
591            result.push_str(&Self::error_color(tilde));
592          }
593        } else {
594          if *major {
595            result.push_str(&Self::annotation_color(arrow));
596          } else {
597            result.push_str(&Self::annotation_color(tilde));
598          }
599          for _ in 0..max_len-1 {
600            result.push_str(&Self::annotation_color(tilde));
601          }
602        }
603        curr_col = start + max_len;
604      }
605      result.push('\n');
606    }
607
608    // print error message;
609    // error range never ends at first column, so it's safe to `minus 1` here
610    let cause_col = ctx.cause_rng.end.col - 1;
611    result.push_str(indentation);
612    for _ in 0..row_str_len { result.push(' '); }
613    result.push_str(vert_split2);
614    for _ in 0..cause_col-1 { result.push(' '); }
615    result.push_str(&Self::error_color(&ctx.err_message));
616    result.push('\n');
617
618    result
619  }
620
621  fn err_ending(d: usize) -> String {
622    let s = format!("... and {} other error{} not shown\n", d, if d == 1 {""} else {"s"});
623    Self::heading_color(&s)
624  }
625
626  /// Get formatted error message.
627  pub fn format_error(&self, errors: &ParserErrorReport) -> String {
628    let n = usize::min(errors.len(), 10);
629    let mut result = String::new();
630    result.push('\n');
631    for i in 0..n {
632      let ctx = &errors[i];
633      result.push_str(&Self::err_heading(i));
634      result.push_str(&self.err_location(ctx));
635      result.push_str(&self.err_context(ctx));
636      result.push_str("\n\n");
637    }
638    let d = errors.len() - n;
639    if d != 0 {
640      result.push_str(&Self::err_ending(d));
641    }
642    result
643  }
644}