mech_syntax/
lib.rs

1// # Syntax
2
3#![cfg_attr(feature = "no-std", no_std)]
4#![cfg_attr(feature = "no-std", alloc)]
5#![feature(extract_if)]
6#![feature(get_mut_unchecked)]
7#![allow(dead_code)]
8#![allow(warnings)]
9#![feature(step_trait)]
10
11extern crate mech_core;
12#[cfg(feature="no-std")] #[macro_use] extern crate alloc;
13#[cfg(not(feature = "no-std"))] extern crate core;
14extern crate hashbrown;
15extern crate nom;
16extern crate nom_unicode;
17extern crate tabled;
18
19use mech_core::*;
20use mech_core::nodes::*;
21use std::cell::RefCell;
22use std::rc::Rc;
23use num_traits::*;
24
25#[cfg(not(feature = "no-std"))] use core::fmt;
26#[cfg(feature = "no-std")] use alloc::fmt;
27#[cfg(feature = "no-std")] use alloc::string::String;
28#[cfg(feature = "no-std")] use alloc::vec::Vec;
29use nom::{
30  IResult,
31  branch::alt,
32  sequence::tuple,
33  combinator::{opt, eof},
34  multi::{many1, many_till, many0, separated_list1},
35  Err,
36};
37
38use std::collections::HashMap;
39use colored::*;
40
41pub mod mechdown;
42pub mod expressions;
43pub mod statements;
44pub mod structures;
45pub mod base;
46pub mod parser;
47#[cfg(feature = "formatter")]
48pub mod formatter;
49pub mod grammar;
50pub mod literals;
51pub mod state_machines;
52pub mod functions;
53pub mod repl;
54
55pub use crate::parser::*;
56pub use crate::mechdown::*;
57pub use crate::expressions::*;
58pub use crate::statements::*;
59pub use crate::structures::*;
60pub use crate::base::*;
61#[cfg(feature = "formatter")]
62pub use crate::formatter::*;
63pub use crate::grammar::*;
64pub use crate::literals::*;
65pub use crate::state_machines::*;
66pub use crate::functions::*;
67pub use crate::repl::*;
68
69
70/// Unicode grapheme group utilities.
71/// Current implementation does not guarantee correct behavior for
72/// all possible unicode characters.
73pub mod graphemes {
74  use unicode_segmentation::UnicodeSegmentation;
75
76  /// Obtain unicode grapheme groups from input source, then make sure
77  /// it ends with new_line.  Many functions in the parser assume input
78  /// ends with new_line.
79  pub fn init_source(text: &str) -> Vec<&str> {
80    let mut graphemes = UnicodeSegmentation::graphemes(text, true).collect::<Vec<&str>>();
81    graphemes.push("\n");
82    graphemes
83  }
84
85  pub fn init_tag(tag: &str) -> Vec<&str> {
86    UnicodeSegmentation::graphemes(tag, true).collect::<Vec<&str>>()
87  }
88
89  pub fn is_new_line(grapheme: &str) -> bool {
90    match grapheme {
91      "\r" | "\n" | "\r\n" => true,
92      _ => false,
93    }
94  }
95
96  pub fn is_numeric(grapheme: &str) -> bool {
97    grapheme.chars().next().unwrap().is_numeric()
98  }
99
100  pub fn is_alpha(grapheme: &str) -> bool {
101    grapheme.chars().next().unwrap().is_alphabetic()
102  }
103
104  pub fn is_emoji(grapheme: &str) -> bool {
105    let ch = grapheme.chars().next().unwrap();
106    !(ch.is_alphanumeric() || ch.is_ascii())
107  }
108
109  pub fn width(grapheme: &str) -> usize {
110    // TODO: uniode width?
111    let ch = grapheme.chars().next().unwrap();
112    if ch == '\t' {
113      1
114    } else if ch.is_control() {
115      0
116    } else {
117      1
118    }
119  }
120}
121
/// Alias for nom's `IResult` specialized to `ParseString` input and `ParseError` errors.
123pub type ParseResult<'a, O> = IResult<ParseString<'a>, O, ParseError<'a>>;
124
/// The input type for nom parsers. Instead of holding the actual input
/// string, this struct only holds a reference to the vector of graphemes
/// so that it can be cloned at much lower cost.
#[derive(Clone, Debug)]
pub struct ParseString<'a> {
  /// Source code, as a borrowed list of graphemes
  pub graphemes: &'a Vec<&'a str>,
  /// Error report, a list of (error_location, error_context)
  pub error_log: Vec<(SourceRange, ParseErrorDetail)>,
  /// Index into `graphemes` of the next grapheme to consume
  pub cursor: usize,
  /// Location (row, column) of the grapheme pointed to by `cursor`
  pub location: SourceLocation,
}
139
140impl<'a> ParseString<'a> {
141  /// Must always point a an actual string
142  pub fn new(graphemes: &'a Vec<&'a str>) -> Self {
143    ParseString {
144      graphemes,
145      error_log: vec![],
146      cursor: 0,
147      location: SourceLocation { row: 1, col: 1 },
148    }
149  }
150
151  pub fn rest(&self) -> String {
152    // Return the rest of the string from current cursor
153    let mut s = String::new();
154    for i in self.cursor..self.graphemes.len() {
155      s.push_str(self.graphemes[i]);
156    }
157    s
158  }
159
160  pub fn peek(&self, n: usize) -> Option<&str> {
161    self.graphemes.get(self.cursor + n).copied()
162  }
163
164  pub fn current(&self) -> Option<&str> {
165    self.graphemes.get(self.cursor).copied()
166  }
167
168  pub fn next(&self) -> Option<&str> {
169    self.graphemes.get(self.cursor + 1).copied()
170  }
171
172  /// If current location matches the tag, consume the matched string.
173  fn consume_tag(&mut self, tag: &str) -> Option<String> {
174    if self.is_empty() {
175      return None;
176    }
177    let current = self.graphemes[self.cursor];
178
179    let gs = graphemes::init_tag(tag); 
180    let gs_len = gs.len();
181
182    // Must have enough remaining characters
183    if self.len() < gs_len {
184      return None;
185    }
186
187    // Try to match the tag
188    let mut tmp_location = self.location;
189    for i in 0..gs_len {
190      let c = self.cursor + i;
191      let g = self.graphemes[c];
192      if g != gs[i] {
193        return None;
194      }
195      if graphemes::is_new_line(g) {
196        if !self.is_last_grapheme(c) {
197          tmp_location.row += 1;
198          tmp_location.col = 1;
199        }
200      } else {
201        tmp_location.col += graphemes::width(g);
202      }
203    }
204    // Tag matched, commit change
205    self.cursor += gs_len;
206    self.location = tmp_location;
207    Some(tag.to_string())
208  }
209
210  /// Mutate self by consuming one grapheme
211  fn consume_one(&mut self) -> Option<String> {
212    if self.is_empty() {
213      return None;
214    }
215    let g = self.graphemes[self.cursor];
216    if graphemes::is_new_line(g) {
217      if !self.is_last_grapheme(self.cursor) {
218        self.location.row += 1;
219        self.location.col = 1;
220      }
221    } else {
222      self.location.col += graphemes::width(g);
223    }
224    self.cursor += 1;
225    Some(g.to_string())
226  }
227
228
229  /// If current location matches any emoji, consume the matched string.
230  fn consume_emoji(&mut self) -> Option<String> {
231    if self.is_empty() {
232      return None;
233    }
234    let g = self.graphemes[self.cursor];
235    
236    if graphemes::is_emoji(g) {
237      self.cursor += 1;
238      self.location.col += graphemes::width(g);
239      Some(g.to_string())
240    } else {
241      None
242    }
243  }
244
245  /// If current location matches any alpha char, consume the matched string.
246  fn consume_alpha(&mut self) -> Option<String> {
247    if self.is_empty() {
248      return None;
249    }
250    let g = self.graphemes[self.cursor];
251    if graphemes::is_alpha(g) {
252      self.cursor += 1;
253      self.location.col += graphemes::width(g);
254      Some(g.to_string())
255    } else {
256      None
257    }
258  }
259
260  /// If current location matches any digit, consume the matched string.
261  fn consume_digit(&mut self) -> Option<String> {
262    if self.is_empty() {
263      return None;
264    }
265    let g = self.graphemes[self.cursor];
266    if graphemes::is_numeric(g) {
267      self.cursor += 1;
268      self.location.col += graphemes::width(g);
269      Some(g.to_string())
270    } else {
271      None
272    }
273  }
274
275  /// Get cursor's location in source code
276  fn loc(&self) -> SourceLocation {
277    self.location
278  }
279
280  /// Test whether the grapheme pointed by cursor is the last grapheme
281  fn is_last_grapheme(&self, c: usize) -> bool {
282    (self.graphemes.len() - 1 - c) == 0
283  }
284
285  /// Get remaining (unparsed) length
286  pub fn len(&self) -> usize {
287    self.graphemes.len() - self.cursor
288  }
289  
290  pub fn is_empty(&self) -> bool {
291    self.len() == 0
292  }
293
294  /// For debug purpose
295  fn output(&self) {
296              
297    println!("───────────────────{}", self.len());
298    for i in self.cursor..self.graphemes.len() {
299      print!("{}", self.graphemes[i]);
300    }
301    println!();
302    println!("───────────────────");
303  }
304}
305
306/// Required by nom
307impl<'a> nom::InputLength for ParseString<'a> {
308  fn input_len(&self) -> usize {
309    self.len()
310  }
311}
312
/// The part of the error context that is independent of where the error
/// occurred.
#[derive(Clone, Debug)]
pub struct ParseErrorDetail {
  /// Static error message
  pub message: &'static str,
  /// Source ranges attached to this error as annotations
  pub annotation_rngs: Vec<SourceRange>,
}
319
/// The error type for the nom parser, which handles full error context
/// (location + detail) and ownership of the input ParseString.
///
/// Eventually error context will be logged and ownership will be moved out.
#[derive(Clone, Debug)]
pub struct ParseError<'a> {
  /// Cause range is defined as [start, end), where `start` points at the first
  /// character that's caught by a label, and `end` points at the character
  /// right after the one that didn't match.
  ///
  /// Example:
  ///   index:  1234567
  ///   input:  abcdefg
  ///   error:   ~~~^
  ///   range:   |   |
  ///           [2,  5)
  ///
  pub cause_range: SourceRange,
  /// Holds ownership of the input ParseString
  pub remaining_input: ParseString<'a>,
  /// Detailed information about this error
  pub error_detail: ParseErrorDetail,
}
343
344impl<'a> ParseError<'a> {
345  /// Create a new error at current location of the input, with given message
346  /// and empty annotations.  Ownership of the input is also passed into this
347  /// error object.
348  pub fn new(input: ParseString<'a>, msg: &'static str) -> Self {
349    let start = input.loc();
350    let mut end = start;
351    end.col += 1;
352    ParseError {
353      cause_range: SourceRange { start, end },
354      remaining_input: input,
355      error_detail: ParseErrorDetail {
356        message: msg,
357        annotation_rngs: vec![],
358      }
359    }
360  }
361
362  /// Add self to the error log of input string.
363  fn log(&mut self) {
364    self.remaining_input.error_log.push((self.cause_range.clone(), self.error_detail.clone()));
365  }
366}
367
368/// Required by nom
369impl<'a> nom::error::ParseError<ParseString<'a>> for ParseError<'a> {
370  /// Not used, unless we have logical error
371  fn from_error_kind(input: ParseString<'a>,
372                      _kind: nom::error::ErrorKind) -> Self {
373    ParseError::new(input, "Unexpected error")
374  }
375
376  /// Probably not used
377  fn append(_input: ParseString<'a>,
378            _kind: nom::error::ErrorKind,
379            other: Self) -> Self {
380    other
381  }
382
383  /// Barely used, but we do want to keep the error with larger depth.
384  fn or(self, other: Self) -> Self {
385    let self_start = self.cause_range.start;
386    let other_start = other.cause_range.start;
387    if self_start > other_start {
388      self
389    } else {
390      other
391    }
392  }
393}
394
/// This struct is responsible for analysing text, interpreting indices
/// and ranges, and producing formatted messages.
pub struct TextFormatter<'a> {
  // Graphemes of the source text, with a trailing "\n" appended.
  graphemes: Vec<&'a str>,
  // For each line, the index in `graphemes` of its first grapheme.
  line_beginnings: Vec<usize>,
  // Total number of graphemes, i.e. one past the last valid index.
  end_index: usize,
}
402
403impl<'a> TextFormatter<'a> {
404  pub fn new(text: &'a str) -> Self {
405    let graphemes = graphemes::init_source(text);
406    let mut line_beginnings = vec![0];
407    for i in 0..graphemes.len() {
408      if graphemes::is_new_line(graphemes[i]) {
409        line_beginnings.push(i + 1);
410      }
411    }
412    line_beginnings.pop();
413    TextFormatter {
414      end_index: graphemes.len(),
415      graphemes,
416      line_beginnings,
417    }
418  }
419
420  // Index interpreter
421
422  fn get_line_range(&self, linenum: usize) -> Option<(usize, usize)> {
423    let line_index = linenum - 1;
424    if line_index >= self.line_beginnings.len() {
425      return None;
426    }
427    if linenum == self.line_beginnings.len() {  // asking for the last line
428      return Some((self.line_beginnings[line_index], self.end_index));
429    }
430    Some((self.line_beginnings[line_index], self.line_beginnings[linenum]))
431  }
432
433  fn get_text_by_linenum(&self, linenum: usize) -> String {
434    let (start, end) = match self.get_line_range(linenum) {
435      Some(v) => v,
436      None => return "\n".to_string(),
437    };
438    let mut s = self.graphemes[start..end].iter().map(|s| *s).collect::<String>();
439    if !s.ends_with("\n") {
440      s.push('\n');
441    }
442    s
443  }
444
445  fn get_textlen_by_linenum(&self, linenum: usize) -> usize {
446    let (start, end) = match self.get_line_range(linenum) {
447      Some(v) => v,
448      None => return 1,
449    };
450    let mut len = 0;
451    for i in start..end {
452      len += graphemes::width(self.graphemes[i]);
453    }
454    len + 1
455  }
456
457  // FormattedString printer
458
459  fn heading_color(s: &str) -> String {
460    s.truecolor(246, 192, 78).bold().to_string()
461  }
462
463  fn location_color(s: &str) -> String {
464    s.truecolor(0,187,204).bold().to_string()
465  }
466
467  fn linenum_color(s: &str) -> String {
468    s.truecolor(0,187,204).bold().to_string()
469  }
470
471  fn text_color(s: &str) -> String {
472    s.to_string()
473  }
474
475  fn annotation_color(s: &str) -> String {
476    s.truecolor(102,51,153).bold().to_string()
477  }
478
479  fn error_color(s: &str) -> String {
480    s.truecolor(170,51,85).bold().to_string()
481  }
482
483  fn ending_color(s: &str) -> String {
484    s.truecolor(246, 192, 78).bold().to_string()
485  }
486
487  fn err_heading(index: usize) -> String {
488    let n = index + 1;
489    let d = "────────────────────────";
490    let s = format!("{} syntax error #{} {}\n", d, n, d);
491    Self::heading_color(&s)
492  }
493
494  fn err_location(&self, ctx: &ParserErrorContext) -> String {
495    let err_end = ctx.cause_rng.end;
496    // error range will not ends at first column, so `minus 1` here is safe
497    let (row, col) = (err_end.row, err_end.col - 1);
498    let s = format!("@location:{}:{}\n", row, col);
499    Self::location_color(&s)
500  }
501
502  fn err_context(&self, ctx: &ParserErrorContext) -> String {
503    let mut result = String::new();
504
505    let mut annotation_rngs = ctx.annotation_rngs.clone();
506    annotation_rngs.push(ctx.cause_rng.clone());
507
508    // the lines to print (1-indexed)
509    let mut lines_to_print: Vec<usize> = vec![];
510    for rng in &annotation_rngs {
511      let r1 = rng.start.row;
512      // if range ends at first column, it doesn't reach that row
513      let r2 = if rng.end.col == 1 {
514        usize::max(rng.start.row, rng.end.row - 1)
515      } else {
516        rng.end.row
517      };
518      for i in r1..=r2 {
519        lines_to_print.push(i);
520      }
521    }
522    lines_to_print.sort();
523    lines_to_print.dedup();
524
525    // the annotations on each line
526    // <linenum, Vec<(start_col, rng_len, is_major, is_cause)>>
527    let mut range_table: HashMap<usize, Vec<(usize, usize, bool, bool)>> = HashMap::new();
528    for linenum in &lines_to_print {
529      range_table.insert(*linenum, vec![]);
530    }
531    let n = annotation_rngs.len() - 1;  // if i == n, it's the last rng, i.e. the cause rng
532    for (i, rng) in annotation_rngs.iter().enumerate() {
533      // c2 might be 0
534      let (r1, c1) = (rng.start.row, rng.start.col);
535      let (r2, c2) = (rng.end.row, rng.end.col - 1);
536      if r1 == r2 {  // the entire range is on one line
537        if c2 >= c1 {  // and the range has non-zero length
538          range_table.get_mut(&r1).unwrap().push((c1, c2 - c1 + 1, true, i == n));
539        }
540      } else {  // the range spans over multiple lines
541        range_table.get_mut(&r1).unwrap().push((c1, usize::MAX, i != n, i == n));
542        for r in r1+1..r2 {
543          range_table.get_mut(&r).unwrap().push((1, usize::MAX, false, i == n));
544        }
545        if c2 != 0 {  // only add the last line if it hfnas non-zero length
546          range_table.get_mut(&r2).unwrap().push((1, c2, i == n, i == n));
547        }
548      }
549    }
550
551    // other data for printing
552    let dots = "...";
553    let indentation = " ";
554    let vert_split1 = " │";
555    let vert_split2 = "  ";
556    let arrow = "^";
557    let tilde = "~";
558    let lines_str: Vec<String> = lines_to_print.iter().map(|i| i.to_string()).collect();
559    let row_str_len = usize::max(lines_str.last().unwrap().len(), dots.len());
560
561    // print source code
562    for i in 0..lines_to_print.len() {
563      // [... | ]
564      if i != 0 && (lines_to_print[i] - lines_to_print[i-1] != 1) {
565        result.push_str(indentation);
566        for _ in 3..row_str_len { result.push(' '); }
567        result.push_str(&Self::linenum_color(dots));
568        result.push_str(&Self::linenum_color(vert_split1));
569        result.push('\n');
570      }
571
572      // [    | ]
573      result.push_str(indentation);
574      for _ in 0..row_str_len { result.push(' '); }
575      result.push_str(&Self::linenum_color(vert_split1));
576      result.push('\n');
577
578      // [row |  program text...]
579      let text = self.get_text_by_linenum(lines_to_print[i]);
580      result.push_str(indentation);
581      for _ in 0..row_str_len-lines_str[i].len() { result.push(' '); }
582      result.push_str(&Self::linenum_color(&lines_str[i]));
583      result.push_str(&Self::linenum_color(vert_split1));
584      result.push_str(&Self::text_color(&text));
585
586      // [    |    ^~~~]
587      result.push_str(indentation);
588      for _ in 0..row_str_len { result.push(' '); }
589      result.push_str(&Self::linenum_color(vert_split1));
590      let mut curr_col = 1;
591      let line_len = self.get_textlen_by_linenum(lines_to_print[i]);
592      let rngs = range_table.get(&lines_to_print[i]).unwrap();
593      for (start, len, major, cause) in rngs {
594        let max_len = usize::max(1, usize::min(*len, line_len - curr_col + 1));
595        for _ in curr_col..*start { result.push(' '); }
596        if *cause {
597          for _ in 0..max_len-1 {
598            result.push_str(&Self::error_color(tilde));
599          }
600          if *major {
601            result.push_str(&Self::error_color(arrow));
602          } else {
603            result.push_str(&Self::error_color(tilde));
604          }
605        } else {
606          if *major {
607            result.push_str(&Self::annotation_color(arrow));
608          } else {
609            result.push_str(&Self::annotation_color(tilde));
610          }
611          for _ in 0..max_len-1 {
612            result.push_str(&Self::annotation_color(tilde));
613          }
614        }
615        curr_col = start + max_len;
616      }
617      result.push('\n');
618    }
619
620    // print error message;
621    // error range never ends at first column, so it's safe to `minus 1` here
622    let cause_col = ctx.cause_rng.end.col - 1;
623    result.push_str(indentation);
624    for _ in 0..row_str_len { result.push(' '); }
625    result.push_str(vert_split2);
626    for _ in 0..cause_col-1 { result.push(' '); }
627    result.push_str(&Self::error_color(&ctx.err_message));
628    result.push('\n');
629
630    result
631  }
632
633  fn err_ending(d: usize) -> String {
634    let s = format!("... and {} other error{} not shown\n", d, if d == 1 {""} else {"s"});
635    Self::heading_color(&s)
636  }
637
638  /// Get formatted error message.
639  pub fn format_error(&self, errors: &ParserErrorReport) -> String {
640    let n = usize::min(errors.len(), 10);
641    let mut result = String::new();
642    result.push('\n');
643    for i in 0..n {
644      let ctx = &errors[i];
645      result.push_str(&Self::err_heading(i));
646      result.push_str(&self.err_location(ctx));
647      result.push_str(&self.err_context(ctx));
648      result.push_str("\n\n");
649    }
650    let d = errors.len() - n;
651    if d != 0 {
652      result.push_str(&Self::err_ending(d));
653    }
654    result
655  }
656}