1#![cfg_attr(feature = "no-std", no_std)]
4#![cfg_attr(feature = "no-std", alloc)]
5#![allow(dead_code)]
6#![allow(warnings)]
7
8extern crate mech_core;
9#[cfg(feature="no-std")] #[macro_use] extern crate alloc;
10#[cfg(not(feature = "no-std"))] extern crate core;
11extern crate nom;
12extern crate nom_unicode;
13extern crate tabled;
14
15use mech_core::*;
16use mech_core::nodes::*;
17use std::cell::RefCell;
18use std::rc::Rc;
19use num_traits::*;
20
21#[cfg(feature = "serde")] use serde::{Serialize, Deserialize};
22
23#[cfg(not(feature = "no-std"))] use core::fmt;
24#[cfg(feature = "no-std")] use alloc::fmt;
25#[cfg(feature = "no-std")] use alloc::string::String;
26#[cfg(feature = "no-std")] use alloc::vec::Vec;
27use nom::{
28 IResult,
29 branch::alt,
30 sequence::tuple,
31 combinator::{opt, eof},
32 multi::{many1, many_till, many0, separated_list1},
33 Err,
34};
35use nom::Parser;
36
37use std::collections::HashMap;
38use colored::*;
39
40pub mod mechdown;
42pub mod expressions;
43pub mod statements;
44pub mod structures;
45pub mod base;
46pub mod parser;
47#[cfg(feature = "formatter")]
48pub mod formatter;
49#[cfg(feature = "mika")]
50pub mod mika;
51pub mod grammar;
52pub mod literals;
53pub mod patterns;
54pub mod state_machines;
55pub mod functions;
56pub mod repl;
57
58pub use crate::parser::*;
59pub use crate::mechdown::*;
61pub use crate::expressions::*;
62pub use crate::statements::*;
63pub use crate::structures::*;
64pub use crate::base::*;
65#[cfg(feature = "formatter")]
66pub use crate::formatter::*;
67#[cfg(feature = "mika")]
68pub use crate::mika::*;
69pub use crate::grammar::*;
70pub use crate::literals::*;
71pub use crate::patterns::*;
72pub use crate::state_machines::*;
73pub use crate::functions::*;
74pub use crate::repl::*;
75
76
77pub mod graphemes {
81 use unicode_segmentation::UnicodeSegmentation;
82
83 pub fn init_source(text: &str) -> Vec<&str> {
87 let mut graphemes = UnicodeSegmentation::graphemes(text, true).collect::<Vec<&str>>();
88 graphemes.push("\n");
89 graphemes
90 }
91
92 pub fn init_tag(tag: &str) -> Vec<&str> {
93 UnicodeSegmentation::graphemes(tag, true).collect::<Vec<&str>>()
94 }
95
96 pub fn is_new_line(grapheme: &str) -> bool {
97 match grapheme {
98 "\r" | "\n" | "\r\n" => true,
99 _ => false,
100 }
101 }
102
103 pub fn is_numeric(grapheme: &str) -> bool {
104 grapheme.chars().next().unwrap().is_numeric()
105 }
106
107 pub fn is_alpha(grapheme: &str) -> bool {
108 grapheme.chars().next().unwrap().is_alphabetic()
109 }
110
111 pub fn is_emoji(grapheme: &str) -> bool {
112 let ch = grapheme.chars().next().unwrap();
113 !(ch.is_alphanumeric() || ch.is_ascii())
114 }
115
116 pub fn width(grapheme: &str) -> usize {
117 let ch = grapheme.chars().next().unwrap();
119 if ch == '\t' {
120 1
121 } else if ch.is_control() {
122 0
123 } else {
124 1
125 }
126 }
127}
128
129pub type ParseResult<'a, O> = IResult<ParseString<'a>, O, ParseError<'a>>;
131
132#[derive(Clone, Debug)]
136pub struct ParseString<'a> {
137 pub graphemes: &'a Vec<&'a str>,
139 pub error_log: Vec<(SourceRange, ParseErrorDetail)>,
141 pub cursor: usize,
143 pub location: SourceLocation,
145}
146
147impl<'a> ParseString<'a> {
148 pub fn new(graphemes: &'a Vec<&'a str>) -> Self {
150 ParseString {
151 graphemes,
152 error_log: vec![],
153 cursor: 0,
154 location: SourceLocation { row: 1, col: 1 },
155 }
156 }
157
158 pub fn rest(&self) -> String {
159 let mut s = String::new();
161 for i in self.cursor..self.graphemes.len() {
162 s.push_str(self.graphemes[i]);
163 }
164 s
165 }
166
167 pub fn peek(&self, n: usize) -> Option<&str> {
168 self.graphemes.get(self.cursor + n).copied()
169 }
170
171 pub fn current(&self) -> Option<&str> {
172 self.graphemes.get(self.cursor).copied()
173 }
174
175 pub fn next(&self) -> Option<&str> {
176 self.graphemes.get(self.cursor + 1).copied()
177 }
178
179 fn consume_tag(&mut self, tag: &str) -> Option<String> {
181 if self.is_empty() {
182 return None;
183 }
184 let current = self.graphemes[self.cursor];
185
186 let gs = graphemes::init_tag(tag);
187 let gs_len = gs.len();
188
189 if self.len() < gs_len {
191 return None;
192 }
193
194 let mut tmp_location = self.location;
196 for i in 0..gs_len {
197 let c = self.cursor + i;
198 let g = self.graphemes[c];
199 if g != gs[i] {
200 return None;
201 }
202 if graphemes::is_new_line(g) {
203 if !self.is_last_grapheme(c) {
204 tmp_location.row += 1;
205 tmp_location.col = 1;
206 }
207 } else {
208 tmp_location.col += graphemes::width(g);
209 }
210 }
211 self.cursor += gs_len;
213 self.location = tmp_location;
214 Some(tag.to_string())
215 }
216
217 pub fn slice(&self, start_cursor: usize, end_cursor: usize) -> String {
219 let start = start_cursor.min(self.graphemes.len());
220 let end = end_cursor.min(self.graphemes.len());
221 self.graphemes[start..end].join("")
222 }
223
224 fn consume_one(&mut self) -> Option<String> {
226 if self.is_empty() {
227 return None;
228 }
229 let g = self.graphemes[self.cursor];
230 if graphemes::is_new_line(g) {
231 if !self.is_last_grapheme(self.cursor) {
232 self.location.row += 1;
233 self.location.col = 1;
234 }
235 } else {
236 self.location.col += graphemes::width(g);
237 }
238 self.cursor += 1;
239 Some(g.to_string())
240 }
241
242
243 fn consume_emoji(&mut self) -> Option<String> {
245 if self.is_empty() {
246 return None;
247 }
248 let g = self.graphemes[self.cursor];
249
250 if graphemes::is_emoji(g) {
251 self.cursor += 1;
252 self.location.col += graphemes::width(g);
253 Some(g.to_string())
254 } else {
255 None
256 }
257 }
258
259 fn consume_alpha(&mut self) -> Option<String> {
261 if self.is_empty() {
262 return None;
263 }
264 let g = self.graphemes[self.cursor];
265 if graphemes::is_alpha(g) {
266 self.cursor += 1;
267 self.location.col += graphemes::width(g);
268 Some(g.to_string())
269 } else {
270 None
271 }
272 }
273
274 fn consume_digit(&mut self) -> Option<String> {
276 if self.is_empty() {
277 return None;
278 }
279 let g = self.graphemes[self.cursor];
280 if graphemes::is_numeric(g) {
281 self.cursor += 1;
282 self.location.col += graphemes::width(g);
283 Some(g.to_string())
284 } else {
285 None
286 }
287 }
288
289 fn loc(&self) -> SourceLocation {
291 self.location
292 }
293
294 fn is_last_grapheme(&self, c: usize) -> bool {
296 (self.graphemes.len() - 1 - c) == 0
297 }
298
299 pub fn len(&self) -> usize {
301 self.graphemes.len() - self.cursor
302 }
303
304 pub fn is_empty(&self) -> bool {
305 self.len() == 0
306 }
307
308 fn output(&self) {
310
311 println!("───────────────────{}", self.len());
312 for i in self.cursor..self.graphemes.len() {
313 print!("{}", self.graphemes[i]);
314 }
315 println!();
316 println!("───────────────────");
317 }
318}
319
320impl<'a> nom::InputLength for ParseString<'a> {
322 fn input_len(&self) -> usize {
323 self.len()
324 }
325}
326
327#[derive(Clone, Debug)]
329pub struct ParseErrorDetail {
330 pub message: &'static str,
331 pub annotation_rngs: Vec<SourceRange>,
332}
333
334#[derive(Clone, Debug)]
339pub struct ParseError<'a> {
340 pub cause_range: SourceRange,
352 pub remaining_input: ParseString<'a>,
354 pub error_detail: ParseErrorDetail,
356}
357
358impl<'a> ParseError<'a> {
359 pub fn new(input: ParseString<'a>, msg: &'static str) -> Self {
363 let start = input.loc();
364 let mut end = start;
365 end.col += 1;
366 ParseError {
367 cause_range: SourceRange { start, end },
368 remaining_input: input,
369 error_detail: ParseErrorDetail {
370 message: msg,
371 annotation_rngs: vec![],
372 }
373 }
374 }
375
376 fn log(&mut self) {
378 self.remaining_input.error_log.push((self.cause_range.clone(), self.error_detail.clone()));
379 }
380}
381
382impl<'a> nom::error::ParseError<ParseString<'a>> for ParseError<'a> {
384 fn from_error_kind(input: ParseString<'a>,
386 _kind: nom::error::ErrorKind) -> Self {
387 ParseError::new(input, format!("NomErrorKind: {:?}", _kind).leak())
388 }
389
390 fn append(_input: ParseString<'a>,
392 _kind: nom::error::ErrorKind,
393 other: Self) -> Self {
394 other
395 }
396
397 fn or(self, other: Self) -> Self {
399 let self_start = self.cause_range.start;
400 let other_start = other.cause_range.start;
401 if self_start > other_start {
402 self
403 } else {
404 other
405 }
406 }
407}
408
409pub struct TextFormatter<'a> {
412 graphemes: Vec<&'a str>,
413 line_beginnings: Vec<usize>,
414 end_index: usize,
415}
416
417impl<'a> TextFormatter<'a> {
418 pub fn new(text: &'a str) -> Self {
419 let graphemes = graphemes::init_source(text);
420 let mut line_beginnings = vec![0];
421 for i in 0..graphemes.len() {
422 if graphemes::is_new_line(graphemes[i]) {
423 line_beginnings.push(i + 1);
424 }
425 }
426 line_beginnings.pop();
427 TextFormatter {
428 end_index: graphemes.len(),
429 graphemes,
430 line_beginnings,
431 }
432 }
433
434 fn get_line_range(&self, linenum: usize) -> Option<(usize, usize)> {
437 let line_index = linenum - 1;
438 if line_index >= self.line_beginnings.len() {
439 return None;
440 }
441 if linenum == self.line_beginnings.len() { return Some((self.line_beginnings[line_index], self.end_index));
443 }
444 Some((self.line_beginnings[line_index], self.line_beginnings[linenum]))
445 }
446
447 fn get_text_by_linenum(&self, linenum: usize) -> String {
448 let (start, end) = match self.get_line_range(linenum) {
449 Some(v) => v,
450 None => return "\n".to_string(),
451 };
452 let mut s = self.graphemes[start..end].iter().map(|s| *s).collect::<String>();
453 if !s.ends_with("\n") {
454 s.push('\n');
455 }
456 s
457 }
458
459 fn get_textlen_by_linenum(&self, linenum: usize) -> usize {
460 let (start, end) = match self.get_line_range(linenum) {
461 Some(v) => v,
462 None => return 1,
463 };
464 let mut len = 0;
465 for i in start..end {
466 len += graphemes::width(self.graphemes[i]);
467 }
468 len + 1
469 }
470
471 fn heading_color(s: &str) -> String {
474 s.truecolor(246, 192, 78).bold().to_string()
475 }
476
477 fn location_color(s: &str) -> String {
478 s.truecolor(0,187,204).bold().to_string()
479 }
480
481 fn linenum_color(s: &str) -> String {
482 s.truecolor(0,187,204).bold().to_string()
483 }
484
485 fn text_color(s: &str) -> String {
486 s.to_string()
487 }
488
489 fn annotation_color(s: &str) -> String {
490 s.truecolor(102,51,153).bold().to_string()
491 }
492
493 fn error_color(s: &str) -> String {
494 s.truecolor(170,51,85).bold().to_string()
495 }
496
497 fn ending_color(s: &str) -> String {
498 s.truecolor(246, 192, 78).bold().to_string()
499 }
500
501 fn err_heading(index: usize) -> String {
502 let n = index + 1;
503 let d = "────────────────────────";
504 let s = format!("{} syntax error #{} {}\n", d, n, d);
505 Self::heading_color(&s)
506 }
507
508 fn err_location(&self, ctx: &ParserErrorContext) -> String {
509 let err_end = ctx.cause_rng.end;
510 let (row, col) = (err_end.row, err_end.col - 1);
512 let s = format!("@location:{}:{}\n", row, col);
513 Self::location_color(&s)
514 }
515
516 fn err_context(&self, ctx: &ParserErrorContext) -> String {
517 let mut result = String::new();
518
519 let mut annotation_rngs = ctx.annotation_rngs.clone();
520 annotation_rngs.push(ctx.cause_rng.clone());
521
522 let mut lines_to_print: Vec<usize> = vec![];
524 for rng in &annotation_rngs {
525 let r1 = rng.start.row;
526 let r2 = if rng.end.col == 1 {
528 usize::max(rng.start.row, rng.end.row - 1)
529 } else {
530 rng.end.row
531 };
532 for i in r1..=r2 {
533 lines_to_print.push(i);
534 }
535 }
536 lines_to_print.sort();
537 lines_to_print.dedup();
538
539 let mut range_table: HashMap<usize, Vec<(usize, usize, bool, bool)>> = HashMap::new();
542 for linenum in &lines_to_print {
543 range_table.insert(*linenum, vec![]);
544 }
545 let n = annotation_rngs.len() - 1; for (i, rng) in annotation_rngs.iter().enumerate() {
547 let (r1, c1) = (rng.start.row, rng.start.col);
549 let (r2, c2) = (rng.end.row, rng.end.col - 1);
550 if r1 == r2 { if c2 >= c1 { range_table.get_mut(&r1).unwrap().push((c1, c2 - c1 + 1, true, i == n));
553 }
554 } else { range_table.get_mut(&r1).unwrap().push((c1, usize::MAX, i != n, i == n));
556 for r in r1+1..r2 {
557 range_table.get_mut(&r).unwrap().push((1, usize::MAX, false, i == n));
558 }
559 if c2 != 0 { range_table.get_mut(&r2).unwrap().push((1, c2, i == n, i == n));
561 }
562 }
563 }
564
565 let dots = "...";
567 let indentation = " ";
568 let vert_split1 = " │";
569 let vert_split2 = " ";
570 let arrow = "^";
571 let tilde = "~";
572 let lines_str: Vec<String> = lines_to_print.iter().map(|i| i.to_string()).collect();
573 let row_str_len = usize::max(lines_str.last().unwrap().len(), dots.len());
574
575 for i in 0..lines_to_print.len() {
577 if i != 0 && (lines_to_print[i] - lines_to_print[i-1] != 1) {
579 result.push_str(indentation);
580 for _ in 3..row_str_len { result.push(' '); }
581 result.push_str(&Self::linenum_color(dots));
582 result.push_str(&Self::linenum_color(vert_split1));
583 result.push('\n');
584 }
585
586 result.push_str(indentation);
588 for _ in 0..row_str_len { result.push(' '); }
589 result.push_str(&Self::linenum_color(vert_split1));
590 result.push('\n');
591
592 let text = self.get_text_by_linenum(lines_to_print[i]);
594 result.push_str(indentation);
595 for _ in 0..row_str_len-lines_str[i].len() { result.push(' '); }
596 result.push_str(&Self::linenum_color(&lines_str[i]));
597 result.push_str(&Self::linenum_color(vert_split1));
598 result.push_str(&Self::text_color(&text));
599
600 result.push_str(indentation);
602 for _ in 0..row_str_len { result.push(' '); }
603 result.push_str(&Self::linenum_color(vert_split1));
604 let mut curr_col = 1;
605 let line_len = self.get_textlen_by_linenum(lines_to_print[i]);
606 let rngs = range_table.get(&lines_to_print[i]).unwrap();
607 for (start, len, major, cause) in rngs {
608 let max_len = usize::max(1, usize::min(*len, line_len - curr_col + 1));
609 for _ in curr_col..*start { result.push(' '); }
610 if *cause {
611 for _ in 0..max_len-1 {
612 result.push_str(&Self::error_color(tilde));
613 }
614 if *major {
615 result.push_str(&Self::error_color(arrow));
616 } else {
617 result.push_str(&Self::error_color(tilde));
618 }
619 } else {
620 if *major {
621 result.push_str(&Self::annotation_color(arrow));
622 } else {
623 result.push_str(&Self::annotation_color(tilde));
624 }
625 for _ in 0..max_len-1 {
626 result.push_str(&Self::annotation_color(tilde));
627 }
628 }
629 curr_col = start + max_len;
630 }
631 result.push('\n');
632 }
633
634 let cause_col = ctx.cause_rng.end.col - 1;
637 result.push_str(indentation);
638 for _ in 0..row_str_len { result.push(' '); }
639 result.push_str(vert_split2);
640 for _ in 0..cause_col-1 { result.push(' '); }
641 result.push_str(&Self::error_color(&ctx.err_message));
642 result.push('\n');
643
644 result
645 }
646
647 fn err_ending(d: usize) -> String {
648 let s = format!("... and {} other error{} not shown\n", d, if d == 1 {""} else {"s"});
649 Self::heading_color(&s)
650 }
651
652 pub fn format_error(&self, errors: &ParserErrorReport) -> String {
654 let n = usize::min(errors.1.len(), 10);
655 let mut result = String::new();
656 result.push('\n');
657 for i in 0..n {
658 let ctx = &errors.1[i];
659 result.push_str(&Self::err_heading(i));
660 result.push_str(&self.err_location(ctx));
661 result.push_str(&self.err_context(ctx));
662 result.push_str("\n\n");
663 }
664 let d = errors.0.len() - n;
665 if d != 0 {
666 result.push_str(&Self::err_ending(d));
667 }
668 result
669 }
670}
671
672#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
673#[derive(Clone, Debug, Eq, PartialEq, Hash)]
674pub struct ParserErrorContext {
675 pub cause_rng: SourceRange,
676 pub err_message: String,
677 pub annotation_rngs: Vec<SourceRange>,
678}
679
680#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
681#[derive(Clone, Debug, PartialEq, Eq, Hash)]
682pub struct ParserErrorReport(pub String, pub Vec<ParserErrorContext>);
683
684impl MechErrorKind for ParserErrorReport {
685 fn name(&self) -> &str {
686 "ParserErrorContext"
687 }
688 fn message(&self) -> String {
689 let source = &self.0;
690 let lines: Vec<&str> = source.lines().collect();
691
692 self.1
693 .iter()
694 .map(|e| {
695 let cause_snippet = extract_snippet(&lines, &e.cause_rng);
696
697 let annotation_snippets = e.annotation_rngs
698 .iter()
699 .map(|rng| extract_snippet(&lines, rng))
700 .collect::<Vec<_>>()
701 .join("\n");
702
703 format!(
704 "{}: {} (Annotations: [{}])\n\nSource:\n{}\n\nAnnotations:\n{}",
705 format!(
706 "[{}:{}-{}:{}]",
707 e.cause_rng.start.row,
708 e.cause_rng.start.col,
709 e.cause_rng.end.row,
710 e.cause_rng.end.col
711 ),
712 e.err_message,
713 e.annotation_rngs.iter()
714 .map(|rng| format!(
715 "[{}:{}-{}:{}]",
716 rng.start.row, rng.start.col, rng.end.row, rng.end.col
717 ))
718 .collect::<Vec<_>>()
719 .join(", "),
720 indent(&cause_snippet),
721 indent(&annotation_snippets)
722 )
723 })
724 .collect::<Vec<_>>()
725 .join("\n\n---\n\n")
726 }
727}
728
729fn extract_snippet(lines: &[&str], range: &SourceRange) -> String {
730 let mut out = String::new();
731
732 for row in range.start.row..=range.end.row {
733 if let Some(row_index) = row.checked_sub(1) {
734 if let Some(line) = lines.get(row_index) {
735 let start_col = if row == range.start.row { range.start.col } else { 1 };
736 let end_col = if row == range.end.row {
737 range.end.col
738 } else {
739 line.chars().count() + 1
740 };
741
742 out.push_str(&slice_by_char_cols(line, start_col, end_col));
743 out.push('\n');
744 }
745 }
746 }
747
748 out
749}
750
/// Returns the `char`s of `line` in the 1-based, end-exclusive column range
/// `start_col..end_col`.
///
/// Column 0 is treated like column 1. An empty or inverted range yields an
/// empty string, and columns past the end of the line are simply cut off.
fn slice_by_char_cols(line: &str, start_col: usize, end_col: usize) -> String {
    let first = start_col.saturating_sub(1);
    let past_last = end_col.saturating_sub(1);

    match past_last.checked_sub(first) {
        Some(count) if count > 0 => line.chars().skip(first).take(count).collect(),
        _ => String::new(),
    }
}
764
/// Prefixes every line of `s` with the indentation and joins the lines with
/// `'\n'` (no trailing newline is produced).
fn indent(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (i, line) in s.lines().enumerate() {
        if i > 0 {
            out.push('\n');
        }
        out.push_str(" ");
        out.push_str(line);
    }
    out
}
771
#[cfg(test)]
mod tests {
    use super::*;

    /// A range spanning a whole line that contains a multi-byte character
    /// (`→`) must give back that line unchanged, followed by a newline.
    #[test]
    fn extract_snippet_handles_unicode_boundaries() {
        let line = " :Reverse([], acc, swaps) → :Pass(acc, [], 0)";
        let char_len = line.chars().count();
        let whole_line = SourceRange {
            start: SourceLocation { row: 1, col: 1 },
            end: SourceLocation {
                row: 1,
                col: char_len + 1,
            },
        };

        let snippet = extract_snippet(&[line], &whole_line);

        assert_eq!(snippet, [line, "\n"].concat());
    }
}
792
793pub fn alt_best<'a, O>(
796 input: ParseString<'a>,
797 parsers: &[(&'static str, Box<dyn Fn(ParseString) -> ParseResult<O>>)],
798) -> ParseResult<'a, O> {
799 let start_cursor = input.cursor;
800
801 let mut best_success: Option<(ParseString, O, usize, &'static str)> = None;
802 let mut best_failure: Option<(nom::Err<ParseError>, usize, &'static str)> = None;
803 let mut best_error: Option<(nom::Err<ParseError>, usize, &'static str)> = None;
804
805 for (name, parser) in parsers {
806 match parser(input.clone()) {
807 Ok((next_input, val)) => {
808 if *name == "mech_code" {
809 return Ok((next_input, val));
810 }
811 let consumed = next_input.cursor;
812 if best_success.is_none() || consumed > best_success.as_ref().unwrap().2 {
813 best_success = Some((next_input, val, consumed, name));
814 }
815 }
816
817 Err(nom::Err::Failure(e)) => {
818 let reached = e.remaining_input.cursor;
819 if best_failure.is_none() || reached > best_failure.as_ref().unwrap().1 {
820 best_failure = Some((nom::Err::Failure(e), reached, name));
821 }
822 }
823
824 Err(nom::Err::Error(e)) => {
825 let reached = e.remaining_input.cursor;
826 if best_error.is_none() || reached > best_error.as_ref().unwrap().1 {
827 best_error = Some((nom::Err::Error(e), reached, name));
828 }
829 }
830
831 Err(e @ nom::Err::Incomplete(_)) => {
832 return Err(e);
833 }
834 }
835 }
836
837 if let Some((next_input, val, success_cursor, _)) = best_success {
839 if let Some((nom::Err::Failure(failure), failure_cursor, _)) = best_failure {
840 if success_cursor > failure_cursor {
841 Ok((next_input, val))
842 } else {
843 Err(nom::Err::Failure(failure))
844 }
845 } else {
846 Ok((next_input, val))
847 }
848 } else if let Some((nom::Err::Failure(failure), _, _)) = best_failure {
849 Err(nom::Err::Failure(failure))
850 } else if let Some((err, _, _)) = best_error {
851 Err(err)
852 } else {
853 Err(nom::Err::Error(ParseError::new(
854 input,
855 "No parser matched in alt_best",
856 )))
857 }
858}