Skip to main content

mech_syntax/
base.rs

1#[macro_use]
2use crate::parser::*;
3use crate::*;
4use crate::label;
5use crate::labelr;
6use nom::{
7  multi::separated_list0,
8  sequence::tuple as nom_tuple,
9};
10use crate::nodes::Kind;
11
12// Lexical Elements
13// ----------------------------------------------------------------------------
14// Ref: #58393432045966419
15
// leaf!(name, literal, kind)
//
// Expands to `pub fn name(input: ParseString) -> ParseResult<Token>`, a parser
// that matches `literal` exactly at the current position (no whitespace is
// skipped) and returns a `Token` of the given `kind`. The token's `chars` are
// the characters of the literal, and its `src_range` runs from the location
// before the match to the location after it.
//
// Empty input is rejected up front with an "Unexpected eof" error.
macro_rules! leaf {
  ($name:ident, $byte:expr, $token:expr) => (
    pub fn $name(input: ParseString) -> ParseResult<Token> {
      if input.is_empty() {
        return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
      }
      let start = input.loc();
      // The current grapheme is not needed here: `tag` performs the match and
      // the token text is taken from the literal itself.
      let (input, _) = tag($byte)(input)?;
      let end = input.loc();
      let src_range = SourceRange { start, end };
      Ok((input, Token{kind: $token, chars: $byte.chars().collect::<Vec<char>>(), src_range}))
    }
  )
}
31
// ws0_leaf!(name, literal, kind)
//
// Like `leaf!`, but the generated parser also skips optional whitespace
// (`whitespace0`) before and after the literal.
//
// The start location is recorded before the leading whitespace is skipped and
// the end location after the trailing whitespace, so the token's `src_range`
// covers the surrounding whitespace as well as the literal. The token's
// `chars` contain only the literal.
//
// Empty input is rejected up front with an "Unexpected eof" error.
macro_rules! ws0_leaf {
  ($name:ident, $byte:expr, $token:expr) => (
    pub fn $name(input: ParseString) -> ParseResult<Token> {
      if input.is_empty() {
        return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
      }
      let start = input.loc();
      let (input, _) = whitespace0(input)?;
      let (input, _) = tag($byte)(input)?;
      let (input, _) = whitespace0(input)?;
      let end = input.loc();
      let src_range = SourceRange { start, end };
      Ok((input, Token{kind: $token, chars: $byte.chars().collect::<Vec<char>>(), src_range}))
    }
  )
}
49
// ws1_leaf!(name, literal, kind)
//
// Like `leaf!`, but the generated parser requires at least one whitespace
// character (`whitespace1`) on both sides of the literal.
//
// The start and end locations are taken immediately around the literal, so
// the token's `src_range` excludes the surrounding whitespace.
//
// Empty input is rejected up front with an "Unexpected eof" error.
macro_rules! ws1_leaf {
  ($name:ident, $byte:expr, $token:expr) => (
    pub fn $name(input: ParseString) -> ParseResult<Token> {
      if input.is_empty() {
        return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
      }
      let (input, _) = whitespace1(input)?;
      let start = input.loc();
      // Do not index `input.graphemes[input.cursor]` here: after the leading
      // whitespace has been consumed the cursor may already be at the end of
      // the input, and the emptiness check above no longer protects the
      // access. `tag` reports the mismatch as a regular parse error instead.
      let (input, _) = tag($byte)(input)?;
      let end = input.loc();
      let (input, _) = whitespace1(input)?;
      let src_range = SourceRange { start, end };
      Ok((input, Token{kind: $token, chars: $byte.chars().collect::<Vec<char>>(), src_range}))
    }
  )
}
67
68// Tokens
69// ----------------------------------------------------------------------------
70// Ref: 39003557984811317
71
// Single-character punctuation and symbol tokens.
// Each line expands (via `leaf!`) to a `pub fn <name>` parser that matches the
// given literal exactly and yields a token of the listed kind.
leaf!{ampersand, "&", TokenKind::Ampersand}
leaf!{apostrophe, "'", TokenKind::Apostrophe}
leaf!{asterisk, "*", TokenKind::Asterisk}
leaf!{at, "@", TokenKind::At}
leaf!{bar, "|", TokenKind::Bar}
leaf!{backslash, "\\", TokenKind::Backslash}
leaf!{caret, "^", TokenKind::Caret}
leaf!{colon, ":", TokenKind::Colon}
leaf!{comma, ",", TokenKind::Comma}
leaf!{dash, "-", TokenKind::Dash}
leaf!{dollar, "$", TokenKind::Dollar}
leaf!{equal, "=", TokenKind::Equal}
leaf!{exclamation, "!", TokenKind::Exclamation}
leaf!{grave, "`", TokenKind::Grave}
leaf!{hashtag, "#", TokenKind::HashTag}
leaf!{negate, "¬", TokenKind::Not}
leaf!{percent, "%", TokenKind::Percent}
leaf!{period, ".", TokenKind::Period}
leaf!{plus, "+", TokenKind::Plus}
leaf!{question, "?", TokenKind::Question}
leaf!{quote, "\"", TokenKind::Quote}
leaf!{semicolon, ";", TokenKind::Semicolon}
leaf!{slash, "/", TokenKind::Slash}
leaf!{tilde, "~", TokenKind::Tilde}
leaf!{underscore, "_", TokenKind::Underscore}
97
// Boolean literals: the symbol and English spellings map to the same kinds.
leaf!{check_mark, "✓", TokenKind::True}
leaf!{cross, "✗", TokenKind::False}
leaf!{english_true_literal, "true", TokenKind::True}
leaf!{english_false_literal, "false", TokenKind::False}

// Horizontal spaces: regular space, no-break space (U+00A0) and thin space
// (U+2009) all produce `TokenKind::Space`.
leaf!{space, " ", TokenKind::Space}
leaf!{nbsp, "\u{00A0}", TokenKind::Space}
leaf!{thin_space, "\u{2009}", TokenKind::Space}

// Line breaks and tab. Note that the "\r\n" pair is reported with kind
// `CarriageReturn`, the same kind as a lone "\r".
leaf!{new_line_char, "\n", TokenKind::Newline}
leaf!{carriage_return, "\r", TokenKind::CarriageReturn}
leaf!{carriage_return_new_line, "\r\n", TokenKind::CarriageReturn}
leaf!{tab, "\t", TokenKind::Tab}
111
// Opening grouping symbols.
leaf!{left_bracket, "[", TokenKind::LeftBracket}
leaf!{left_parenthesis, "(", TokenKind::LeftParenthesis}
leaf!{left_brace, "{", TokenKind::LeftBrace}
leaf!{left_angle, "<", TokenKind::LeftAngle}

// Closing grouping symbols.
leaf!{right_bracket, "]", TokenKind::RightBracket}
leaf!{right_parenthesis, ")", TokenKind::RightParenthesis}
leaf!{right_brace, "}", TokenKind::RightBrace}
leaf!{right_angle, ">", TokenKind::RightAngle}
121
// Box-drawing characters. Every parser below yields `TokenKind::BoxDrawing`;
// only the matched literal differs.

// Rounded corners.
leaf!{box_tl_round, "╭", TokenKind::BoxDrawing}
leaf!{box_tr_round, "╮", TokenKind::BoxDrawing}
leaf!{box_bl_round, "╰", TokenKind::BoxDrawing}
leaf!{box_br_round, "╯", TokenKind::BoxDrawing}

// Bold (heavy) corners.
leaf!{box_tl_bold, "┏", TokenKind::BoxDrawing}
leaf!{box_tr_bold, "┓", TokenKind::BoxDrawing}
leaf!{box_bl_bold, "┗", TokenKind::BoxDrawing}
leaf!{box_br_bold, "┛", TokenKind::BoxDrawing}

// Light square corners.
leaf!{box_tl, "┌", TokenKind::BoxDrawing}
leaf!{box_tr, "┐", TokenKind::BoxDrawing}
leaf!{box_bl, "└", TokenKind::BoxDrawing}
leaf!{box_br, "┘", TokenKind::BoxDrawing}

// Lines, junctions and crossings.
leaf!{box_cross, "┼", TokenKind::BoxDrawing}
leaf!{box_horz, "─", TokenKind::BoxDrawing}
leaf!{box_t_left, "├", TokenKind::BoxDrawing}
leaf!{box_t_right, "┤", TokenKind::BoxDrawing}
leaf!{box_t_top, "┬", TokenKind::BoxDrawing}
leaf!{box_t_bottom, "┴", TokenKind::BoxDrawing}
leaf!{box_vert, "│", TokenKind::BoxDrawing}
leaf!{box_vert_bold, "┃", TokenKind::BoxDrawing}
145
// Multi-character markup sigils and prefixes.
// Several of these share a literal with a plain token defined with `leaf!`
// elsewhere in this file (e.g. `emphasis_sigil` and `asterisk` both match "*",
// `quote_sigil` and `right_angle` both match ">"); they differ only in the
// token kind produced. The ASCII and symbol spellings of the error and
// success sigils map to the same kind.
leaf!(abstract_sigil, "%%", TokenKind::AbstractSigil);
leaf!(emphasis_sigil, "*", TokenKind::EmphasisSigil);
leaf!(equation_sigil, "$$", TokenKind::EquationSigil);
leaf!(footnote_prefix, "[^", TokenKind::FootnotePrefix);
leaf!(float_left, "<<:", TokenKind::FloatLeft);
leaf!(float_right, ":>>", TokenKind::FloatRight);
leaf!(http_prefix, "http", TokenKind::HttpPrefix);
leaf!(highlight_sigil, "!!", TokenKind::HighlightSigil);
leaf!(img_prefix, "![", TokenKind::ImgPrefix);
leaf!(quote_sigil, ">", TokenKind::QuoteSigil);
leaf!(question_sigil, "(?)>", TokenKind::QuestionSigil);
leaf!(info_sigil, "(i)>", TokenKind::InfoSigil);
leaf!(idea_sigil, "(*)>", TokenKind::IdeaSigil);
leaf!(warning_sigil, "(!)>", TokenKind::WarningSigil);
leaf!(error_sigil, "(x)>", TokenKind::ErrorSigil);
leaf!(error_alt_sigil, "(✗)>", TokenKind::ErrorSigil);
leaf!(success_check_sigil, "(✓)>", TokenKind::SuccessSigil);
leaf!(success_sigil, "(+)>", TokenKind::SuccessSigil);
leaf!(strike_sigil, "~~", TokenKind::StrikeSigil);
leaf!(strong_sigil, "**", TokenKind::StrongSigil);
leaf!(grave_codeblock_sigil, "```", TokenKind::GraveCodeBlockSigil);
leaf!(tilde_codeblock_sigil, "~~~", TokenKind::TildeCodeBlockSigil);
leaf!(underline_sigil, "__", TokenKind::UnderlineSigil);
leaf!(section_sigil, "§", TokenKind::SectionSigil);
leaf!(mika_section_open, "⸢", TokenKind::MikaSectionOpen);
leaf!(mika_section_close, "⸥", TokenKind::MikaSectionClose);
leaf!(prompt_sigil, ">:", TokenKind::PromptSigil);
173
// Operators. These use `ws0_leaf!`, so each generated parser also skips
// optional whitespace on both sides of the literal. The `_a` / `_u` suffixes
// mark the ASCII and Unicode spellings of the same operator kind.
ws0_leaf!(assign_operator, "=", TokenKind::AssignOperator);
ws0_leaf!(async_transition_operator, "~>", TokenKind::AsyncTransitionOperator);
ws0_leaf!(define_operator, ":=", TokenKind::DefineOperator);
ws0_leaf!(synth_operator, "?=", TokenKind::SynthOperator);
ws0_leaf!(gen_operator, "@=", TokenKind::GenOperator);
ws0_leaf!(output_operator_a, "=>", TokenKind::OutputOperator);
ws0_leaf!(output_operator_u, "⇒", TokenKind::OutputOperator);
ws0_leaf!(transition_operator_a, "->", TokenKind::TransitionOperator);
ws0_leaf!(transition_operator_u, "→", TokenKind::TransitionOperator);
ws0_leaf!(generator_arrow, "<-", TokenKind::GeneratorArrow);
ws0_leaf!(generator_arrow_u, "←", TokenKind::GeneratorArrow);
ws0_leaf!(spread_operator_a, "...", TokenKind::SpreadOperator);
ws0_leaf!(spread_operator_u, "…", TokenKind::SpreadOperator);
187
188// transition_operator := "->" | "→" ;
189pub fn transition_operator(input: ParseString) -> ParseResult<Token> {
190  let (input, operator) = alt((transition_operator_a, transition_operator_u))(input)?;
191  Ok((input, operator))
192}
193
194// output_operator := "=>" | "⇒" ;
195pub fn output_operator(input: ParseString) -> ParseResult<Token> {
196  let (input, operator) = alt((output_operator_a, output_operator_u))(input)?;
197  Ok((input, operator))
198}
199
200// emoji-grapheme := ?emoji-grapheme-literal? ;
201pub fn emoji_grapheme(mut input: ParseString) -> ParseResult<String> {
202  if let Some(matched) = input.consume_emoji() {
203    Ok((input, matched))
204  } else {
205    Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
206  }
207}
208
209// alpha := ?alpha-literal? ;
210pub fn alpha(mut input: ParseString) -> ParseResult<String> {
211  if let Some(matched) = input.consume_alpha() {
212    Ok((input, matched))
213  } else {
214    Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
215  }
216}
217
218// digit := ?digit-literal? ;
219pub fn digit(mut input: ParseString) -> ParseResult<String> {
220  if let Some(matched) = input.consume_digit() {
221    Ok((input, matched))
222  } else {
223    Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
224  }
225}
226
227// any := ?any-character? ;
228pub fn any(mut input: ParseString) -> ParseResult<String> {
229  if let Some(matched) = input.consume_one() {
230    Ok((input, matched))
231  } else {
232    Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
233  }
234}
235
236pub fn any_token(mut input: ParseString) -> ParseResult<Token> {
237  if input.is_empty() {
238    return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
239  }
240  let start = input.loc();
241  let byte = input.graphemes[input.cursor];
242  if let Some(matched) = input.consume_one() {
243    let end = input.loc();
244    let src_range = SourceRange { start, end };
245    Ok((input, Token{kind: TokenKind::Any, chars: byte.chars().collect::<Vec<char>>(), src_range}))
246  } else {
247    Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
248  }
249}
250
251// forbidden-emoji := box-drawing | other-forbidden-shapes ;
252pub fn forbidden_emoji(input: ParseString) -> ParseResult<Token> {
253  alt((box_drawing_emoji, nbsp, thin_space, mika_section_open, mika_section_close))(input)
254}
255
256// emoji := (!forbidden-emoji, emoji-grapheme) ;
257pub fn emoji(input: ParseString) -> ParseResult<Token> {
258  let msg1 = "Cannot be a box-drawing emoji";
259  let start = input.loc();
260  let (input, _) = is_not(forbidden_emoji)(input)?;
261  let (input, g) = emoji_grapheme(input)?;
262  let end = input.loc();
263  let src_range = SourceRange { start, end };
264  Ok((input, Token{kind: TokenKind::Emoji, chars: g.chars().collect::<Vec<char>>(), src_range}))
265}
266
267// alpha-token := alpha-literal-token ;
268pub fn alpha_token(input: ParseString) -> ParseResult<Token> {
269  let (input, (g, src_range)) = range(alpha)(input)?;
270  Ok((input, Token{kind: TokenKind::Alpha, chars: g.chars().collect::<Vec<char>>(), src_range}))
271}
272
273// digit-token := digit-literal-token ;
274pub fn digit_token(input: ParseString) -> ParseResult<Token> {
275  let (input, (g, src_range)) = range(digit)(input)?;
276  Ok((input, Token{kind: TokenKind::Digit, chars: g.chars().collect::<Vec<char>>(), src_range}))
277}
278
279// alphanumeric := alpha | digit ;
280pub fn alphanumeric(input: ParseString) -> ParseResult<Token> {
281  let (input, token) = alt((alpha_token, digit_token))(input)?; 
282  Ok((input, token))
283}
284
285// underscore-digit := underscore, digit ;
286pub fn underscore_digit(input: ParseString) -> ParseResult<Token> {
287  let (input, _) = underscore(input)?;
288  let (input, digit) = digit_token(input)?;
289  Ok((input,digit))
290}
291
292// digit-sequence := digit, (underscore-digit | digit)* ;
293pub fn digit_sequence(input: ParseString) -> ParseResult<Vec<Token>> {
294  let (input, mut start) = digit_token(input)?;
295  let (input, mut tokens) = many0(alt((underscore_digit,digit_token)))(input)?;
296  let mut all = vec![start];
297  all.append(&mut tokens);
298  Ok((input,all))
299}
300
301// grouping-symbol := left-parenthesis | right-parenthesis | left-angle | right-angle | left-brace | right-brace | left-bracket | right-bracket ;
302pub fn grouping_symbol(input: ParseString) -> ParseResult<Token> {
303  let (input, grouping) = alt((left_parenthesis, right_parenthesis, left_angle, right_angle, left_brace, right_brace, left_bracket, right_bracket))(input)?;
304  Ok((input, grouping))
305}
306
307// punctuation := period | exclamation | question | comma | colon | semicolon | quote | apostrophe ;
308pub fn punctuation(input: ParseString) -> ParseResult<Token> {
309  let (input, punctuation) = alt((period, exclamation, question, comma, colon, semicolon, quote, apostrophe))(input)?;
310  Ok((input, punctuation))
311}
312
313// escaped-char := "\" ,  alpha | symbol | punctuation ;
314pub fn escaped_char(input: ParseString) -> ParseResult<Token> {
315  let (input, _) = backslash(input)?;
316  let (input, mut symbol) = alt((alpha_token, symbol, punctuation))(input)?;
317  // Update kind
318  symbol.kind = TokenKind::EscapedChar;
319  // Transform the char to visible escaped form if needed
320  symbol.chars = symbol.chars.iter().flat_map(|&c| {
321    match c {
322      'n' => vec!['\n'],
323      't' => vec!['\t'],
324      'r' => vec!['\r'],
325      other => vec![other],
326    }
327  }).collect();
328  Ok((input, symbol))
329}
330
331// symbol := ampersand | dollar | bar | percent | at | slash | hashtag | equal | backslash | tilde | plus | dash | asterisk | caret | underscore ;
332pub fn symbol(input: ParseString) -> ParseResult<Token> {
333  let (input, symbol) = alt((ampersand, grave, dollar, bar, percent, at, slash, hashtag, equal, backslash, tilde, plus, dash, asterisk, caret, underscore))(input)?;
334  Ok((input, symbol))
335}
336
337// identifier-symbol := ampersand | dollar | bar | percent | at | slash | hashtag | backslash | tilde | plus | dash | asterisk | caret ;
338pub fn identifier_symbol(input: ParseString) -> ParseResult<Token> {
339  let (input, symbol) = alt((ampersand, dollar, percent, at, slash, hashtag, backslash, tilde, plus, dash, asterisk, caret))(input)?;
340  Ok((input, symbol))
341}
342
343// text := alpha | digit | space | emoji | forbidden_emoji | space | tab | escaped-char | punctuation | grouping-symbol | symbol ;
344pub fn text(input: ParseString) -> ParseResult<Token> {
345  let (input, text) = alt((alpha_token, digit_token, emoji, forbidden_emoji, space, tab, escaped_char, punctuation, grouping_symbol, symbol))(input)?;
346  Ok((input, text))
347}
348
349// raw-text := alpha | digit | emoji | forbidden_emoji | space | tab | punctuation | grouping_symbol | symbol ;
350pub fn raw_text(input: ParseString) -> ParseResult<Token> {
351  let (input, text) = alt((alpha_token, digit_token, emoji, forbidden_emoji, space, tab, punctuation, grouping_symbol, symbol))(input)?;
352  Ok((input, text))
353}
354
355// Whitespace
356// ============================================================================
357// Ref: #35070717845239353
358
359// new-line := (carriage-return, new-line) | new-line-char | carriage-return ;
360pub fn new_line(input: ParseString) -> ParseResult<Token> {
361  let (input, result) = alt((carriage_return_new_line,new_line_char,carriage_return))(input)?;
362  Ok((input, result))
363}
364
365// whitespace := space | new-line | tab ;
366pub fn whitespace(input: ParseString) -> ParseResult<Token> {
367  let (input, space) = alt((space,tab,new_line))(input)?;
368  Ok((input, space))
369}
370
371// ws0 := *whitespace ;
372pub fn whitespace0(input: ParseString) -> ParseResult<()> {
373  let (input, _) = many0(whitespace)(input)?;
374  Ok((input, ()))
375}
376
377// ws1 := +whitespace ;
378pub fn whitespace1(input: ParseString) -> ParseResult<()> {
379  let (input, _) = many1(whitespace)(input)?;
380  Ok((input, ()))
381}
382
383// newline-indent := new-line, *space-tab ;
384pub fn newline_indent(input: ParseString) -> ParseResult<()> {
385  let (input, _) = new_line(input)?;
386  let (input, _) = many0(space_tab)(input)?;
387  Ok((input, ()))
388}
389
390// ws1e := ws1, newline-indent? ;
391pub fn ws1e(input: ParseString) -> ParseResult<()> {
392  let (input, _) = many1(space_tab)(input)?;
393  Ok((input, ()))
394}
395
396// ws0e := ws0, newline-indent? ;
397pub fn ws0e(input: ParseString) -> ParseResult<()> {
398  let (input, _) = many0(space_tab)(input)?;
399  Ok((input, ()))
400}
401
402// space-tab := space | tab ;
403pub fn space_tab(input: ParseString) -> ParseResult<Token> {
404  let (input, space) = alt((space,tab,nbsp,thin_space))(input)?;
405  Ok((input, space))
406}
407
408// space-tab0 := *space-tab ;
409pub fn space_tab0(input: ParseString) -> ParseResult<()> {
410  let (input, _) = many0(space_tab)(input)?;
411  Ok((input, ()))
412}
413
414// space-tab1 := +space-tab ;
415pub fn space_tab1(input: ParseString) -> ParseResult<()> {
416  let (input, _) = many1(space_tab)(input)?;
417  Ok((input, ()))
418}
419
420// list-separator := ws0, ",", ws0 ;
421pub fn list_separator(input: ParseString) -> ParseResult<()> {
422  let (input,_) = nom_tuple((whitespace0,tag(","),whitespace0))(input)?;
423  Ok((input, ()))
424}
425
426// enum-separator := ws0*, "|", ws0 ;
427pub fn enum_separator(input: ParseString) -> ParseResult<()> {
428  let (input,_) = nom_tuple((whitespace0,tag("|"),whitespace0))(input)?;
429  Ok((input, ()))
430}
431
432// Identifiers
433// ----------------------------------------------------------------------------
434// Ref: #40075932908181571
435
436// identifier := (alpha | emoji), (alpha | digit | identifier_symbol | emoji)* ;
437pub fn identifier(input: ParseString) -> ParseResult<Identifier> {
438  let (input, (first, mut rest)) = nom_tuple((alt((alpha_token, emoji)), many0(alt((alpha_token, digit_token, identifier_symbol, emoji)))))(input)?;
439  let mut tokens = vec![first];
440  tokens.append(&mut rest);
441  let mut merged = Token::merge_tokens(&mut tokens).unwrap();
442  merged.kind = TokenKind::Identifier; 
443  Ok((input, Identifier{name: merged}))
444}