mech_syntax/
base.rs

1#[macro_use]
2use crate::parser::*;
3use crate::*;
4use crate::label;
5use crate::labelr;
6use nom::{
7  multi::separated_list0,
8  sequence::tuple as nom_tuple,
9};
10use crate::nodes::Kind;
11
12// Lexical Elements
13// ============================================================================
14// Ref: #58393432045966419
15
// Generates a public parser `$name` that matches the literal `$byte` and
// returns it as a `Token` of kind `$token`.
//
// The generated parser:
//   * fails with "Unexpected eof" when the input is empty,
//   * otherwise defers to `tag($byte)` and propagates its error,
//   * records a `src_range` running from the location before the match to
//     the location after it.
macro_rules! leaf {
  ($name:ident, $byte:expr, $token:expr) => (
    pub fn $name(input: ParseString) -> ParseResult<Token> {
      if input.is_empty() {
        return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
      }
      let start = input.loc();
      let (input, _) = tag($byte)(input)?;
      let end = input.loc();
      let src_range = SourceRange { start, end };
      // The token text is the literal itself, not a slice of the input.
      Ok((input, Token{kind: $token, chars: $byte.chars().collect::<Vec<char>>(), src_range}))
    }
  )
}
31
// Generates a public parser `$name` that matches the literal `$byte`
// surrounded by optional whitespace (`whitespace0` on both sides) and returns
// it as a `Token` of kind `$token`.
//
// The generated parser fails with "Unexpected eof" when the input is empty
// and otherwise propagates the error of whichever sub-parser fails.
//
// NOTE(review): `start` is taken before the leading whitespace and `end`
// after the trailing whitespace, so `src_range` covers the padding as well as
// the literal (unlike `ws1_leaf`). Kept as-is; confirm this is intended.
macro_rules! ws0_leaf {
  ($name:ident, $byte:expr, $token:expr) => (
    pub fn $name(input: ParseString) -> ParseResult<Token> {
      if input.is_empty() {
        return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
      }
      let start = input.loc();
      let (input, _) = whitespace0(input)?;
      let (input, _) = tag($byte)(input)?;
      let (input, _) = whitespace0(input)?;
      let end = input.loc();
      let src_range = SourceRange { start, end };
      // The token text is the literal itself, without the surrounding whitespace.
      Ok((input, Token{kind: $token, chars: $byte.chars().collect::<Vec<char>>(), src_range}))
    }
  )
}
49
// Generates a public parser `$name` that matches the literal `$byte` with
// mandatory whitespace (`whitespace1`) on both sides and returns it as a
// `Token` of kind `$token`.
//
// The generated parser fails with "Unexpected eof" when the input is empty
// and otherwise propagates the error of whichever sub-parser fails. The
// recorded `src_range` covers only the literal: `start` is taken after the
// leading whitespace and `end` before the trailing whitespace.
//
// The previous version also read `input.graphemes[input.cursor]` after the
// leading whitespace had been consumed. The value was never used, and the
// index could run past the end when the input consisted only of whitespace,
// so the read has been dropped.
macro_rules! ws1_leaf {
  ($name:ident, $byte:expr, $token:expr) => (
    pub fn $name(input: ParseString) -> ParseResult<Token> {
      if input.is_empty() {
        return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
      }
      let (input, _) = whitespace1(input)?;
      let start = input.loc();
      let (input, _) = tag($byte)(input)?;
      let end = input.loc();
      let (input, _) = whitespace1(input)?;
      let src_range = SourceRange { start, end };
      Ok((input, Token{kind: $token, chars: $byte.chars().collect::<Vec<char>>(), src_range}))
    }
  )
}
67
68// Tokens
69// ----------------------------------------------------------------------------
70// Ref: 39003557984811317
71
72leaf!{at, "@", TokenKind::At}
73leaf!{hashtag, "#", TokenKind::HashTag}
74leaf!{period, ".", TokenKind::Period}
75leaf!{colon, ":", TokenKind::Colon}
76leaf!{comma, ",", TokenKind::Comma}
77leaf!{percent, "%", TokenKind::Percent}
78leaf!{apostrophe, "'", TokenKind::Apostrophe}
79leaf!{left_bracket, "[", TokenKind::LeftBracket}
80leaf!{right_bracket, "]", TokenKind::RightBracket}
81leaf!{left_parenthesis, "(", TokenKind::LeftParenthesis}
82leaf!{right_parenthesis, ")", TokenKind::RightParenthesis}
83leaf!{left_brace, "{", TokenKind::LeftBrace}
84leaf!{right_brace, "}", TokenKind::RightBrace}
85leaf!{dollar, "$", TokenKind::Dollar}
86leaf!{equal, "=", TokenKind::Equal}
87leaf!{left_angle, "<", TokenKind::LeftAngle}
88leaf!{right_angle, ">", TokenKind::RightAngle}
89leaf!{exclamation, "!", TokenKind::Exclamation}
90leaf!{question, "?", TokenKind::Question}
91leaf!{plus, "+", TokenKind::Plus}
92leaf!{dash, "-", TokenKind::Dash}
93leaf!{underscore, "_", TokenKind::Underscore}
94leaf!{asterisk, "*", TokenKind::Asterisk}
95leaf!{slash, "/", TokenKind::Slash}
96leaf!{backslash, "\\", TokenKind::Backslash}
97leaf!{caret, "^", TokenKind::Caret}
98leaf!{space, " ", TokenKind::Space}
99leaf!{tab, "\t", TokenKind::Tab}
100leaf!{tilde, "~", TokenKind::Tilde}
101leaf!{grave, "`", TokenKind::Grave}
102leaf!{bar, "|", TokenKind::Bar}
103leaf!{quote, "\"", TokenKind::Quote}
104leaf!{ampersand, "&", TokenKind::Ampersand}
105leaf!{semicolon, ";", TokenKind::Semicolon}
106leaf!{new_line_char, "\n", TokenKind::Newline}
107leaf!{carriage_return, "\r", TokenKind::CarriageReturn}
108leaf!{carriage_return_new_line, "\r\n", TokenKind::CarriageReturn}
109leaf!{english_true_literal, "true", TokenKind::True}
110leaf!{english_false_literal, "false", TokenKind::False}
111leaf!{check_mark, "✓", TokenKind::True}
112leaf!{cross, "✗", TokenKind::False}
113leaf!{negate, "¬", TokenKind::Not}
114
115leaf!{box_tl_round, "╭", TokenKind::BoxDrawing}
116leaf!{box_tr_round, "╮", TokenKind::BoxDrawing}
117leaf!{box_bl_round, "╰", TokenKind::BoxDrawing}
118leaf!{box_br_round, "╯", TokenKind::BoxDrawing}
119
120leaf!{box_tl_bold, "┏", TokenKind::BoxDrawing}
121leaf!{box_tr_bold, "┓", TokenKind::BoxDrawing} 
122leaf!{box_bl_bold, "┗", TokenKind::BoxDrawing}
123leaf!{box_br_bold, "┛", TokenKind::BoxDrawing}
124
125leaf!{box_tl, "┌", TokenKind::BoxDrawing}
126leaf!{box_tr, "┐", TokenKind::BoxDrawing}
127leaf!{box_bl, "└", TokenKind::BoxDrawing}
128leaf!{box_br, "┘", TokenKind::BoxDrawing}
129
130leaf!{box_cross, "┼", TokenKind::BoxDrawing}
131leaf!{box_horz, "─", TokenKind::BoxDrawing}
132leaf!{box_t_left, "├", TokenKind::BoxDrawing}
133leaf!{box_t_right, "┤", TokenKind::BoxDrawing}
134leaf!{box_t_top, "┬", TokenKind::BoxDrawing}
135leaf!{box_t_bottom, "┴", TokenKind::BoxDrawing}
136leaf!{box_vert, "│", TokenKind::BoxDrawing}
137leaf!{box_vert_bold, "┃", TokenKind::BoxDrawing}
138
139leaf!(http_prefix, "http", TokenKind::HttpPrefix);
140leaf!(img_prefix, "![", TokenKind::ImgPrefix);
141leaf!(footnote_prefix, "[^", TokenKind::FootnotePrefix);
142leaf!(abstract_sigil, "%%", TokenKind::AbstractSigil);
143leaf!(equation_sigil, "$$", TokenKind::EquationSigil);
144leaf!(highlight_sigil, "!!", TokenKind::HighlightSigil);
145leaf!(quote_sigil, ">", TokenKind::QuoteSigil);
146leaf!(float_left, "<<", TokenKind::FloatLeft);
147leaf!(float_right, ">>", TokenKind::FloatRight);
148leaf!(strong_sigil, "**", TokenKind::StrongSigil);
149leaf!(emphasis_sigil, "*", TokenKind::EmphasisSigil);
150leaf!(underline_sigil, "__", TokenKind::UnderlineSigil);
151leaf!(strike_sigil, "~~", TokenKind::StrikeSigil);
152leaf!(query_sigil, "??", TokenKind::QuerySigil);
153
154ws0_leaf!(define_operator, ":=", TokenKind::DefineOperator);
155ws0_leaf!(assign_operator, "=", TokenKind::AssignOperator);
156ws0_leaf!(output_operator, "=>", TokenKind::OutputOperator);
157ws0_leaf!(async_transition_operator, "~>", TokenKind::AsyncTransitionOperator);
158ws0_leaf!(transition_operator, "->", TokenKind::TransitionOperator);
159
160
161// emoji_grapheme := ?emoji_grapheme_literal? ;
162pub fn emoji_grapheme(mut input: ParseString) -> ParseResult<String> {
163  if let Some(matched) = input.consume_emoji() {
164    Ok((input, matched))
165  } else {
166    Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
167  }
168}
169
170// alpha := ?alpha_literal? ;
171pub fn alpha(mut input: ParseString) -> ParseResult<String> {
172  if let Some(matched) = input.consume_alpha() {
173    Ok((input, matched))
174  } else {
175    Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
176  }
177}
178
179// digit := ?digit_literal? ;
180pub fn digit(mut input: ParseString) -> ParseResult<String> {
181  if let Some(matched) = input.consume_digit() {
182    Ok((input, matched))
183  } else {
184    Err(nom::Err::Error(ParseError::new(input, "Unexpected character")))
185  }
186}
187
188// any := ?any_character? ;
189pub fn any(mut input: ParseString) -> ParseResult<String> {
190  if let Some(matched) = input.consume_one() {
191    Ok((input, matched))
192  } else {
193    Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
194  }
195}
196
197pub fn any_token(mut input: ParseString) -> ParseResult<Token> {
198  if input.is_empty() {
199    return Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
200  }
201  let start = input.loc();
202  let byte = input.graphemes[input.cursor];
203  if let Some(matched) = input.consume_one() {
204    let end = input.loc();
205    let src_range = SourceRange { start, end };
206    Ok((input, Token{kind: TokenKind::Any, chars: byte.chars().collect::<Vec<char>>(), src_range}))
207  } else {
208    Err(nom::Err::Error(ParseError::new(input, "Unexpected eof")))
209  }
210}
211
212// forbidden_emoji := box_drawing | other_forbidden_shapes ;
213pub fn forbidden_emoji(input: ParseString) -> ParseResult<Token> {
214  alt((box_tl, box_br, box_bl, box_tr, box_tr_bold, box_tl_bold, box_br_bold, box_bl_bold, box_t_left,box_tl_round,box_br_round, box_tr_round, box_bl_round, box_vert, box_cross, box_horz, box_t_right, box_t_top, box_t_bottom))(input)
215}
216
217// emoji := (!forbidden_emoji, emoji_grapheme) ;
218pub fn emoji(input: ParseString) -> ParseResult<Token> {
219  let msg1 = "Cannot be a box-drawing emoji";
220  let start = input.loc();
221  let (input, _) = is_not(forbidden_emoji)(input)?;
222  let (input, g) = emoji_grapheme(input)?;
223  let end = input.loc();
224  let src_range = SourceRange { start, end };
225  Ok((input, Token{kind: TokenKind::Emoji, chars: g.chars().collect::<Vec<char>>(), src_range}))
226}
227
228// alpha_token := alpha_literal_token ;
229pub fn alpha_token(input: ParseString) -> ParseResult<Token> {
230  let (input, (g, src_range)) = range(alpha)(input)?;
231  Ok((input, Token{kind: TokenKind::Alpha, chars: g.chars().collect::<Vec<char>>(), src_range}))
232}
233
234// digit_token := digit_literal_token ;
235pub fn digit_token(input: ParseString) -> ParseResult<Token> {
236  let (input, (g, src_range)) = range(digit)(input)?;
237  Ok((input, Token{kind: TokenKind::Digit, chars: g.chars().collect::<Vec<char>>(), src_range}))
238}
239
240// alphanumeric := alpha | digit ;
241pub fn alphanumeric(input: ParseString) -> ParseResult<Token> {
242  let (input, token) = alt((alpha_token, digit_token))(input)?; 
243  Ok((input, token))
244}
245
246// underscore_digit := underscore, digit ;
247pub fn underscore_digit(input: ParseString) -> ParseResult<Token> {
248  let (input, _) = underscore(input)?;
249  let (input, digit) = digit_token(input)?;
250  Ok((input,digit))
251}
252
253// digit-sequence := digit, (underscore-digit | digit)* ;
254pub fn digit_sequence(input: ParseString) -> ParseResult<Vec<Token>> {
255  let (input, mut start) = digit_token(input)?;
256  let (input, mut tokens) = many0(alt((underscore_digit,digit_token)))(input)?;
257  let mut all = vec![start];
258  all.append(&mut tokens);
259  Ok((input,all))
260}
261
262// grouping-symbol := left-parenthesis | right_parenthesis | left-angle | right-angle | left-brace | right-brace | left-bracket | right-bracket ;
263pub fn grouping_symbol(input: ParseString) -> ParseResult<Token> {
264  let (input, grouping) = alt((left_parenthesis, right_parenthesis, left_angle, right_angle, left_brace, right_brace, left_bracket, right_bracket))(input)?;
265  Ok((input, grouping))
266}
267
268// punctuation := period | exclamation | question | comma | colon | semicolon | quote | apostrophe ;
269pub fn punctuation(input: ParseString) -> ParseResult<Token> {
270  let (input, punctuation) = alt((period, exclamation, question, comma, colon, semicolon, quote, apostrophe))(input)?;
271  Ok((input, punctuation))
272}
273
274// escaped_char := "\" ,  symbol | punctuation ;
275pub fn escaped_char(input: ParseString) -> ParseResult<Token> {
276  let (input, _) = backslash(input)?;
277  let (input, symbol) = alt((symbol, punctuation))(input)?;
278  Ok((input, symbol))
279}
280
281// symbol := ampersand | dollar | bar | percent | at | slash | hashtag | equal | backslash | tilde | plus | dash | asterisk | caret | underscore ;
282pub fn symbol(input: ParseString) -> ParseResult<Token> {
283  let (input, symbol) = alt((ampersand, dollar, bar, percent, at, slash, hashtag, equal, backslash, tilde, plus, dash, asterisk, caret, underscore))(input)?;
284  Ok((input, symbol))
285}
286
287// text := alpha | digit | space | tab | escaped_char | punctuation | grouping_symbol | symbol ;
288pub fn text(input: ParseString) -> ParseResult<Token> {
289  let (input, text) = alt((alpha_token, digit_token, emoji, forbidden_emoji, space, tab, escaped_char, punctuation, grouping_symbol, symbol))(input)?;
290  Ok((input, text))
291}
292
293// Whitespace
294// ============================================================================
295// Ref: #35070717845239353
296
297// new-line := (carriage-return, new-line) | new-line-char | carriage-return ;
298pub fn new_line(input: ParseString) -> ParseResult<Token> {
299  let (input, result) = alt((carriage_return_new_line,new_line_char,carriage_return))(input)?;
300  Ok((input, result))
301}
302
303// whitespace := space | new_line | tab ;
304pub fn whitespace(input: ParseString) -> ParseResult<Token> {
305  let (input, space) = alt((space,tab,new_line))(input)?;
306  Ok((input, space))
307}
308
309// ws0 := *whitespace ;
310pub fn whitespace0(input: ParseString) -> ParseResult<()> {
311  let (input, _) = many0(whitespace)(input)?;
312  Ok((input, ()))
313}
314
315// ws1 := +whitespace ;
316pub fn whitespace1(input: ParseString) -> ParseResult<()> {
317  let (input, _) = many1(whitespace)(input)?;
318  Ok((input, ()))
319}
320
321// space-tab := space | tab ;
322pub fn space_tab(input: ParseString) -> ParseResult<Token> {
323  let (input, space) = alt((space,tab))(input)?;
324  Ok((input, space))
325}
326
327// list-separator := ws0, ",", ws0 ;
328pub fn list_separator(input: ParseString) -> ParseResult<()> {
329  let (input,_) = nom_tuple((whitespace0,tag(","),whitespace0))(input)?;
330  Ok((input, ()))
331}
332
333// enum-separator := ws0*, "|", ws0 ;
334pub fn enum_separator(input: ParseString) -> ParseResult<()> {
335  let (input,_) = nom_tuple((whitespace0,tag("|"),whitespace0))(input)?;
336  Ok((input, ()))
337}
338
339// Identifiers
340// ============================================================================
341// Ref: #40075932908181571
342
343// identifier := (alpha | emoji), (alpha | digit | symbol | emoji)* ;
344pub fn identifier(input: ParseString) -> ParseResult<Identifier> {
345  let (input, (first, mut rest)) = nom_tuple((alt((alpha_token, emoji)), many0(alt((alpha_token, digit_token, symbol, emoji)))))(input)?;
346  let mut tokens = vec![first];
347  tokens.append(&mut rest);
348  let mut merged = Token::merge_tokens(&mut tokens).unwrap();
349  merged.kind = TokenKind::Identifier; 
350  Ok((input, Identifier{name: merged}))
351}