use translate::*;
use std::num::from_str_radix;
use std::char;
// Exported top-level rule: leading filler, then every `use` item, then every
// rule definition, collected into a Grammar value.
#[export]
grammar -> Grammar
  = __ uses:rust_use* defs:rule*
  { Grammar{ imports: uses, rules: defs } }
// One rule definition: optional export attribute, name, optional return type,
// `=`, the body expression and an optional trailing `;`.
rule -> Rule
  = is_exported:exportflag rule_name:identifier ret:returntype equals body:expression semicolon? {
    Rule{ name: rule_name, expr: box body, ret_type: ret, exported: is_exported }
  }
// True when the rule is prefixed with `#[export]` or `#[pub]`, false otherwise.
exportflag -> bool
  = ("#[export]" / "#[pub]") __ { true }
  / "" { false }
// Optional `-> Type` annotation, returned as the trimmed source text of the
// type. A rule without an annotation gets the unit type "()".
returntype -> String
  = returns ty:(rust_type { match_str.trim().to_string() }) { ty }
  / { "()".to_string() }
// A `use` declaration in one of three shapes: glob (path followed by `::*`),
// list (path followed by `::` and a braced, comma-separated list of names),
// or a plain path. The path is cloned into whichever variant matches.
rust_use -> RustUse
  = "use" __ path:rust_path __ item:(
        "::" __ "*" __
        { RustUseGlob(path.clone()) }
      / "::" __ "{" __ idents:(identifier ++ ("," __)) "}" __
        { RustUseList(path.clone(), idents) }
      / ""
        { RustUseSimple(path.clone()) }
    ) ";" __ { item }
// `::`-separated path of identifiers, returned as the matched source text.
// The text is not trimmed here, so any filler consumed after the last
// identifier is part of the returned string.
rust_path -> String
  = (identifier ++ ("::" __)) { match_str.to_string() }
// Recognizer for the subset of Rust type syntax accepted after `->`:
// unit, bracketed types, references with an optional lifetime, generic
// applications (lifetime or type arguments), `::`-qualified paths, tuples and
// bare identifiers. The rule declares no return type; returntype reads the
// matched text instead.
// NOTE(review): the trailing "" literals presumably keep those alternatives as
// sequences so they do not yield the identifier's String -- confirm.
rust_type
= "()" __
/ "[" rust_type "]" __
/ "&" ("'" identifier)? rust_type
/ identifier "<" (("'" identifier "") / rust_type) ** (__ "," __) ">" __
/ identifier "::" rust_type
/ "(" rust_type ++ (__ "," __) ")" __
/ identifier ""
// An expression is an ordered choice.
expression -> Expr = choice
// Ordered choice: one or more sequences separated by `/`. A lone sequence is
// returned as-is rather than wrapped in a one-element ChoiceExpr.
choice -> Expr
  = first:sequence rest:(slash alt:sequence { alt })* {
    if rest.is_empty() {
      first
    } else {
      // Keep source order by putting the first alternative in front.
      let mut alternatives = rest;
      alternatives.insert(0, first);
      ChoiceExpr(alternatives)
    }
  }
// Either a run of (optionally labeled) elements followed by an action block,
// or a plain run of elements. A plain run of exactly one element is returned
// unwrapped; any other length (including zero) becomes a SequenceExpr.
sequence -> Expr
  = items:labeled* body:action {
    ActionExpr(items, body)
  }
  / items:prefixed* {
    if items.len() == 1 {
      items.into_iter().next().unwrap()
    } else {
      SequenceExpr(items)
    }
  }
// An element of an action sequence, with or without a `label:` prefix.
labeled -> TaggedExpr
  = tag:identifier colon inner:prefixed {
    TaggedExpr{ name: Some(tag), expr: box inner }
  }
  / inner:prefixed {
    TaggedExpr{ name: None, expr: box inner }
  }
// Prefix operators: `$` (accepted and passed through unchanged), `&` for a
// positive lookahead and `!` for a negative lookahead.
prefixed -> Expr
  = dollar inner:suffixed {
    inner
  }
  // Semantic predicates (a prefix operator followed by an action) are disabled:
  // / and code:action {
  // fail!("/*Semantic and unsupported*/");
  // }
  / and inner:suffixed {
    PosAssertExpr(box inner)
  }
  // / not code:action {
  // fail!("/*Semantic not unsupported*/");
  // }
  / not inner:suffixed {
    NegAssertExpr(box inner)
  }
  / suffixed
// Postfix operators. Repeat takes (expression, minimum, optional maximum,
// optional separator). The two-character operators `**` and `++` are tried
// before `*` and `+` so the longer token wins.
suffixed -> Expr
  = item:primary question { OptionalExpr(box item) }
  / item:primary starstar separator:primary { Repeat(box item, 0, None, Some(box separator)) }
  / item:primary plusplus separator:primary { Repeat(box item, 1, None, Some(box separator)) }
  / item:primary star { Repeat(box item, 0, None, None) }
  / item:primary plus { Repeat(box item, 1, None, None) }
  // Exact count: the same number is used as minimum and maximum.
  / item:primary lbrace count:integer rbrace { Repeat(box item, count, Some(count), None) }
  // Lower bound with an optional upper bound.
  / item:primary lbrace lo:integer comma hi:integer? rbrace { Repeat(box item, lo, hi, None) }
  // Optional upper bound only; the minimum is zero.
  / item:primary lbrace comma hi:integer? rbrace { Repeat(box item, 0, hi, None) }
  / primary
// Atomic expressions. The negative lookahead after the identifier rejects a
// name that is followed by an optional return type and `=`, i.e. the start of
// the next rule definition rather than a reference to a rule.
primary -> Expr
  = rule_name:identifier !(string? returntype equals) { RuleExpr(rule_name) }
  / literal
  / class
  / dot { AnyCharExpr }
  / lparen inner:expression rparen { inner }
/* "Lexical" elements */
// Action code block followed by optional filler; yields the block's text.
action -> String
  = code:braced __ { code }
// Brace-balanced block of text, returned verbatim including the outer braces.
// Nested blocks are matched recursively; everything else is any run of
// characters that are not braces.
// NOTE(review): braces are counted purely textually, so a brace inside a
// string literal, char literal or comment within the block also counts.
braced -> String
= "{" (((braced "") / nonBraceCharacters)*) "}" {match_str.to_string()}
// One or more characters that are not braces.
nonBraceCharacters = nonBraceCharacter+

// Any single character other than an opening or closing brace.
nonBraceCharacter = [^{}]
// Punctuation tokens of the grammar syntax. Each token also skips the filler
// (whitespace, line terminators, comments) that follows it.

// Rule structure
equals    = "=" __
colon     = ":" __
semicolon = ";" __
slash     = "/" __

// Prefix operators
and       = "&" __
not       = "!" __
dollar    = "$" __

// Suffix operators
question  = "?" __
star      = "*" __
starstar  = "**" __
plus      = "+" __
plusplus  = "++" __

// Grouping, wildcard, return arrow and bounded repetition
lparen    = "(" __
rparen    = ")" __
dot       = "." __
returns   = "->" __
lbrace    = "{" __
rbrace    = "}" __
comma     = "," __
// Unsigned decimal number followed by optional filler. The digits are parsed
// with unwrap, so a value that does not fit the target type panics.
integer -> usize
  = digits:([0-9]+ { match_str.parse().unwrap() }) __ { digits }
/*
* Modeled after ECMA-262, 5th ed., 7.6, but much simplified:
*
* * no Unicode escape sequences
*
* * "Unicode combining marks" and "Unicode connection punctuation" can't be
* part of the identifier
*
* * only [a-zA-Z] is considered a "Unicode letter"
*
* * only [0-9] is considered a "Unicode digit"
*
* The simplifications were made just to make the implementation a little bit
* easier; there is no "philosophical" reason behind them.
*
* Contrary to ECMA-262, the "$" character is not valid because it serves another
* purpose in the grammar.
*/
// A letter or underscore followed by letters, digits or underscores. Trailing
// filler is consumed but is not part of the returned name.
identifier -> String
  = name:((letter / "_") (letter / digit / "_")* { match_str.to_string() }) __ { name }
/*
* Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
* vaguely).
*/
// String literal in either quote style. An `i` written directly after the
// closing quote sets the second field of LiteralExpr to true.
literal -> Expr
  = text:(doubleQuotedString / singleQuotedString) ci_flag:"i"? __ {
    LiteralExpr(text, ci_flag.is_some())
  }
// Quoted string in either quote style, followed by optional filler.
string -> String
  = text:(doubleQuotedString / singleQuotedString) __ { text }
// Contents of a "..." string with escape sequences already decoded.
doubleQuotedString -> String
  = '"' chars:doubleQuotedCharacter* '"' { chars.into_iter().collect() }

// One decoded character inside a double-quoted string.
doubleQuotedCharacter -> char
  = simpleDoubleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hex2EscapeSequence
  / unicodeEscapeSequence
  / hex4EscapeSequence
  / hex8EscapeSequence
  / eolEscapeSequence

// Any character except the double quote, the backslash and line terminators.
simpleDoubleQuotedCharacter -> char
  = !('"' / "\\" / eolChar) . { match_str.char_at(0) }
// Contents of a '...' string with escape sequences already decoded.
singleQuotedString -> String
  = "'" chars:singleQuotedCharacter* "'" { chars.into_iter().collect() }

// One decoded character inside a single-quoted string.
singleQuotedCharacter -> char
  = simpleSingleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hex2EscapeSequence
  / unicodeEscapeSequence
  / hex4EscapeSequence
  / hex8EscapeSequence
  / eolEscapeSequence

// Any character except the single quote, the backslash and line terminators.
simpleSingleQuotedCharacter -> char
  = !("'" / "\\" / eolChar) . { match_str.char_at(0) }
// Character class: an optional leading ^ inverts the set, and each part is
// either a single character or a first-last range.
// NOTE(review): a trailing `i` is accepted but ignored -- it is not passed to
// CharSetExpr, so a class written with the flag still matches case-sensitively.
// The match is left unlabeled because the action never reads it.
class -> Expr
  = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" "i"? __ {
    CharSetExpr(inverted.is_some(), parts)
  }
// A `first-last` range inside a character class.
classCharacterRange -> CharSetCase
  = first:bracketDelimitedCharacter "-" last:bracketDelimitedCharacter {
    // TODO: reject ranges whose start is greater than their end.
    CharSetCase{ start: first, end: last }
  }
// A single character inside a character class, stored as a range whose start
// and end are the same character.
classCharacter -> CharSetCase
  = ch:bracketDelimitedCharacter {
    CharSetCase{ start: ch, end: ch }
  }
// One decoded character inside a [...] character class.
bracketDelimitedCharacter -> char
  = simpleBracketDelimitedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hex2EscapeSequence
  / unicodeEscapeSequence
  / hex4EscapeSequence
  / hex8EscapeSequence
  / eolEscapeSequence

// Any character except the closing bracket, the backslash and line terminators.
simpleBracketDelimitedCharacter -> char
  = !("]" / "\\" / eolChar) . { match_str.char_at(0) }
// Backslash followed by one character that does not start another kind of
// escape (digit, x, u, U or a line terminator). \n, \r and \t are translated;
// every other character stands for itself.
simpleEscapeSequence -> char
  = "\\" !(digit / "x" / "u" / "U" / eolChar) . {
    // Index 1 skips the leading backslash.
    let escaped = match_str.char_at(1);
    match escaped {
      'n' => '\n',
      'r' => '\r',
      't' => '\t',
      // \b, \f and \v are intentionally not translated.
      other => other
    }
  }
// `\0` not followed by another digit: the NUL character.
zeroEscapeSequence -> char
  = "\\0" !digit { '\0' }
// `\x` followed by exactly two hex digits, decoded as a code point.
hex2EscapeSequence -> char
  = "\\x" code:(hexDigit hexDigit { from_str_radix::<u32>(match_str, 16) }) {
    char::from_u32(code.unwrap()).unwrap()
  }
// `\u` followed by one or more hex digits in braces, decoded as a code point.
// Both conversions use unwrap, so a value that is not a valid char panics.
unicodeEscapeSequence -> char
  = "\\u{" code:(hexDigit+ { from_str_radix::<u32>(match_str, 16) }) "}" {
    char::from_u32(code.unwrap()).unwrap()
  }
// `\u` followed by exactly four hex digits.
// Deprecated in Rust, will be removed.
hex4EscapeSequence -> char
  = "\\u" code:(hexDigit hexDigit hexDigit hexDigit { from_str_radix::<u32>(match_str, 16) }) {
    char::from_u32(code.unwrap()).unwrap()
  }

// `\U` followed by exactly eight hex digits.
// Deprecated in Rust, will be removed.
hex8EscapeSequence -> char
  = "\\U" code:(hexDigit hexDigit hexDigit hexDigit hexDigit hexDigit hexDigit hexDigit { from_str_radix::<u32>(match_str, 16) }) {
    char::from_u32(code.unwrap()).unwrap()
  }
// A backslash followed by a line terminator yields a newline character,
// whichever terminator was written. The terminator is matched without a label
// because eol produces no value and the action does not read it.
eolEscapeSequence -> char
  = "\\" eol { '\n' }
// Character categories used by identifiers and escape sequences (ASCII only).
digit    = [0-9]
hexDigit = [0-9a-fA-F]

letter = lowerCaseLetter / upperCaseLetter
lowerCaseLetter = [a-z]
upperCaseLetter = [A-Z]
// Skippable filler between tokens: any run of whitespace, line terminators
// and comments, possibly empty.
__
  = (whitespace / eol / comment)*
/* Modeled after ECMA-262, 5th ed., 7.4. */
// Either comment style.
comment = singleLineComment / multiLineComment

// From `//` up to, but not including, the next line terminator.
singleLineComment = "//" (!eolChar .)*

// From `/*` through the first `*/`; these comments do not nest.
multiLineComment = "/*" (!"*/" .)* "*/"
/* Modeled after ECMA-262, 5th ed., 7.3. */
// A single line terminator. "\r\n" is listed before "\r" so that a CRLF pair
// is consumed as one terminator.
eol = "\n" / "\r\n" / "\r" / "\u{2028}" / "\u{2029}"
// Any single character that can start a line terminator. Written with the
// braced \u escape, matching eol and whitespace, instead of the four-digit
// form that this grammar marks as deprecated.
eolChar
  = [\n\r\u{2028}\u{2029}]
/* Modeled after ECMA-262, 5th ed., 7.2. */
// A single horizontal whitespace character. \v and \f were removed from the
// ECMA-262 set.
whitespace = [ \t\u{00A0}\u{FEFF}\u{1680}\u{180E}\u{2000}-\u{200A}\u{202F}\u{205F}\u{3000}]