use translate::*;
use std::char;
use codemap::{ self, Spanned };
// Declares `file_span` (a `codemap::Span`) as an extra grammar argument.
// The `spanned` template reads it to compute the span of each matched node.
#![arguments(file_span: codemap::Span)]
// Template that wraps the value produced by `inner` in a `codemap::Spanned`.
// The positions just before and just after the match are converted into a
// sub-span of the `file_span` grammar argument.
spanned<inner>
  = lo:#position node:inner hi:#position
  {
    let span = file_span.subspan(lo as u64, hi as u64);
    codemap::Spanned { node, span }
  }
// Public entry point: a sequence of items separated (and optionally
// surrounded) by `_`, each paired with its source span.
pub items -> Vec<Spanned<Item>>
  = _ list:spanned<item>**_ _ { list }
// A rule definition, in order: legacy export attribute, `#[cache]` attribute,
// `pub` keyword (each may be absent), the rule name, an optional return type,
// `=`, the body expression and an optional trailing `;`.
// The rule is exported when either the legacy attribute or `pub` is present.
rule -> Rule
  = legacy_exported:exportflag _ cached:cacheflag _ public:pubflag _ name:identifier _ ret_type:returntype _ "=" _ body:expression (_ ";")? {
      let exported = legacy_exported || public;
      Rule { name, expr: Box::new(body), ret_type, exported, cached }
    }
// True when the `pub` keyword is present; consumes nothing otherwise.
pubflag -> bool = flag:PUB? { flag.is_some() }
// True when the legacy `#[export]` or `#[pub]` attribute is present, false
// (consuming nothing) otherwise. The whole choice is wrapped in `#quiet`;
// NOTE(review): presumably so these legacy spellings are not listed among the
// expected tokens in error messages -- confirm against the translator.
exportflag -> bool = #quiet<("#[export]" / "#[pub]") {true} / {false}>
// True when the `#[cache]` attribute is present; consumes nothing otherwise.
cacheflag -> bool = flag:"#[cache]"? { flag.is_some() }
// A template definition: name, a comma-separated parameter list in angle
// brackets, `=`, the body expression and an optional trailing `;`.
template -> Template
  = name:identifier _ "<" _ params:COMMASEP<identifier> _ ">" _ "=" _ body:expression (_ ";")? {
      Template { name, params, expr: Box::new(body) }
    }
// One top-level item. Alternatives are tried in this order: a `use`
// declaration, a rule, a template, then a `#![arguments(...)]` declaration.
item -> Item
  = import:rust_use { Item::Use(import) }
  / def:rule { Item::Rule(def) }
  / tpl:template { Item::Template(tpl) }
  / grammar_args
// The `#![arguments(name: Type, ...)]` declaration, yielding the list of
// (name, type) pairs as an `Item::GrammarArgs`.
grammar_args -> Item
  = "#![" _ "arguments" _ "(" _ declared:COMMASEP<grammar_argument> _ ")" _ "]"
    { Item::GrammarArgs(declared) }
// A single `name: Type` pair inside `#![arguments(...)]`; the type is kept
// as its source text.
grammar_argument -> (String, String)
  = arg_name:identifier _ ":" _ arg_type:type_string { (arg_name, arg_type) }
// The source text matched by `rust_type`, as an owned string. Whitespace
// following the type is consumed but is not part of the captured text.
type_string -> String
  = text:$(rust_type) _ { text.to_string() }
// Optional `-> Type` annotation of a rule. When absent nothing is consumed
// and the unit type `()` is used.
returntype -> String
  = "->" _ declared:type_string { declared }
  / { String::from("()") }
// Recognizer for an identifier inside Rust fragments (`use` paths, types):
// a letter or underscore followed by letters, digits or underscores.
// Unlike `identifier`, it produces no value and does not exclude keywords.
// The second alternative is `#expected("identifier")`.
rust_identifier = #quiet<[a-zA-Z_][a-zA-Z0-9_]*> / #expected("identifier")
// Captures a complete Rust `use` declaration as raw source text.
// After the `use` keyword and a path, one of three tails is accepted, tried
// in this order: a `::*` glob, a braced `::{a, b as c}` list (at least one
// entry, separated by commas), or an optional `as` rename. The declaration
// must end with `;`, which is included in the captured text.
rust_use -> String
= v:$(USE _ rust_path _ (
"::" _ "*" _
/ "::" _ "{" _ ((rust_identifier _ ("as" _ rust_identifier _ )?) ++ ("," _)) "}" _
/ ("as" _ rust_identifier)?
) ";") { v.to_owned() }
// One or more identifiers separated by `::` (whitespace allowed around the
// separator). Recognizer only; produces no value.
rust_path = rust_identifier ++ (_ "::" _)
// Recognizer for the subset of Rust type syntax accepted in return types and
// grammar arguments; callers capture the matched text with `$`.
// Alternatives, in the order they are tried:
//   1. `[T]`            bracketed type
//   2. `&T`             reference, with optional `mut` and optional lifetime
//   3. `Name<A, 'b>`    generic with type and/or lifetime arguments
//   4. `name::T`        path-qualified type
//   5. `(A, B)`         parenthesised, comma-separated list (may be empty)
//   6. `Name`           bare identifier
// The bare identifier comes last because PEG choice is ordered: the longer
// forms that also start with an identifier must be attempted first.
rust_type
= "[" _ rust_type _ "]"
/ "&" _ ("mut" _)? ("'" rust_identifier _)? rust_type
/ rust_identifier _ "<" _ (("'" rust_identifier) / rust_type) ** (_ "," _) _ ">"
/ rust_identifier _ "::" _ rust_type
/ "(" _ rust_type ** (_ "," _) _ ")"
/ rust_identifier
// A parsing expression. Currently just delegates to `choice`, the
// lowest-precedence construct.
expression -> Spanned<Expr>
= choice
// One or more sequences separated by `/`. A lone sequence is returned as
// is; two or more are collected, in source order, into a `ChoiceExpr`.
choice -> Spanned<Expr>
  = spanned<first:sequence rest:(_ "/" _ alt:sequence {alt})* {
      if rest.is_empty() {
        first.node
      } else {
        let mut alternatives = Vec::with_capacity(rest.len() + 1);
        alternatives.push(first);
        alternatives.extend(rest);
        ChoiceExpr(alternatives)
      }
    }>
// A sequence of elements. Two forms, tried in order:
//   * optionally labeled elements followed by an action block, giving an
//     `ActionExpr` carrying the code and its conditional flag;
//   * unlabeled elements without an action. Exactly one element is returned
//     unwrapped; any other count (including zero) becomes a `SequenceExpr`.
sequence -> Spanned<Expr>
  = spanned<parts:labeled**_ _ code:action {
      let (source, is_cond) = code;
      ActionExpr(parts, source, is_cond)
    }
  / parts:prefixed**_ {
      if parts.len() == 1 {
        parts.into_iter().next().unwrap().node
      } else {
        SequenceExpr(parts)
      }
    }>
// A sequence element with an optional `label:` prefix. The label, when
// present, is stored together with its span.
labeled -> TaggedExpr
  = tag:spanned<identifier> _ ":" _ inner:prefixed {
      TaggedExpr { name: Some(tag), expr: Box::new(inner) }
    }
  / inner:prefixed {
      TaggedExpr { name: None, expr: Box::new(inner) }
    }
// Prefix operators applied to a suffixed expression:
//   `$e` -> MatchStrExpr, `&e` -> PosAssertExpr, `!e` -> NegAssertExpr.
// Without a prefix operator the suffixed expression is returned unchanged.
prefixed -> Spanned<Expr>
  = spanned<
      "$" _ operand:suffixed {
        MatchStrExpr(Box::new(operand))
      }
    / "&" _ operand:suffixed {
        PosAssertExpr(Box::new(operand))
      }
    / "!" _ operand:suffixed {
        NegAssertExpr(Box::new(operand))
      }>
  / suffixed
// Suffix operators applied to a primary expression, tried in this order:
//   `e?`             optional
//   `e ** <n> sep`   repetition with separator and optional bounds
//   `e ++ sep`       one-or-more with separator
//   `e * <n>`        repetition with optional bounds
//   `e +`            one-or-more
// The two-character operators are listed before `*` and `+` so that the
// shorter operators do not match their first character.
// Without a suffix operator the primary expression is returned unchanged.
suffixed -> Spanned<Expr>
  = spanned<
      base:primary _ "?" {
        OptionalExpr(Box::new(base))
      }
    / base:primary _ "**" _ bounds:repeatcount _ separator:primary {
        Repeat(Box::new(base), bounds, Some(Box::new(separator)))
      }
    / base:primary _ "++" _ separator:primary {
        Repeat(Box::new(base), BoundedRepeat::Plus, Some(Box::new(separator)))
      }
    / base:primary _ "*" _ bounds:repeatcount {
        Repeat(Box::new(base), bounds, None)
      }
    / base:primary _ "+" {
        Repeat(Box::new(base), BoundedRepeat::Plus, None)
      }>
  / primary
// Optional repetition bounds written after `*` or `**`:
//   `<n>`        exactly n
//   `<min,max>`  range; either side may be omitted
//   (nothing)    unbounded
repeatcount -> BoundedRepeat
  = "<" _ exact:repeatnum _ ">" { BoundedRepeat::Exact(exact) }
  / "<" _ lower:repeatnum? _ "," _ upper:repeatnum? _ ">" { BoundedRepeat::Both(lower, upper) }
  / { BoundedRepeat::None }
// A repetition bound, kept as Rust source text: either an integer literal
// or a braced Rust expression.
repeatnum -> String
  = value:integer { value.to_string() }
  / "{" _ code:rust_expr _ "}" { code }
// Atomic expressions. Alternatives, in the order they are tried:
//   * a rule reference: an identifier NOT followed by `<`, `->` or `=`
//     (those would make it a template invocation or the start of the next
//     rule/template definition);
//   * a template invocation `name<args>` not followed by `=`;
//   * a string literal, a regex string, a character class;
//   * `.` (any character) and `#position`;
//   * `#quiet<e>`, `#expected("msg")`, `#infix<atom> ( levels )` written with
//     braces around the levels, and `#ext<name>`;
//   * a parenthesised expression, returned without an extra wrapper node.
// `#ext<name>` accepts whitespace inside the angle brackets, consistent with
// `#quiet<...>` and `#infix<...>`; the form without whitespace still parses.
#[cache]
primary -> Spanned<Expr>
  = spanned<
    name:identifier !(_ ("<" / "->" / "=")) {
      RuleExpr(name)
    }
  / name:identifier _ "<" _ args:COMMASEP<expression> _ ">" !( _ "=") {
      TemplateInvoke(name, args)
    }
  / literal
  / regex:regexString { RegexExpr(regex) }
  / class
  / "." { AnyCharExpr }
  / "#position" { PositionExpr }
  / "#quiet" _ "<" _ e:expression _ ">" { QuietExpr(Box::new(e)) }
  / "#expected" _ "(" _ s:doubleQuotedString _ ")" { FailExpr(s) }
  / "#infix" _ "<" _ atom:expression _ ">" _ "{" _ levels:infix_level++_ _ "}"
      { InfixExpr{ atom: Box::new(atom), levels:levels }}
  / "#ext" _ "<" _ name:identifier _ ">" { ExtExpr(name) }
  / "(" _ expression:expression _ ")" { expression.node }
  >
// One precedence level inside `#infix`: an associativity marker (`#L` for
// left, `#R` for right) followed by one or more operators.
infix_level -> InfixLevel
  = assoc:("#L" { InfixAssoc::Left } / "#R" { InfixAssoc::Right }) _ operators:infix_op++_
    { InfixLevel { assoc, operators } }
// One infix operator: left operand name, optionally `var:` naming the
// operator match, the operator expression, right operand name, and a braced
// Rust action.
infix_op -> InfixOperator
  = l_arg:identifier _ op_arg:(var:identifier _ ":" _ {var})? operator:primary _ r_arg:identifier _ "{" _ action:rust_expr _ "}"
    { InfixOperator { operator: Box::new(operator), action, l_arg, op_arg, r_arg } }
/* "Lexical" elements */
// Raw Rust code with balanced braces (may be empty). The captured text is
// returned wrapped in one extra pair of braces.
rust_expr -> String
  = body:$((braced / nonBraceCharacters)*) { format!("{{ {} }}", body) }
// An action block. A `?` directly after the opening brace marks it as a
// conditional action. Yields the wrapped code and the conditional flag.
action -> (String, /*is conditional match?*/ bool)
  = "{" marker:"?"? code:rust_expr "}" {
      (code, marker.is_some())
    }
// A brace-delimited group whose contents are themselves balanced; used by
// `rust_expr` so that nested blocks in action code are skipped as a unit.
braced = "{" ((braced / nonBraceCharacters)*) "}"
// A non-empty run of characters other than `{` and `}`.
nonBraceCharacters = [^{}]+
// Template: matches `k` only when it is not immediately followed by a
// letter, digit or underscore, so a keyword is not matched as the prefix of
// a longer identifier.
KEYWORD<k> = k !([a-zA-Z0-9_])
// The `use` keyword.
USE = KEYWORD<"use">
// The `pub` keyword.
PUB = KEYWORD<"pub">
// Any reserved word of the grammar language; `identifier` refuses these.
keyword = USE / PUB
// A run of decimal digits parsed as a `usize`.
// Written as a conditional action: a literal too large for `usize` is
// reported as a parse failure at this position rather than panicking inside
// the parser.
integer -> usize
  = digits:$([0-9]+) {? digits.parse::<usize>().map_err(|_| "integer that fits in usize") }
// An identifier of the grammar language: a letter or underscore followed by
// letters, digits or underscores, excluding the reserved words in `keyword`.
// The second alternative is `#expected("identifier")`.
identifier -> String
  = #quiet<!keyword text:$([a-zA-Z_][a-zA-Z0-9_]*) { text.to_string() }> / #expected("identifier")
// Template: zero or more `t` separated by commas, with an optional trailing
// comma. Yields the list of `t` values.
COMMASEP<t> = elems:t**(_ "," _) (_ ",")? { elems }
/*
* Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
* vaguely).
*/
// A double- or single-quoted string literal. An `i` directly after the
// closing quote sets the case-insensitive flag of the `LiteralExpr`.
literal -> Expr
  = text:(doubleQuotedString / singleQuotedString) flag:"i"? {
      let case_insensitive = flag.is_some();
      LiteralExpr(text, case_insensitive)
    }
// A regex string delimited by `@"` and `"@`. The contents are taken
// verbatim (no escape processing); trailing whitespace is consumed.
// The opening delimiter is no longer bound to a label: the binding was
// never used by the action.
regexString -> String
  = "@\"" chars:regexCharacter* "\"@" _ { chars.into_iter().collect() }
// Any single character that does not begin the closing `"@` delimiter.
regexCharacter -> char
  = !("\"@") ch:$(.) { ch.chars().next().unwrap() }
// Either kind of quoted string, yielding its unescaped contents.
string -> String
  = value:(doubleQuotedString / singleQuotedString) { value }
// A double-quoted string; yields the characters between the quotes with
// escape sequences already decoded.
doubleQuotedString -> String
  = '"' chars:doubleQuotedCharacter* '"' { chars.into_iter().collect::<String>() }
// One logical character inside a double-quoted string: either a plain
// character or one of the escape sequence forms, tried in the order listed.
doubleQuotedCharacter -> char
= simpleDoubleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hex2EscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
// Any character other than a double quote, a backslash or a line terminator.
simpleDoubleQuotedCharacter -> char
  = !('"' / "\\" / eolChar) ch:$(.) { ch.chars().next().unwrap() }
// A single-quoted string; yields the characters between the quotes with
// escape sequences already decoded.
singleQuotedString -> String
  = "'" chars:singleQuotedCharacter* "'" { chars.into_iter().collect::<String>() }
// One logical character inside a single-quoted string: either a plain
// character or one of the escape sequence forms, tried in the order listed.
singleQuotedCharacter -> char
= simpleSingleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hex2EscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
// Any character other than a single quote, a backslash or a line terminator.
simpleSingleQuotedCharacter -> char
  = !("'" / "\\" / eolChar) ch:$(.) { ch.chars().next().unwrap() }
// A character class such as `[a-z_]` or `[^0-9]`. A leading `^` inverts the
// set; the body is any mix of ranges and single characters.
// NOTE(review): a trailing `i` is accepted for syntactic compatibility but
// is not passed to `CharSetExpr`, so it has no effect on the parsed result.
// The suffix is therefore matched without a label (the binding was unused).
class -> Expr
  = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" "i"? {
      CharSetExpr(inverted.is_some(), parts)
    }
// A `a-z` style range inside a character class. The bounds are stored as
// written; no ordering check is performed here.
classCharacterRange -> CharSetCase
  = start:bracketDelimitedCharacter "-" end:bracketDelimitedCharacter {
      //TODO: check start > end
      CharSetCase { start, end }
    }
// A single character inside a character class, represented as a range whose
// start and end are the same character.
classCharacter -> CharSetCase
  = single:bracketDelimitedCharacter {
      CharSetCase { start: single, end: single }
    }
// One logical character inside a character class: either a plain character
// or one of the escape sequence forms, tried in the order listed.
bracketDelimitedCharacter -> char
= simpleBracketDelimitedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hex2EscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
// Any character other than `]`, a backslash or a line terminator.
simpleBracketDelimitedCharacter -> char
  = !("]" / "\\" / eolChar) ch:$(.) { ch.chars().next().unwrap() }
// A backslash followed by one character that is not a digit, `x`, `u` or a
// line terminator. `\n`, `\r` and `\t` decode to newline, carriage return
// and tab; every other escaped character (quotes, backslash, and also
// letters such as b, f and v) stands for itself.
simpleEscapeSequence -> char
  = "\\" !(digit / "x" / "u" / eolChar) raw:$(.) {
      let escaped = raw.chars().next().unwrap();
      match escaped {
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        other => other
      }
    }
// `\0` not followed by another digit: the NUL character.
zeroEscapeSequence -> char
  = "\\0" !digit { '\0' }
// `\xHH`: exactly two hex digits giving the code point of the character.
// Two hex digits are at most 0xFF, so both conversions always succeed.
hex2EscapeSequence -> char
  = "\\x" digits:$(hexDigit hexDigit) {
      let code = u32::from_str_radix(digits, 16).unwrap();
      char::from_u32(code).unwrap()
    }
// `\u{H...}`: one or more hex digits giving a Unicode code point.
// Written as a conditional action: a value that overflows `u32` or is not a
// valid `char` (for example a surrogate such as D800, or anything above
// 10FFFF) is reported as a parse failure instead of panicking in `unwrap`.
unicodeEscapeSequence -> char
  = "\\u{" value:$(hexDigit+) "}" {?
      u32::from_str_radix(value, 16)
        .ok()
        .and_then(char::from_u32)
        .ok_or("valid unicode code point")
    }
// A backslash immediately followed by a line terminator. Whatever form the
// terminator takes in the source, the result is a single `\n` character.
eolEscapeSequence -> char
= "\\" eol { '\n' }
// A single ASCII decimal digit.
digit
= [0-9]
// A single ASCII hexadecimal digit, either case.
hexDigit
= [0-9a-fA-F]
// Optional filler between tokens: any run (possibly empty) of whitespace,
// line terminators and comments. Wrapped in `#quiet`;
// NOTE(review): presumably so filler never shows up in the list of expected
// tokens -- confirm against the translator.
_ = #quiet<(whitespace / eol / comment)*>
/* Modeled after ECMA-262, 5th ed., 7.4. */
// A comment in a grammar file: either `// ...` or `/* ... */`.
comment
= singleLineComment
/ multiLineComment
// `//` up to, but not including, the next line terminator character.
singleLineComment
= "//" (!eolChar .)*
// `/*` up to the first following `*/`. Because matching stops at the first
// terminator, block comments do not nest.
multiLineComment
= "/*" (!"*/" .)* "*/"
/* Modeled after ECMA-262, 5th ed., 7.3. */
// One line terminator sequence. `\r\n` is listed before the lone `\r` so
// that a CRLF pair is consumed as a single terminator.
eol
= "\n"
/ "\r\n"
/ "\r"
/ "\u{2028}"
/ "\u{2029}"
// A single character that can start a line terminator: LF, CR, U+2028 or
// U+2029. Used in negative lookaheads to stop at the end of a line.
eolChar
= [\n\r\u{2028}\u{2029}]
/* Modeled after ECMA-262, 5th ed., 7.2. */
// A single non-line-terminating whitespace character: space, tab, or one of
// the listed Unicode space/format characters (including the range U+2000
// through U+200A). Vertical tab and form feed are deliberately not included.
whitespace
= [ \t\u{00A0}\u{FEFF}\u{1680}\u{180E}\u{2000}-\u{200A}\u{202F}\u{205F}\u{3000}] // \v\f removed