// wee-peg 0.5.4
//
// A simple Parsing Expression Grammar (PEG) parser generator.
// Documentation
use translate::*;
use std::char;
use codemap::{ self, Spanned };

// Declares `file_span` as a grammar argument; the `spanned` template calls
// `file_span.subspan(..)` on it to locate each parsed node.
#![arguments(file_span: codemap::Span)]

// Template: parses `inner` and wraps its value in a `codemap::Spanned`,
// whose span is the sub-span of `file_span` between the positions recorded
// immediately before and after `inner`.
spanned<inner>
  = lo:#position node:inner hi:#position
  {
    let span = file_span.subspan(lo as u64, hi as u64);
    codemap::Spanned { node, span }
  }

// Public entry point: a list of spanned `item`s, with `_` (whitespace and
// comments) allowed before, between and after them.
pub items -> Vec<Spanned<Item>> = _ items:spanned<item>**_ _ { items }

// A rule definition: optional flags, a name, an optional return type, `=`,
// the body expression and an optional trailing `;`.
// A rule counts as exported when either the `pub` keyword or the legacy
// attribute form (`exportflag`) is present.
rule -> Rule
  = legacy_exported:exportflag _ cached:cacheflag _ public:pubflag _ name:identifier _ returns:returntype _ "=" _ expression:expression (_ ";")? {
      let exported = public || legacy_exported;
      Rule { name, expr: Box::new(expression), ret_type: returns, exported, cached }
    }

    // Each flag rule yields true when its marker is present and false
    // (consuming nothing) otherwise.

    // `pub` keyword.
    pubflag -> bool = PUB { true } / { false }
    // Attribute-style export marker, `#[export]` or `#[pub]`, wrapped in `#quiet<...>`.
    exportflag -> bool = #quiet<("#[export]" / "#[pub]") {true} / {false}>
    // `#[cache]` attribute.
    cacheflag -> bool = "#[cache]" {true} / {false}

// A template definition: `name<param, ...> = expression`, with an optional
// trailing `;`. Parameters are a comma-separated list of identifiers.
template -> Template
  = name:identifier _ "<" _ params:COMMASEP<identifier> _ ">" _ "=" _ expression:expression (_ ";")? {
    let expr = Box::new(expression);
    Template { name, params, expr }
  }

// One top-level grammar item. Alternatives are tried in order: a `use`
// declaration, a rule, a template, then a `#![arguments(...)]` directive
// (`grammar_args` already yields an `Item`, so it needs no action).
item -> Item
  = u:rust_use     { Item::Use(u) }
  / r:rule         { Item::Rule(r) }
  / t:template     { Item::Template(t) }
  / grammar_args

  // `#![arguments(name: Type, ...)]` directive; yields `Item::GrammarArgs`
  // carrying the list of (name, type) pairs.
  grammar_args -> Item
    = "#![" _ "arguments" _ "(" _ args:COMMASEP<grammar_argument> _ ")" _ "]"
    { Item::GrammarArgs(args) }

    // A single `name: Type` pair; the type is kept as its source text.
    grammar_argument -> (String, String)
      = i:identifier _ ":" _ t:type_string { (i, t) }


// Source text of a Rust type (as matched by `rust_type`), returned as an
// owned String; trailing whitespace/comments are consumed but not included.
type_string -> String = ty:$rust_type _ { String::from(ty) }

// Optional `-> Type` annotation of a rule. When the arrow is absent the
// return type defaults to the unit type, spelled "()".
returntype -> String
  = "->" _ ty:type_string { ty }
  / { String::from("()") }

// An identifier inside embedded Rust syntax: a letter or `_` followed by
// letters, digits or `_`. Unlike `identifier`, keywords are not excluded.
// On failure the expected-set entry is reported as "identifier".
rust_identifier = #quiet<[a-zA-Z_][a-zA-Z0-9_]*> / #expected("identifier")

// A Rust `use` declaration, returned verbatim as its source text (`$`).
// After the path, one of three tails is accepted, tried in this order:
//   `::*`                      glob import
//   `::{a, b as c, ...}`       brace list, each entry optionally renamed
//   optional `as name`         plain import, optionally renamed
// followed by the terminating `;`.
rust_use -> String
  = v:$(USE _ rust_path _ (
    "::" _ "*" _
    / "::" _ "{" _ ((rust_identifier _ ("as" _ rust_identifier _ )?) ++ ("," _)) "}" _
    / ("as" _ rust_identifier)?
  ) ";") { v.to_owned() }

// One or more identifiers separated by `::` (whitespace allowed around it).
rust_path = rust_identifier ++ (_ "::" _)

// Recognizer for the subset of Rust type syntax accepted in return types
// and grammar arguments. Produces no value; callers capture the text.
// Alternatives, in order: slice `[T]`, reference `&T` (optional `mut` and
// lifetime), generic `Name<...>` with lifetime or type arguments, path
// `name::Type`, tuple `(A, B, ...)`, and finally a bare identifier. The
// bare identifier is last so the longer forms starting with an identifier
// are tried first.
rust_type
  = "[" _ rust_type _ "]"
  / "&" _ ("mut" _)? ("'" rust_identifier _)? rust_type
  / rust_identifier _ "<" _ (("'" rust_identifier) / rust_type) ** (_ "," _) _ ">"
  / rust_identifier _ "::" _ rust_type
  / "(" _ rust_type ** (_ "," _) _ ")"
  / rust_identifier

// Top of the expression hierarchy; currently just an alias for `choice`.
expression -> Spanned<Expr>
  = choice

// Ordered choice: one or more `sequence`s separated by `/`.
// A lone sequence is returned as-is (its node is re-wrapped by `spanned`);
// two or more are collected, in source order, into a `ChoiceExpr`.
choice -> Spanned<Expr>
  = spanned<first:sequence rest:(_ "/" _ alt:sequence {alt})* {
      if rest.is_empty() {
        first.node
      } else {
        let mut alternatives = Vec::with_capacity(rest.len() + 1);
        alternatives.push(first);
        alternatives.extend(rest);
        ChoiceExpr(alternatives)
      }
    }>

// A sequence of elements, optionally ending in an action block.
// With an action: (possibly labeled) elements plus the action's code and
// its conditional flag become an `ActionExpr`.
// Without an action: exactly one element is returned unwrapped; any other
// count (including zero) becomes a `SequenceExpr`.
sequence -> Spanned<Expr>
  = spanned<elements:labeled**_ _ code:action {
      let (body, is_cond) = code;
      ActionExpr(elements, body, is_cond)
    }
  / elements:prefixed**_ {
      if elements.len() == 1 {
          elements.into_iter().next().unwrap().node
      } else {
          SequenceExpr(elements)
      }
    }>

// An element of an action sequence: either `label: expr`, which records the
// spanned label as the name, or a bare `expr` with no name.
// The labeled form is tried first; if it fails the parser backtracks and
// tries the bare form from the same position.
labeled -> TaggedExpr
  = label:spanned<identifier> _ ":" _ expression:prefixed {
      TaggedExpr{ name: Some(label), expr: Box::new(expression) }
    }
  / expr:prefixed {
      TaggedExpr{ name: None, expr: Box::new(expr) }
  }

// Prefix operators applied to a `suffixed` expression:
//   `$e`  -> MatchStrExpr
//   `&e`  -> PosAssertExpr
//   `!e`  -> NegAssertExpr
// Falls through to a plain `suffixed` when no prefix is present; that last
// alternative sits outside `spanned<...>` because `suffixed` already returns
// a spanned node.
prefixed -> Spanned<Expr>
  = spanned<
   "$" _ expression:suffixed {
      MatchStrExpr(Box::new(expression))
    }
  / "&" _ expression:suffixed {
      PosAssertExpr(Box::new(expression))
    }
  / "!" _ expression:suffixed {
      NegAssertExpr(Box::new(expression))
    }>
  / suffixed


// Suffix operators applied to a `primary`:
//   `e?`                 -> OptionalExpr
//   `e ** <count> sep`   -> Repeat with the parsed count and a separator
//   `e ++ sep`           -> Repeat with BoundedRepeat::Plus and a separator
//   `e * <count>`        -> Repeat with the parsed count, no separator
//   `e+`                 -> Repeat with BoundedRepeat::Plus, no separator
// `<count>` is optional (see `repeatcount`). The two-character operators
// `**` and `++` are listed before `*` and `+` so they are tried first.
// Falls through to a bare `primary` when no suffix matches.
suffixed -> Spanned<Expr>
  = spanned<
   e:primary _ "?" {
       OptionalExpr(Box::new(e))
    }
  / e:primary _ "**" _ count:repeatcount _ sep:primary {
      Repeat(Box::new(e), count, Some(Box::new(sep)))
    }
  / e:primary _ "++" _ sep:primary {
      Repeat(Box::new(e), BoundedRepeat::Plus, Some(Box::new(sep)))
    }
  / e:primary _ "*" _ count:repeatcount {
      Repeat(Box::new(e), count, None)
    }
  / e:primary _ "+" {
      Repeat(Box::new(e), BoundedRepeat::Plus, None)
    }>
  / primary

// Optional repetition bound following `*` or `**`:
//   `<n>`        -> BoundedRepeat::Exact(n)
//   `<min,max>`  -> BoundedRepeat::Both(min, max); either side may be omitted
//   (nothing)    -> BoundedRepeat::None
// The last alternative matches the empty string, so this rule never fails.
repeatcount -> BoundedRepeat
  = "<" _ n:repeatnum _ ">" { BoundedRepeat::Exact(n) }
  / "<" _ min:repeatnum? _ "," _ max:repeatnum? _ ">" { BoundedRepeat::Both(min, max) }
  / { BoundedRepeat::None }

// A repetition bound as a string: either an integer literal (rendered back
// to decimal text) or a braced Rust expression (returned as produced by
// `rust_expr`).
repeatnum -> String
  = i:integer { i.to_string() }
  / "{" _ e:rust_expr _ "}" {e}

// Atomic expressions. Marked `#[cache]`; `suffixed` retries `primary` at the
// same position across several alternatives.
//
// - Rule reference: an identifier NOT followed by `<`, `->` or `=`. The
//   lookahead keeps it from swallowing a template invocation or the name
//   that starts the next rule/template definition.
// - Template invocation: `name<arg, ...>`, not followed by `=` (which would
//   make it a template definition).
// - String literal, `@"..."@` regex string, character class, `.` (any char).
// - `#position`, `#quiet<e>`, `#expected("msg")`, `#infix<atom> { levels }`,
//   `#ext<name>`.
// - Parenthesized expression: the inner node is unwrapped and re-spanned.
#[cache]
primary -> Spanned<Expr>
  = spanned<
   name:identifier !(_ ("<" / "->" / "=")) {
      RuleExpr(name)
    }
  / name:identifier _ "<" _ args:COMMASEP<expression> _ ">" !( _ "=") {
      TemplateInvoke(name, args)
    }
  / literal
  / regex:regexString { RegexExpr(regex) }
  / class
  / "." { AnyCharExpr }
  / "#position" { PositionExpr }
  / "#quiet" _ "<" _ e:expression _ ">" { QuietExpr(Box::new(e)) }
  / "#expected" _ "(" _ s:doubleQuotedString _ ")" { FailExpr(s) }
  / "#infix" _ "<" _ atom:expression _ ">" _ "{" _ levels:infix_level++_ _ "}"
        { InfixExpr{ atom: Box::new(atom), levels:levels }}
  / "#ext" _ "<" name:identifier ">" { ExtExpr(name) }
  / "(" _ expression:expression _ ")" { expression.node }
  >

// One precedence level of an `#infix` block: an associativity marker
// (`#L` = left, `#R` = right) followed by one or more operators.
infix_level -> InfixLevel
  = assoc:("#L" { InfixAssoc::Left } / "#R" { InfixAssoc::Right} ) _ operators:infix_op++_
  { InfixLevel { assoc, operators } }

// One infix operator: `lhs [opvar:] operator rhs { action }`.
// `lhs` and `rhs` name the operand bindings, the optional `opvar:` names the
// operator's own value, and the braced Rust expression is the action.
infix_op -> InfixOperator
  = l:identifier _ op:(var:identifier _ ":" _ {var})? e:primary _ r:identifier _ "{" _ action:rust_expr _ "}"
  { InfixOperator { operator: Box::new(e), action, l_arg: l, op_arg: op, r_arg: r } }

/* "Lexical" elements */

// Raw Rust code with balanced braces. The matched text is returned wrapped
// in an extra pair of braces ("{ <text> }"). May match the empty string.
rust_expr -> String
   = literal:$((braced / nonBraceCharacters)*) { format!("{{ {} }}", literal) }

// An action block `{ ... }` or conditional action `{? ... }`.
// Returns the brace-wrapped Rust code and whether the `?` marker was
// present. The `?` must directly follow the opening brace.
action -> (String, /*is conditional match?*/ bool)
  = "{" cond:"?"? literal:rust_expr "}" {
     (literal, cond.is_some())
  }

// A brace-delimited region whose contents are themselves brace-balanced.
braced = "{" ((braced / nonBraceCharacters)*) "}"
// A run of one or more characters that are neither `{` nor `}`.
nonBraceCharacters = [^{}]+

// Template: matches `k` only when it is not immediately followed by an
// identifier character, so a keyword is not matched as a prefix of a longer
// identifier.
KEYWORD<k> = k !([a-zA-Z0-9_])
USE = KEYWORD<"use">
PUB = KEYWORD<"pub">

// Reserved words; `identifier` refuses to match these.
keyword = USE / PUB

// A non-negative decimal integer.
// Uses a conditional action so that a digit run too large for `usize` is a
// normal parse failure instead of a panic inside the parser.
integer -> usize
  = i:$([0-9]+) {? i.parse::<usize>().map_err(|_| "integer that fits in usize") }

// A grammar identifier: a letter or `_` followed by letters, digits or `_`,
// and not a reserved `keyword`. Returned as an owned String.
// On failure the expected-set entry is reported as "identifier".
identifier -> String
  = #quiet<!keyword chars:$([a-zA-Z_][a-zA-Z0-9_]*) { chars.to_owned() }> / #expected("identifier")

// Template: zero or more `t` separated by commas, with an optional trailing
// comma. Yields the list of parsed `t` values.
COMMASEP<t> = list:t**(_ "," _) (_ ",")? { list }
/*
 * Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
 * vaguely).
 */
// A string literal in double or single quotes. A directly following `i`
// marks it case-insensitive (second field of `LiteralExpr`).
literal -> Expr
  = value:(doubleQuotedString / singleQuotedString) case_insensitive:"i"? {
      LiteralExpr(value,case_insensitive.is_some())
    }

// A regex string delimited by `@"` and `"@`. The body is taken verbatim (no
// escape processing) up to the first `"@`. Trailing whitespace/comments are
// consumed here.
regexString -> String
  = "@\"" s:regexCharacter* "\"@" _ { s.into_iter().collect() }

// Any single character that does not start the closing `"@` delimiter.
// The unwrap cannot fail: `.` matched exactly one character.
regexCharacter -> char
  = !("\"@") c:$. { c.chars().next().unwrap() }

// Contents of a double- or single-quoted string.
// NOTE(review): not referenced by any other rule visible in this grammar.
string -> String
  = string:(doubleQuotedString / singleQuotedString) { string }

// A "..." string; yields the decoded contents without the quotes.
doubleQuotedString -> String
  = '"' s:doubleQuotedCharacter* '"' { s.into_iter().collect() }

// One decoded character inside a "..." string: a plain character or one of
// the supported escape sequences.
doubleQuotedCharacter -> char
  = simpleDoubleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hex2EscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

// Any character other than `"`, a backslash or a line terminator.
simpleDoubleQuotedCharacter -> char
  = !('"' / "\\" / eolChar) c:$. { c.chars().next().unwrap() }

// A '...' string; yields the decoded contents without the quotes.
singleQuotedString -> String
  = "'" s:singleQuotedCharacter* "'" { s.into_iter().collect() }

// One decoded character inside a '...' string: a plain character or one of
// the supported escape sequences.
singleQuotedCharacter -> char
  = simpleSingleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hex2EscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

// Any character other than `'`, a backslash or a line terminator.
simpleSingleQuotedCharacter -> char
  = !("'" / "\\" / eolChar) c:$. { c.chars().next().unwrap() }

// A character class `[...]`, optionally inverted with a leading `^`.
// Parts are single characters or `a-b` ranges; ranges are tried first so
// that `a-b` is not read as three separate characters.
// NOTE: a trailing `i` is accepted for syntax compatibility, but it is not
// recorded in `CharSetExpr`, so it has no effect on matching.
class -> Expr
  = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" "i"? {
      CharSetExpr(inverted.is_some(), parts)
    }

// A range `a-b` inside a character class.
// The bounds are not validated here (see TODO). Note that making this rule
// fail for a reversed range would make `class` fall back to `classCharacter`
// and read the same text as three separate characters.
classCharacterRange -> CharSetCase
  = begin:bracketDelimitedCharacter "-" end:bracketDelimitedCharacter {
      //TODO: check start > end
      CharSetCase{start:begin, end:end}
    }

// A single character inside a character class, represented as a range whose
// start and end are the same character.
classCharacter -> CharSetCase
  = char_:bracketDelimitedCharacter {
      CharSetCase{start:char_, end:char_}
    }

// One decoded character inside `[...]`: a plain character or one of the
// supported escape sequences.
bracketDelimitedCharacter -> char
  = simpleBracketDelimitedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hex2EscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

// Any character other than `]`, a backslash or a line terminator.
simpleBracketDelimitedCharacter -> char
  = !("]" / "\\" / eolChar) c:$(.) { c.chars().next().unwrap() }

// A backslash followed by one character that is not a digit, `x`, `u` or a
// line terminator (those are handled by the other escape rules).
// `\n`, `\r` and `\t` map to their control characters; any other escaped
// character stands for itself (e.g. `\"`, `\\`, `\]`).
simpleEscapeSequence -> char
  = "\\" !(digit / "x" / "u" / eolChar) c:$(.) {
      match c.chars().next().unwrap() {
        //'b' => '\b',
        //'f' => '\f',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        //'v' => '\v',
         x  => x
      }
    }

// `\0` not followed by another digit: the NUL character.
zeroEscapeSequence -> char
  = "\\0" !digit { '\0' }

// `\xHH`: exactly two hex digits giving a code point in 0x00..=0xFF.
// Neither unwrap can fail: the text is two hex digits, and every value up
// to 0xFF is a valid `char`.
hex2EscapeSequence -> char
  = "\\x" digits:$(hexDigit hexDigit) {
      let code = u32::from_str_radix(digits, 16).unwrap();
      char::from_u32(code).unwrap()
    }

// `\u{H...}`: one or more hex digits naming a Unicode scalar value.
// Uses a conditional action so that a value that overflows `u32`, or that
// is not a valid scalar value (a surrogate, or above 0x10FFFF), is a normal
// parse failure instead of a panic inside the parser.
unicodeEscapeSequence -> char
  = "\\u{" value:$(hexDigit+) "}" {?
      u32::from_str_radix(value, 16)
        .ok()
        .and_then(char::from_u32)
        .ok_or("valid unicode code point")
    }

// A backslash followed by a line terminator; always yields '\n', whichever
// terminator was matched.
eolEscapeSequence -> char
  = "\\" eol { '\n' }

// A single ASCII decimal digit.
digit
  = [0-9]

// A single ASCII hexadecimal digit, either case.
hexDigit
  = [0-9a-fA-F]

// Optional filler between tokens: any run of whitespace, line terminators
// and comments (possibly empty), wrapped in `#quiet<...>`.
_ = #quiet<(whitespace / eol / comment)*>

/* Modeled after ECMA-262, 5th ed., 7.4. */
// Either comment form.
comment
  = singleLineComment
  / multiLineComment

// `//` up to, but not including, the next line terminator.
singleLineComment
  = "//" (!eolChar .)*

// Slash-star up to the first star-slash; these comments do not nest.
multiLineComment
  = "/*" (!"*/" .)* "*/"

/* Modeled after ECMA-262, 5th ed., 7.3. */
// A line terminator sequence. "\r\n" is listed before "\r" so a CRLF pair
// is consumed as one terminator.
eol
  = "\n"
  / "\r\n"
  / "\r"
  / "\u{2028}"
  / "\u{2029}"

// A single character that can begin a line terminator.
eolChar
  = [\n\r\u{2028}\u{2029}]

/* Modeled after ECMA-262, 5th ed., 7.2. */
// A single non-line-terminating whitespace character: space, tab, and the
// listed Unicode space characters (plus U+FEFF).
whitespace
  = [ \t\u{00A0}\u{FEFF}\u{1680}\u{180E}\u{2000}-\u{200A}\u{202F}\u{205F}\u{3000}] // \v\f removed