use std::collections::HashMap;
use lexer::Condition;
use lexer::LexerDef;
use lexer::Rule;
use regex;
use regex::Regex;
use syntax::ast::Expr;
use syntax::ast::Ident;
use syntax::ast::Name;
use syntax::ast::Ty;
use syntax::codemap::Span;
use syntax::parse;
use syntax::parse::parser::Parser;
use syntax::parse::token;
use syntax::ptr::P;
use syntax::symbol::{Symbol, keywords};
use rustc_errors::DiagnosticBuilder;
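/// The subset of `Parser` functionality the grammar functions below rely on,
/// factored into a trait so that they can be written against any token source.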
trait Tokenizer<'a> {
    fn token(&self) -> &token::Token;
    fn bump(&mut self);
    fn bump_and_get(&mut self) -> token::Token;
    fn eat(&mut self, tok: &token::Token) -> bool;
    fn eat_keyword(&mut self, kwd: keywords::Keyword) -> bool;
    fn check(&mut self, tok: &token::Token) -> bool;
    fn check_keyword(&mut self, kwd: keywords::Keyword) -> bool;
    fn expect(&mut self, tok: &token::Token) -> Result<(), DiagnosticBuilder<'a>>;
    fn parse_ident(&mut self) -> Result<Ident, DiagnosticBuilder<'a>>;
    fn last_span(&self) -> Span;
    fn span_fatal(&mut self, sp: Span, m: &str) -> DiagnosticBuilder<'a>;
    fn unexpected(&mut self) -> DiagnosticBuilder<'a>;
    fn unexpected_last(&mut self, tok: &token::Token) -> DiagnosticBuilder<'a>;
}
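// Inherent methods take precedence over trait methods during resolution, so
// each of these forwards to the corresponding inherent `Parser` method rather
// than recursing.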
impl<'a> Tokenizer<'a> for Parser<'a> {
    fn token(&self) -> &token::Token { &self.token }
    fn bump(&mut self) { self.bump() }
    fn bump_and_get(&mut self) -> token::Token {
        // Take the current token, leaving a placeholder that `bump` will
        // immediately overwrite with the next token.
        let token = ::std::mem::replace(&mut self.token, token::Token::Whitespace);
        self.bump();
        token
    }
    fn eat(&mut self, tok: &token::Token) -> bool { self.eat(tok) }
    fn eat_keyword(&mut self, kwd: keywords::Keyword) -> bool { self.eat_keyword(kwd) }
    fn check(&mut self, tok: &token::Token) -> bool { self.check(tok) }
    fn check_keyword(&mut self, kwd: keywords::Keyword) -> bool { self.check_keyword(kwd) }
    fn expect(&mut self, tok: &token::Token) -> Result<(), DiagnosticBuilder<'a>> {
        self.expect(tok)
    }
    fn parse_ident(&mut self) -> Result<Ident, DiagnosticBuilder<'a>> { self.parse_ident() }
    fn last_span(&self) -> Span { self.prev_span }
    fn span_fatal(&mut self, sp: Span, m: &str) -> DiagnosticBuilder<'a> {
        Parser::span_fatal(self, sp, m)
    }
    fn unexpected(&mut self) -> DiagnosticBuilder<'a> {
        Parser::unexpected::<()>(self).unwrap_err()
    }
    fn unexpected_last(&mut self, tok: &token::Token) -> DiagnosticBuilder<'a> {
        Parser::unexpected_last::<()>(self, tok).unwrap_err()
    }
}
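/// Maps the name of a `let` definition to its index in the `defs` vector
/// built by `get_definitions`.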
type Env = HashMap<Name, usize>;
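/// Parses an optional `token TypeName;` declaration naming the token type;
/// defaults to `Token` when the declaration is absent.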
fn get_tokens<'a>(parser: &mut Parser<'a>) -> Result<Ident, DiagnosticBuilder<'a>> {
    let token = Symbol::intern("token");
    match parser.token {
        token::Ident(id) if id.name == token => {
            parser.bump();
            let token = try!(parser.parse_ident());
            try!(parser.expect(&token::Semi));
            Ok(token)
        }
        _ => Ok(Ident::with_empty_ctxt(Symbol::intern("Token")))
    }
}
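/// Parses zero or more `property name: type = expr;` declarations, e.g.
/// `property line: usize = 1;`.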
fn get_properties<'a>(parser: &mut Parser<'a>)
        -> Result<Vec<(Name, P<Ty>, P<Expr>)>, DiagnosticBuilder<'a>> {
    let mut ret = Vec::new();
    let prop = Symbol::intern("property");
    loop {
        match parser.token {
            token::Ident(id) if id.name == prop => {
                parser.bump();
                let name = try!(parser.parse_ident());
                try!(parser.expect(&token::Colon));
                let ty = try!(parser.parse_ty());
                try!(parser.expect(&token::Eq));
                let expr = try!(parser.parse_expr());
                try!(parser.expect(&token::Semi));
                ret.push((name.name, ty, expr));
            }
            _ => break
        }
    }
    Ok(ret)
}
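/// Parses the contents of a character class up to the closing `]`, the
/// opening `[` having already been consumed. Accepts single characters,
/// ranges written `'a' - 'z'`, and string literals whose characters are each
/// added to the class.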
fn get_char_class<'a, T: Tokenizer<'a>>(parser: &mut T)
        -> Result<regex::CharSet<char>, DiagnosticBuilder<'a>> {
    let mut ret = regex::CharSet::new();
    loop {
        let tok = parser.bump_and_get();
        match tok {
            token::CloseDelim(token::Bracket) => break,
            token::Literal(token::Lit::Char(i), _) => {
                let ch = parse::char_lit(&*i.as_str(), None).0;
                match *parser.token() {
                    token::BinOp(token::Minus) => {
                        // A `-` after a char literal starts a range: parse
                        // the upper bound and check the range is non-empty.
                        parser.bump();
                        let ch2 = match parser.bump_and_get() {
                            token::Literal(token::Lit::Char(ch), _) =>
                                parse::char_lit(&*ch.as_str(), None).0,
                            _ => return Err(parser.unexpected())
                        };
                        if ch >= ch2 {
                            let last_span = parser.last_span();
                            return Err(parser.span_fatal(last_span,
                                "invalid character range"))
                        }
                        ret.push(ch .. ch2);
                    }
                    _ => { ret.push(ch .. ch); }
                }
            }
            token::Literal(token::Lit::Str_(id), _) => {
                if id.as_str().len() == 0 {
                    let last_span = parser.last_span();
                    return Err(parser.span_fatal(last_span,
                        "bad string constant in character class"))
                }
                for b in id.as_str().chars() {
                    ret.push(b .. b);
                }
            }
            _ => return Err(parser.unexpected_last(&tok))
        }
    }
    Ok(ret)
}
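/// Parses an atomic regular expression: `.` (any character), a parenthesized
/// group, a character class (optionally negated with `^`), a character or
/// string literal, or an identifier referring to an earlier `let` definition.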
fn get_const<'a, T: Tokenizer<'a>>(parser: &mut T, env: &Env)
        -> Result<Regex, DiagnosticBuilder<'a>> {
    let tok = parser.bump_and_get();
    match tok {
        token::Dot => Ok(Box::new(regex::Literal(regex::Any))),
        token::OpenDelim(token::Paren) =>
            get_regex(parser, &token::CloseDelim(token::Paren), env),
        token::OpenDelim(token::Bracket) => {
            if parser.eat(&token::BinOp(token::Caret)) {
                Ok(Box::new(regex::Literal(
                    regex::NotClass(try!(get_char_class(parser)))
                )))
            } else {
                Ok(Box::new(regex::Literal(
                    regex::Class(try!(get_char_class(parser)))
                )))
            }
        }
        token::Literal(token::Lit::Char(ch), _) =>
            Ok(Box::new(regex::Literal(
                regex::Char(parse::char_lit(&*ch.as_str(), None).0)
            ))),
        token::Literal(token::Lit::Str_(id), _) =>
            match regex::string(&*id.as_str()) {
                Some(reg) => Ok(reg),
                None => {
                    let last_span = parser.last_span();
                    Err(parser.span_fatal(last_span,
                        "bad string constant in regular expression"))
                }
            },
        token::Ident(id) => match env.get(&id.name).cloned() {
            Some(value) => Ok(Box::new(regex::Var(value))),
            None => {
                let last_span = parser.last_span();
                Err(parser.span_fatal(last_span,
                    &format!("unknown identifier: {}", id.name.as_str())))
            }
        },
        _ => Err(parser.unexpected_last(&tok))
    }
}
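/// Parses an atom followed by an optional postfix operator: `*` (Kleene
/// closure), `+` (desugared as the atom followed by its closure), or `?`.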
fn get_closure<'a, T: Tokenizer<'a>>(parser: &mut T, env: &Env)
        -> Result<Regex, DiagnosticBuilder<'a>> {
    let reg = try!(get_const(parser, env));
    if parser.eat(&token::BinOp(token::Star)) {
        Ok(Box::new(regex::Closure(reg)))
    } else if parser.eat(&token::BinOp(token::Plus)) {
        Ok(Box::new(regex::Cat(reg.clone(), Box::new(regex::Closure(reg)))))
    } else if parser.eat(&token::Question) {
        Ok(Box::new(regex::Maybe(reg)))
    } else {
        Ok(reg)
    }
}
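/// Parses a concatenation of closures, stopping at the `end` token, an `as`
/// binding, or a `|` alternation, none of which are consumed.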
fn get_concat<'a, T: Tokenizer<'a>>(parser: &mut T, end: &token::Token, env: &Env)
        -> Result<Regex, DiagnosticBuilder<'a>> {
    let opl = try!(get_closure(parser, env));
    if parser.check(end) ||
       parser.check_keyword(keywords::As) ||
       parser.check(&token::BinOp(token::Or)) {
        Ok(opl)
    } else {
        let opr = try!(get_concat(parser, end, env));
        Ok(Box::new(regex::Cat(opl, opr)))
    }
}
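/// Parses a concatenation, wrapping it in a `regex::Bind` node when it is
/// followed by `as name`.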
fn get_binding<'a, T: Tokenizer<'a>>(parser: &mut T, end: &token::Token, env: &Env)
        -> Result<Regex, DiagnosticBuilder<'a>> {
    let expr = try!(get_concat(parser, end, env));
    if parser.eat_keyword(keywords::As) {
        let name = try!(parser.parse_ident());
        Ok(Box::new(regex::Bind(name, expr)))
    } else {
        Ok(expr)
    }
}
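/// Parses a full regular expression, i.e. a `|`-separated list of bindings,
/// up to and including the `end` token. An empty expression is an error.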
fn get_regex<'a, T: Tokenizer<'a>>(parser: &mut T, end: &token::Token, env: &Env)
        -> Result<Regex, DiagnosticBuilder<'a>> {
    if parser.eat(end) {
        return Err(parser.unexpected());
    }
    let left = try!(get_binding(parser, end, env));
    if parser.eat(end) {
        Ok(left)
    } else {
        try!(parser.expect(&token::BinOp(token::Or)));
        let right = try!(get_regex(parser, end, env));
        Ok(Box::new(regex::Or(left, right)))
    }
}
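/// Parses a pattern definition of the form `name = regex;` (the `let`
/// keyword has already been consumed by the caller).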
fn get_pattern<'a>(parser: &mut Parser<'a>, env: &Env)
        -> Result<(Ident, Regex), DiagnosticBuilder<'a>> {
    let name = try!(parser.parse_ident());
    try!(parser.expect(&token::Eq));
    let reg = try!(get_regex(parser, &token::Semi, env));
    Ok((name, reg))
}
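/// Parses the `let` definitions, e.g. `let DIGIT = ['0'-'9'];`, recording
/// each name in the environment so that later regexes can refer to it.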
fn get_definitions<'a>(parser: &mut Parser<'a>)
        -> Result<(Env, Vec<Regex>), DiagnosticBuilder<'a>> {
    let mut env = HashMap::new();
    let mut defs = Vec::new();
    while parser.eat_keyword(keywords::Let) {
        let (id, pat) = try!(get_pattern(parser, &env));
        env.insert(id.name, defs.len());
        defs.push(pat);
    }
    Ok((env, defs))
}
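/// Parses the body of a condition: a brace-delimited list of
/// `regex => action` rules, optionally separated by commas.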
fn get_condition<'a>(parser: &mut Parser<'a>, env: &Env)
        -> Result<Vec<Rule>, DiagnosticBuilder<'a>> {
    let mut ret = Vec::new();
    while !parser.eat(&token::CloseDelim(token::Brace)) {
        let pattern = try!(get_regex(parser, &token::FatArrow, env));
        let action = try!(parser.parse_expr());
        // Commas between rules are optional.
        parser.eat(&token::Comma);
        ret.push(Rule { pattern: pattern, action: action });
    }
    Ok(ret)
}
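/// Parses the rule section. Rules may appear inside a named condition,
/// `NAME { regex => action, ... }`, or at the top level, in which case they
/// belong to the implicit `INITIAL` condition. Rules for a condition that was
/// already declared are appended to it.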
fn get_conditions<'a>(parser: &mut Parser<'a>, env: &Env)
        -> Result<Vec<Condition>, DiagnosticBuilder<'a>> {
    let mut cond_names: HashMap<Name, usize> = HashMap::new();
    let mut ret = Vec::new();
    let initial = Condition {
        name: Symbol::intern("INITIAL"),
        span: parser.span,
        rules: Vec::new()
    };
    cond_names.insert(initial.name, 0);
    ret.push(initial);
    while parser.token != token::Eof {
        match parser.token {
            // An identifier followed by an opening brace introduces a named
            // condition; anything else must be a top-level rule.
            token::Ident(id) => {
                if parser.look_ahead(1, |tok| tok == &token::OpenDelim(token::Brace)) {
                    let sp = parser.span;
                    parser.bump();
                    parser.bump();
                    let rules = try!(get_condition(parser, env));
                    if let Some(i) = cond_names.get(&id.name).cloned() {
                        ret[i].rules.extend(rules.into_iter());
                        continue
                    }
                    ret.push(Condition { rules: rules, span: sp, name: id.name });
                    cond_names.insert(id.name, ret.len() - 1);
                } else {
                    // The identifier is the start of a regex; the rule goes
                    // into the INITIAL condition at index 0.
                    let reg = try!(get_regex(parser, &token::FatArrow, env));
                    let expr = try!(parser.parse_expr());
                    parser.eat(&token::Comma);
                    ret[0].rules.push(Rule { pattern: reg, action: expr });
                }
            }
            _ => {
                let reg = try!(get_regex(parser, &token::FatArrow, env));
                let expr = try!(parser.parse_expr());
                parser.eat(&token::Comma);
                ret[0].rules.push(Rule { pattern: reg, action: expr });
            }
        }
    }
    Ok(ret)
}
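/// Entry point: parses an entire lexer definition (token type, properties,
/// `let` definitions, then conditions and rules) into a `LexerDef`.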
pub fn parse<'a>(ident: Ident, parser: &mut Parser<'a>)
        -> Result<LexerDef, DiagnosticBuilder<'a>> {
    let tokens = try!(get_tokens(parser));
    let props = try!(get_properties(parser));
    let (env, defs) = try!(get_definitions(parser));
    let conditions = try!(get_conditions(parser, &env));
    Ok(LexerDef { ident: ident, tokens: tokens, properties: props, defs: defs,
                  conditions: conditions })
}