use std::collections::hash_map::HashMap;
use lexer::Condition;
use lexer::LexerDef;
use lexer::Rule;
use regex;
use regex::Regex;
use std::rc::Rc;
use syntax::ast::Expr;
use syntax::ast::Ident;
use syntax::ast::Name;
use syntax::ast::Ty;
use syntax::codemap::Span;
use syntax::parse;
use syntax::parse::token;
use syntax::parse::token::keywords;
use syntax::diagnostic::FatalError;
use syntax::parse::parser::Parser;
use syntax::ptr::P;
use util::BinSetu8;
/// Abstraction over the token-level operations of the parser that the
/// regex sub-parsers below need. The regex routines (`get_const`,
/// `get_regex`, ...) are written against this trait rather than against
/// `Parser` directly, presumably so another token source could be
/// substituted — TODO confirm against other users of this trait.
trait Tokenizer {
    /// Returns a reference to the token currently being examined.
    fn token<'a>(&'a self) -> &'a token::Token;
    /// Advances to the next token.
    fn bump(&mut self) -> Result<(),FatalError>;
    /// Advances to the next token and returns the token that was current.
    fn bump_and_get(&mut self) -> Result<token::Token,FatalError>;
    /// Consumes the current token if it equals `tok`; returns whether it did.
    fn eat(&mut self, tok: &token::Token) -> Result<bool,FatalError>;
    /// Consumes the current token, failing if it is not `tok`.
    fn expect(&mut self, tok: &token::Token) -> Result<(),FatalError>;
    /// Span of the previously consumed token (used for error reporting).
    fn last_span(&self) -> Span;
    /// Emits a fatal error attached to the given span.
    fn span_fatal(&mut self, sp: Span, m: &str) -> FatalError;
    /// Emits a generic "unexpected token" fatal error at the current token.
    fn unexpected(&mut self) -> FatalError;
    /// Emits an "unexpected token" fatal error for an already-consumed token.
    fn unexpected_last(&mut self, tok: &token::Token) -> FatalError;
}
/// Forwarding implementation: every method delegates to the `Parser`
/// inherent method of the same name. Note that inherent methods take
/// precedence over trait methods in method-call syntax, so e.g.
/// `self.bump()` below resolves to `Parser::bump`, not to an infinitely
/// recursive call of this trait method; the `Parser::foo(self, ..)`
/// (UFCS) forms are used where the call would otherwise be ambiguous.
impl<'a> Tokenizer for Parser<'a> {
    fn token(&self) -> &token::Token { &self.token }
    fn bump(&mut self) -> Result<(),FatalError> { self.bump() }
    fn bump_and_get(&mut self) -> Result<token::Token,FatalError> {
        self.bump_and_get()
    }
    fn eat(&mut self, tok: &token::Token) -> Result<bool,FatalError> {
        self.eat(tok)
    }
    fn expect(&mut self, tok: &token::Token) -> Result<(),FatalError> {
        self.expect(tok)
    }
    // `last_span` is a public field of `Parser`, simply copied out here.
    fn last_span(&self) -> Span { self.last_span }
    fn span_fatal(&mut self, sp: Span, m: &str) -> FatalError {
        Parser::span_fatal(self, sp, m)
    }
    fn unexpected(&mut self) -> FatalError { Parser::unexpected(self) }
    fn unexpected_last(&mut self, tok: &token::Token) -> FatalError {
        Parser::unexpected_last(self, tok)
    }
}
/// Environment of named sub-expressions introduced by `let` definitions;
/// later regular expressions may refer to them by identifier.
type Env = HashMap<Name, Rc<Regex>>;
/// Reads the optional `token TokenName;` directive at the top of a
/// lexer definition and returns the token-type identifier.
/// Falls back to `Token` when the directive is absent.
fn get_tokens(parser: &mut Parser) -> Result<Ident,FatalError> {
    let kw = token::intern("token");
    // is the current token the `token` keyword of the directive?
    let has_directive = match parser.token {
        token::Ident(id, _) => id.name == kw,
        _ => false
    };
    if has_directive {
        try!(parser.bump());
        let name = try!(parser.parse_ident());
        try!(parser.expect(&token::Semi));
        Ok(name)
    } else {
        Ok(Ident::new(token::intern("Token")))
    }
}
/// Parses the `property name: type = value;` declarations that follow
/// the token directive. Returns one `(name, type, initial value)`
/// triple per declaration; stops at the first token that is not the
/// `property` identifier. An empty list is returned when there are no
/// property declarations.
///
/// Fix: the original declared an unused lifetime parameter `<'a>`;
/// it bound nothing and has been removed (call sites are unaffected).
fn get_properties(parser: &mut Parser)
        -> Result<Vec<(Name, P<Ty>, P<Expr>)>,FatalError> {
    let mut ret = Vec::new();
    let prop = token::intern("property");
    loop {
        match parser.token {
            // `property` is a contextual keyword, recognized by name
            token::Ident(id, _) if id.name == prop => {
                try!(parser.bump());
                let name = try!(parser.parse_ident());
                try!(parser.expect(&token::Colon));
                let ty = parser.parse_ty();
                try!(parser.expect(&token::Eq));
                let expr = parser.parse_expr();
                try!(parser.expect(&token::Semi));
                ret.push((name.name, ty, expr));
            }
            _ => break
        }
    }
    Ok(ret)
}
/// Parses the contents of a character class, i.e. everything between
/// `[` (already consumed, as is an optional leading `^` handled by the
/// caller) and the closing `]`. Accepts single char literals,
/// `'a' - 'z'` style inclusive ranges, and string literals (every byte
/// of the string is added). Returns the set of accepted bytes.
///
/// Fix: the range-insertion loop was `while ch <= ch2 { ...; ch += 1 }`,
/// which increments past `ch2`; when `ch2` is 0xFF (e.g. `'\u{ff}'`)
/// the increment overflows `u8` — a panic in debug builds, an infinite
/// loop in release builds. The loop below never increments past `ch2`.
fn get_char_class<T: Tokenizer>(parser: &mut T)
        -> Result<Box<BinSetu8>,FatalError> {
    let mut ret = Box::new(BinSetu8::new(256));
    loop {
        let tok = try!(parser.bump_and_get());
        match tok {
            // `]` terminates the class
            token::CloseDelim(token::Bracket) => {
                break
            }
            token::Literal(token::Lit::Char(i), _) => {
                // NOTE: chars above 0xFF are silently truncated to a
                // byte here, as in the rest of this byte-oriented lexer
                let mut ch = parse::char_lit(i.as_str()).0 as u8;
                match *parser.token() {
                    // a `-` after a char literal begins a range
                    token::BinOp(token::Minus) => {
                        try!(parser.bump());
                        let ch2 = match try!(parser.bump_and_get()) {
                            token::Literal(token::Lit::Char(c), _) =>
                                parse::char_lit(c.as_str()).0 as u8,
                            _ => return Err(parser.unexpected())
                        };
                        if ch >= ch2 {
                            let last_span = parser.last_span();
                            return Err(parser.span_fatal(last_span,
                                "invalid character range"))
                        }
                        // insert the inclusive range [ch, ch2] without
                        // ever incrementing past ch2 (avoids u8 overflow
                        // when ch2 == 0xFF)
                        loop {
                            ret.insert(ch);
                            if ch == ch2 { break }
                            ch += 1;
                        }
                    }
                    // a lone char literal stands for itself
                    _ => ret.insert(ch)
                }
            }
            token::Literal(token::Lit::Str_(id),_) => {
                // a string literal contributes each of its bytes
                let s = token::get_name(id);
                if s.len() == 0 {
                    let last_span = parser.last_span();
                    return Err(parser.span_fatal(last_span,
                        "bad string constant in character class"))
                }
                for b in s.bytes() {
                    ret.insert(b);
                }
            }
            _ => return Err(parser.unexpected_last(&tok))
        }
    }
    Ok(ret)
}
/// Parses an atomic ("constant") piece of a regular expression: `.`,
/// a parenthesized sub-expression, a character class `[...]` (possibly
/// negated with `^`), a char literal, a string literal, or a reference
/// to a pattern previously bound with `let`.
fn get_const<T: Tokenizer>(parser: &mut T, env: &Env)
    -> Result<Box<Regex>,FatalError> {
    let tok = try!(parser.bump_and_get());
    match tok {
        // `.` matches any input
        token::Dot => Ok(Box::new(regex::Any)),
        // `( regex )` — recursively parse up to the closing paren
        token::OpenDelim(token::Paren) => get_regex(parser,
            &token::CloseDelim(token::Paren), env),
        // `[...]` or `[^...]` — a (possibly negated) character class
        token::OpenDelim(token::Bracket) => {
            if try!(parser.eat(&token::BinOp(token::Caret))) {
                Ok(Box::new(regex::NotClass(try!(get_char_class(parser)))))
            } else {
                Ok(Box::new(regex::Class(try!(get_char_class(parser)))))
            }
        }
        // a single char literal (truncated to a byte)
        token::Literal(token::Lit::Char(ch), _) =>
            Ok(Box::new(regex::Char(parse::char_lit(ch.as_str()).0 as u8))),
        // a string literal: matches its characters in sequence;
        // regex::string returns None for strings it cannot translate
        token::Literal(token::Lit::Str_(id), _) =>
            match regex::string(&*token::get_name(id)) {
                Some(reg) => Ok(reg),
                None => {
                    let last_span = parser.last_span();
                    Err(parser.span_fatal(last_span,
                        "bad string constant in regular expression"))
                }
            },
        // an identifier refers to a `let`-bound pattern in `env`
        token::Ident(id, _) => match env.get(&id.name).cloned() {
            Some(value) => Ok(Box::new(regex::Var(value))),
            None => {
                let last_span = parser.last_span();
                Err(parser.span_fatal(last_span,
                    &format!("unknown identifier: {}",
                        token::get_name(id.name))))
            }
        },
        // anything else cannot start an atom
        _ => Err(parser.unexpected_last(&tok))
    }
}
/// Parses an atom followed by an optional postfix repetition operator:
/// `*` (Kleene closure), `+` (one-or-more, desugared to `e e*`) or
/// `?` (optional).
fn get_closure<T: Tokenizer>(parser: &mut T, env: &Env)
        -> Result<Box<Regex>,FatalError> {
    let atom = try!(get_const(parser, env));
    // try each postfix operator in turn; the tokens are mutually
    // exclusive, so at most one of these `eat` calls consumes input
    if try!(parser.eat(&token::BinOp(token::Star))) {
        return Ok(Box::new(regex::Closure(atom)));
    }
    if try!(parser.eat(&token::BinOp(token::Plus))) {
        // e+ is expanded to the concatenation e e*
        return Ok(Box::new(regex::Cat(atom.clone(),
            Box::new(regex::Closure(atom)))));
    }
    if try!(parser.eat(&token::Question)) {
        return Ok(Box::new(regex::Maybe(atom)));
    }
    Ok(atom)
}
/// Parses a concatenation of one or more closures, stopping (without
/// consuming) at the `end` token or at a `|` alternative separator.
fn get_concat<T: Tokenizer>(parser: &mut T, end: &token::Token, env: &Env)
        -> Result<Box<Regex>,FatalError> {
    let first = try!(get_closure(parser, env));
    // the concatenation stops at `end` or at the `|` of an alternative
    let done = parser.token() == end
        || *parser.token() == token::BinOp(token::Or);
    if done {
        Ok(first)
    } else {
        let rest = try!(get_concat(parser, end, env));
        Ok(Box::new(regex::Cat(first, rest)))
    }
}
/// Parses a full regular expression — a `|`-separated list of
/// concatenations — up to and including the `end` token.
fn get_regex<T: Tokenizer>(parser: &mut T, end: &token::Token, env: &Env)
        -> Result<Box<Regex>,FatalError> {
    // an empty regular expression is not allowed
    if try!(parser.eat(end)) {
        return Err(parser.unexpected());
    }
    let lhs = try!(get_concat(parser, end, env));
    if try!(parser.eat(end)) {
        return Ok(lhs);
    }
    // not at the end: the only legal continuation is an alternative
    try!(parser.expect(&token::BinOp(token::Or)));
    let rhs = try!(get_regex(parser, end, env));
    Ok(Box::new(regex::Or(lhs, rhs)))
}
/// Parses one named pattern definition of the form `name = regex;`
/// (the `let` keyword has already been consumed by the caller).
fn get_pattern(parser: &mut Parser, env: &Env)
        -> Result<(Ident, Box<Regex>),FatalError> {
    let binding = try!(parser.parse_ident());
    try!(parser.expect(&token::Eq));
    let regex = try!(get_regex(parser, &token::Semi, env));
    Ok((binding, regex))
}
/// Parses the sequence of `let name = regex;` definitions and builds
/// the environment of named sub-expressions. Each definition may refer
/// to the ones parsed before it.
fn get_definitions(parser: &mut Parser)
        -> Result<Box<Env>,FatalError> {
    let mut env = Box::new(HashMap::new());
    while try!(parser.eat_keyword(keywords::Let)) {
        // the environment built so far is visible to this pattern
        let (binding, regex) = try!(get_pattern(parser, &*env));
        env.insert(binding.name, Rc::new(*regex));
    }
    Ok(env)
}
/// Parses the body of a condition block: a list of `regex => action`
/// rules up to the closing brace (the opening brace has already been
/// consumed by the caller).
fn get_condition(parser: &mut Parser, env: &Env)
        -> Result<Vec<Rule>,FatalError> {
    let mut rules = Vec::new();
    while !try!(parser.eat(&token::CloseDelim(token::Brace))) {
        let pattern = try!(get_regex(parser, &token::FatArrow, env));
        let action = parser.parse_expr();
        // the comma between rules is optional: eat it if present,
        // deliberately ignoring whether anything was consumed
        try!(parser.eat(&token::Comma));
        rules.push(Rule { pattern: pattern, action: action });
    }
    Ok(rules)
}
/// Parses the main body of the lexer description: a mix of bare rules
/// and named condition blocks (`NAME { rules... }`), until EOF. Bare
/// rules belong to the implicit INITIAL condition, which is always the
/// first element of the returned vector.
fn get_conditions(parser: &mut Parser, env: &Env)
    -> Result<Vec<Condition>,FatalError> {
    // maps a condition name to its index in `ret`, so a later block
    // with the same name extends the existing condition instead of
    // creating a duplicate
    let mut cond_names: HashMap<Name, usize> = HashMap::new();
    let mut ret = Vec::new();
    // the INITIAL condition always exists, at index 0
    let initial = Condition {
        name: token::intern("INITIAL"),
        rules: Vec::new()
    };
    cond_names.insert(initial.name, 0);
    ret.push(initial);
    while parser.token != token::Eof {
        match parser.token {
            token::Ident(id, _) => {
                // an identifier directly followed by `{` opens a named
                // condition block; otherwise the identifier must be the
                // start of a regex (a reference to a `let` pattern)
                if parser.look_ahead(1, |tok| tok == &token::OpenDelim(token::Brace)) {
                    // consume the name and the opening brace
                    try!(parser.bump());
                    try!(parser.bump());
                    let rules = try!(get_condition(parser, env));
                    // if this condition was already opened, append the
                    // new rules to it rather than pushing a new entry
                    match cond_names.get(&id.name).cloned() {
                        Some(i) => {
                            ret[i].rules.extend(rules.into_iter());
                            continue
                        }
                        None => ()
                    }
                    ret.push(Condition { rules: rules, name: id.name });
                    cond_names.insert(id.name, ret.len() - 1);
                } else {
                    // bare rule starting with an identifier: goes to
                    // INITIAL (always at index 0)
                    let reg = try!(get_regex(parser, &token::FatArrow, env));
                    let expr = parser.parse_expr();
                    ret[0].rules.push(Rule { pattern: reg, action: expr });
                }
            }
            _ => {
                // bare rule starting with any other token: also INITIAL
                let reg = try!(get_regex(parser, &token::FatArrow, env));
                let expr = parser.parse_expr();
                ret[0].rules.push(Rule { pattern: reg, action: expr });
            }
        }
    }
    Ok(ret)
}
/// Entry point: parses a complete lexer definition. The sections are
/// read in a fixed order — token type directive, property declarations,
/// `let` pattern definitions, then the rules and condition blocks.
pub fn parse(ident:Ident, parser: &mut Parser) ->
        Result<LexerDef,FatalError> {
    let tokens = try!(get_tokens(parser));
    let properties = try!(get_properties(parser));
    let defs = try!(get_definitions(parser));
    let conds = try!(get_conditions(parser, &*defs));
    Ok(LexerDef {
        ident: ident,
        tokens: tokens,
        properties: properties,
        conditions: conds
    })
}