pub use token::*;
use moore_common::errors::*;
use moore_common::source::*;
use moore_common::name::*;
use cat::CatTokenKind;
use preproc::*;
/// A `CatTokenKind` paired with a `Span`. This is the element type of the
/// lexer's lookahead buffer and what the preprocessor yields on success.
type CatTokenAndSpan = (CatTokenKind, Span);
/// A `Token` paired with a `Span`, as returned by `Lexer::next_token`.
pub type TokenAndSpan = (Token, Span);
/// A lexer that combines the `CatTokenKind` entries yielded by a
/// `Preprocessor` into `Token`s.
pub struct Lexer<'a> {
/// The preprocessor the lexer pulls its input from.
input: Preprocessor<'a>,
/// Lookahead window of four entries. Slot 0 is the entry currently being
/// looked at; `bump` shifts the window forward by one. Four entries are kept
/// because the longest operators matched consist of four symbols.
peek: [CatTokenAndSpan; 4],
}
impl<'a> Lexer<'a> {
/// Create a lexer that reads from `input`.
///
/// All four lookahead slots start out holding an `Eof` placeholder with
/// `INVALID_SPAN`; nothing is pulled from `input` here.
pub fn new(input: Preprocessor<'a>) -> Lexer {
    let placeholder = (CatTokenKind::Eof, INVALID_SPAN);
    Lexer {
        input: input,
        peek: [placeholder; 4],
    }
}
/// Advance the lookahead window by one entry.
///
/// Slots 1 to 3 move down into slots 0 to 2, and slot 3 is refilled from the
/// preprocessor. Once the preprocessor is exhausted, slot 3 receives an `Eof`
/// entry that reuses the span of the entry now sitting in slot 2.
///
/// # Errors
///
/// Returns the preprocessor's error unchanged. The window has already been
/// shifted at that point, but slot 3 has not been overwritten.
pub fn bump(&mut self) -> DiagResult2<()> {
    // Shift first: the `Eof` fallback below reads slot 2 *after* the shift.
    for slot in 0..3 {
        self.peek[slot] = self.peek[slot + 1];
    }
    let incoming = match self.input.next() {
        Some(Ok(entry)) => entry,
        Some(Err(e)) => return Err(e),
        None => (CatTokenKind::Eof, self.peek[2].1),
    };
    self.peek[3] = incoming;
    Ok(())
}
/// Lex and return the next token together with its span.
///
/// Whitespace, newlines and comments are skipped. Operators and punctuation
/// are matched longest-first: four-symbol sequences are tried before three-,
/// two- and one-symbol ones. Everything else is dispatched on the first
/// lookahead entry: identifiers and keywords, system identifiers (`$...`),
/// escaped identifiers (`\...`), numbers, and string literals. Once the input
/// is exhausted an `Eof` token is returned.
///
/// # Errors
///
/// Returns any error produced while pulling input from the preprocessor, and
/// a fatal diagnostic for: a backslash not followed by an escaped identifier,
/// a malformed number, an unknown escape sequence in a string, a string
/// literal that contains an unescaped newline or is still open at the end of
/// the input, and any entry that does not start a known token.
pub fn next_token(&mut self) -> DiagResult2<TokenAndSpan> {
    // A freshly constructed lexer has `Eof` placeholders in every slot, so
    // fill the whole lookahead window before looking at it. The same
    // condition holds once the input is exhausted, in which case `bump` is
    // simply called four more times.
    if self.peek[0].0 == CatTokenKind::Eof {
        self.bump()?;
        self.bump()?;
        self.bump()?;
        self.bump()?;
    }

    let name_table = get_name_table();

    // Every path through the body returns, so this loop runs at most once.
    loop {
        self.skip_noise()?;

        // Four-symbol operators.
        if let (CatTokenKind::Symbol(c0), CatTokenKind::Symbol(c1), CatTokenKind::Symbol(c2), CatTokenKind::Symbol(c3)) = (self.peek[0].0, self.peek[1].0, self.peek[2].0, self.peek[3].0) {
            let sym = match (c0,c1,c2,c3) {
                ('<','<','<','=') => Some(Operator(Op::AssignArithShL)),
                ('>','>','>','=') => Some(Operator(Op::AssignArithShR)),
                _ => None,
            };
            if let Some(tkn) = sym {
                let sp = Span::union(self.peek[0].1, self.peek[3].1);
                self.bump()?;
                self.bump()?;
                self.bump()?;
                self.bump()?;
                return Ok((tkn, sp));
            }
        }

        // Three-symbol operators.
        if let (CatTokenKind::Symbol(c0), CatTokenKind::Symbol(c1), CatTokenKind::Symbol(c2)) = (self.peek[0].0, self.peek[1].0, self.peek[2].0) {
            let sym = match (c0,c1,c2) {
                ('<','<','=') => Some(Operator(Op::AssignLogicShL)),
                ('>','>','=') => Some(Operator(Op::AssignLogicShR)),
                ('=','=','=') => Some(Operator(Op::CaseEq)),
                ('!','=','=') => Some(Operator(Op::CaseNeq)),
                ('=','=','?') => Some(Operator(Op::WildcardEq)),
                ('!','=','?') => Some(Operator(Op::WildcardNeq)),
                ('<','-','>') => Some(Operator(Op::LogicEquiv)),
                ('<','<','<') => Some(Operator(Op::ArithShL)),
                ('>','>','>') => Some(Operator(Op::ArithShR)),
                ('|','-','>') => Some(Operator(Op::SeqImplOl)),
                ('|','=','>') => Some(Operator(Op::SeqImplNol)),
                ('#','-','#') => Some(Operator(Op::SeqFollowOl)),
                ('#','=','#') => Some(Operator(Op::SeqFollowNol)),
                _ => None,
            };
            if let Some(tkn) = sym {
                let sp = Span::union(self.peek[0].1, self.peek[2].1);
                self.bump()?;
                self.bump()?;
                self.bump()?;
                return Ok((tkn, sp));
            }
        }

        // Two-symbol operators and punctuation.
        if let (CatTokenKind::Symbol(c0), CatTokenKind::Symbol(c1)) = (self.peek[0].0, self.peek[1].0) {
            let sym = match (c0,c1) {
                ('+','=') => Some(Operator(Op::AssignAdd)),
                ('-','=') => Some(Operator(Op::AssignSub)),
                ('*','=') => Some(Operator(Op::AssignMul)),
                ('/','=') => Some(Operator(Op::AssignDiv)),
                ('%','=') => Some(Operator(Op::AssignMod)),
                ('&','=') => Some(Operator(Op::AssignBitAnd)),
                ('|','=') => Some(Operator(Op::AssignBitOr)),
                ('^','=') => Some(Operator(Op::AssignBitXor)),
                ('+','+') => Some(Operator(Op::Inc)),
                ('-','-') => Some(Operator(Op::Dec)),
                ('*','*') => Some(Operator(Op::Pow)),
                ('<','=') => Some(Operator(Op::Leq)),
                ('>','=') => Some(Operator(Op::Geq)),
                ('=','=') => Some(Operator(Op::LogicEq)),
                ('!','=') => Some(Operator(Op::LogicNeq)),
                ('-','>') => Some(Operator(Op::LogicImpl)),
                ('|','|') => Some(Operator(Op::LogicOr)),
                ('&','&') => Some(Operator(Op::LogicAnd)),
                ('~','&') => Some(Operator(Op::BitNand)),
                ('~','|') => Some(Operator(Op::BitNor)),
                ('~','^') => Some(Operator(Op::BitNxor)),
                ('^','~') => Some(Operator(Op::BitXnor)),
                ('<','<') => Some(Operator(Op::LogicShL)),
                ('>','>') => Some(Operator(Op::LogicShR)),
                (':',':') => Some(Namespace),
                ('+',':') => Some(AddColon),
                ('-',':') => Some(SubColon),
                ('#','#') => Some(DoubleHashtag),
                _ => None,
            };
            if let Some(tkn) = sym {
                let sp = Span::union(self.peek[0].1, self.peek[1].1);
                self.bump()?;
                self.bump()?;
                return Ok((tkn, sp));
            }
        }

        // One-symbol operators, delimiters and punctuation.
        if let CatTokenKind::Symbol(c0) = self.peek[0].0 {
            let sym = match c0 {
                '=' => Some(Operator(Op::Assign)),
                '+' => Some(Operator(Op::Add)),
                '-' => Some(Operator(Op::Sub)),
                '*' => Some(Operator(Op::Mul)),
                '/' => Some(Operator(Op::Div)),
                '%' => Some(Operator(Op::Mod)),
                '<' => Some(Operator(Op::Lt)),
                '>' => Some(Operator(Op::Gt)),
                '!' => Some(Operator(Op::LogicNot)),
                '~' => Some(Operator(Op::BitNot)),
                '&' => Some(Operator(Op::BitAnd)),
                '|' => Some(Operator(Op::BitOr)),
                '^' => Some(Operator(Op::BitXor)),
                '(' => Some(OpenDelim(Paren)),
                ')' => Some(CloseDelim(Paren)),
                '[' => Some(OpenDelim(Brack)),
                ']' => Some(CloseDelim(Brack)),
                '{' => Some(OpenDelim(Brace)),
                '}' => Some(CloseDelim(Brace)),
                '#' => Some(Hashtag),
                ',' => Some(Comma),
                '.' => Some(Period),
                ':' => Some(Colon),
                ';' => Some(Semicolon),
                '?' => Some(Ternary),
                '@' => Some(At),
                _ => None,
            };
            if let Some(tkn) = sym {
                let sp = self.peek[0].1;
                self.bump()?;
                return Ok((tkn, sp));
            }
        }

        match self.peek[0] {
            // Identifiers and keywords. `begin` and `end` are reported as
            // delimiters rather than keywords.
            (CatTokenKind::Text, _) |
            (CatTokenKind::Symbol('_'), _) => {
                let (m, msp) = self.match_ident()?;
                return match find_keyword(&m) {
                    Some(Kw::Begin) => Ok((OpenDelim(Bgend), msp)),
                    Some(Kw::End) => Ok((CloseDelim(Bgend), msp)),
                    Some(kw) => Ok((Keyword(kw), msp)),
                    None => Ok((Ident(name_table.intern(&m, true)), msp)),
                };
            }

            // System identifiers, or a lone `$`.
            (CatTokenKind::Symbol('$'), sp) => {
                self.bump()?;
                return match self.peek[0].0 {
                    CatTokenKind::Text |
                    CatTokenKind::Digits |
                    CatTokenKind::Symbol('_') |
                    CatTokenKind::Symbol('$') => {
                        let (m, msp) = self.match_ident()?;
                        Ok((SysIdent(name_table.intern(&m, true)), Span::union(sp,msp)))
                    }
                    _ => Ok((Dollar, sp))
                };
            },

            // Escaped identifiers: everything after the backslash up to the
            // next whitespace, newline, or end of input.
            (CatTokenKind::Symbol('\\'), mut sp) => {
                let mut s = String::new();
                loop {
                    self.bump()?;
                    if self.peek[0].0 == CatTokenKind::Whitespace || self.peek[0].0 == CatTokenKind::Newline || self.peek[0].0 == CatTokenKind::Eof {
                        break;
                    }
                    sp.expand(self.peek[0].1);
                    s.push_str(&self.peek[0].1.extract());
                }
                if s.is_empty() {
                    return Err(DiagBuilder2::fatal("Expected escaped identifier after backslash '\\'").span(sp));
                }
                return Ok((EscIdent(name_table.intern(&s, true)), sp));
            }

            // Numbers that start with an apostrophe, or a lone apostrophe.
            (CatTokenKind::Symbol('\''), sp) => {
                self.bump()?;
                return self.match_based_number(None, sp);
            }

            // Numbers that start with digits: either a plain unsigned number
            // or the size of a based number.
            (CatTokenKind::Digits, mut sp) => {
                let value = {
                    let mut s = String::new();
                    s.push_str(&sp.extract());
                    self.bump()?;
                    self.eat_number_body_into(&mut s, &mut sp, false)?;
                    name_table.intern(&s, true)
                };
                // Noise is allowed between the size and the apostrophe.
                self.skip_noise()?;

                // Only treat the digits as a size if the apostrophe is
                // directly followed by text, i.e. a base specifier. In any
                // other case (for example the size cast `4'(x)`) the digits
                // are a number of their own. Handing them to
                // `match_based_number` as a size would drop them, because it
                // returns a bare `Apostrophe` when no base follows.
                match (self.peek[0].0, self.peek[1].0) {
                    (CatTokenKind::Symbol('\''), CatTokenKind::Text) => {
                        self.bump()?;
                        return self.match_based_number(Some(value), sp)
                    },
                    _ => return Ok((Literal(UnsignedInteger(value)), sp))
                }
            }

            // String literals.
            (CatTokenKind::Symbol('"'), mut span) => {
                self.bump()?;
                let mut s = String::new();
                loop {
                    match self.peek[0] {
                        (CatTokenKind::Symbol('"'), sp) => {
                            span.expand(sp);
                            self.bump()?;
                            break;
                        }
                        (CatTokenKind::Symbol('\\'), sp) => {
                            span.expand(sp);
                            self.bump()?;
                            match self.peek[0] {
                                (CatTokenKind::Symbol('\\'), sp) => {
                                    span.expand(sp);
                                    s.push('\\');
                                }
                                // An escaped newline continues the string on
                                // the next line and contributes no character.
                                (CatTokenKind::Newline, sp) => { span.expand(sp); },
                                (CatTokenKind::Symbol('"'), sp) => {
                                    span.expand(sp);
                                    s.push('"');
                                }
                                (CatTokenKind::Text, sp) => {
                                    span.expand(sp);
                                    s.push_str(&sp.extract());
                                }
                                _ => return Err(DiagBuilder2::fatal("Unknown escape sequence in string").span(span))
                            }
                        }
                        (CatTokenKind::Newline, sp) => return Err(DiagBuilder2::fatal("String literals cannot contain unescaped newlines").span(sp)),
                        // `bump` keeps producing `Eof` once the input is
                        // exhausted, so without this arm an unclosed string
                        // would append text and spin here forever.
                        (CatTokenKind::Eof, _) => return Err(DiagBuilder2::fatal("Unterminated string literal").span(span)),
                        (_, sp) => {
                            span.expand(sp);
                            s.push_str(&sp.extract());
                        }
                    }
                    self.bump()?;
                }
                return Ok((Literal(Str(name_table.intern(&s, true))), span));
            }

            (CatTokenKind::Eof, sp) => return Ok((Eof, sp)),
            (tkn, sp) => return Err(DiagBuilder2::fatal(format!("Unknown token {:?}", tkn)).span(sp)),
        }
    }
}
/// Advance past any run of whitespace, newline and comment entries, so that
/// slot 0 of the lookahead window holds something else afterwards.
///
/// # Errors
///
/// Returns the error of the first failing `bump`.
fn skip_noise(&mut self) -> DiagResult2<()> {
    while self.peek[0].0 == CatTokenKind::Whitespace
        || self.peek[0].0 == CatTokenKind::Newline
        || self.peek[0].0 == CatTokenKind::Comment
    {
        self.bump()?;
    }
    Ok(())
}
/// Consume an identifier and return its text together with its span.
///
/// The identifier is the longest run of text, digit, `_` and `$` entries
/// starting at the current lookahead position. Whether the first entry is a
/// legal identifier start is up to the caller; no check is made here.
///
/// # Errors
///
/// Returns a fatal diagnostic if no such entry is present at the current
/// position, and forwards any error raised while advancing the input.
fn match_ident(&mut self) -> DiagResult2<(String, Span)> {
    let mut s = String::new();
    let mut sp = self.peek[0].1;
    loop {
        match self.peek[0] {
            (CatTokenKind::Text, this_sp) |
            (CatTokenKind::Digits, this_sp) |
            (CatTokenKind::Symbol('_'), this_sp) |
            (CatTokenKind::Symbol('$'), this_sp) => {
                s.push_str(&this_sp.extract());
                sp.expand(this_sp);
                self.bump()?;
            },
            _ => break,
        }
    }
    if s.is_empty() {
        return Err(DiagBuilder2::fatal("Could not match an identifier here").span(sp));
    }
    Ok((s, sp))
}
/// Lex whatever follows an apostrophe that the caller has already consumed.
///
/// `size` is the size prefix that preceded the apostrophe, if any, and `span`
/// is the span lexed so far; it is extended over everything consumed here.
/// Depending on the next entry the result is:
///
/// - text: a lone `x`/`X` or `z`/`Z` yields an unbased unsized literal;
///   otherwise the text must be an optional `s`/`S` sign marker followed by a
///   base (`d`, `b`, `o`, `h`, in either case), yielding a based integer;
/// - digits, when no size was given: `0` or `1` as an unbased unsized literal;
/// - `?`: the unbased unsized literal `z`;
/// - anything else: a plain `Apostrophe`, without consuming further input.
///
/// # Errors
///
/// Returns a fatal diagnostic for a missing or invalid base and for unsized
/// digits other than a single `0` or `1`, and forwards errors raised while
/// advancing the input.
fn match_based_number(&mut self, size: Option<Name>, mut span: Span) -> DiagResult2<TokenAndSpan> {
    let (kind, sp) = self.peek[0];
    match kind {
        CatTokenKind::Text => {
            self.bump()?;
            let text = sp.extract();
            span.expand(sp);
            let mut chars = text.chars();
            let mut c = chars.next();

            // A single `x` or `z` is an unbased unsized literal.
            if text.len() == 1 {
                match c {
                    Some('z') | Some('Z') => return Ok((Literal(UnbasedUnsized('z')), span)),
                    Some('x') | Some('X') => return Ok((Literal(UnbasedUnsized('x')), span)),
                    _ => (),
                }
            }

            // Optional sign marker.
            let signed = match c {
                Some('s') | Some('S') => true,
                _ => false,
            };
            if signed {
                c = chars.next();
            }

            // Mandatory base, normalized to lower case.
            let base = match c {
                None => return Err(DiagBuilder2::fatal("Missing number base").span(span)),
                Some(x) => match x {
                    'd' | 'D' => 'd',
                    'b' | 'B' => 'b',
                    'o' | 'O' => 'o',
                    'h' | 'H' => 'h',
                    _ => return Err(DiagBuilder2::fatal(format!("`{}` is not a valid number base", x)).span(span)),
                },
            };

            // The digits either follow the base within the same text entry,
            // or come after optional noise.
            let mut body = String::new();
            match chars.next() {
                Some(first) => {
                    body.push(first);
                    body.push_str(chars.as_str());
                }
                None => self.skip_noise()?,
            }
            self.eat_number_body_into(&mut body, &mut span, true)?;

            let digits = get_name_table().intern(&body, true);
            Ok((Literal(BasedInteger(size, signed, base, digits)), span))
        }
        CatTokenKind::Digits if size.is_none() => {
            self.bump()?;
            let value = sp.extract();
            span.expand(sp);
            let single = value.len() == 1;
            match value.chars().next() {
                Some('0') if single => Ok((Literal(UnbasedUnsized('0')), span)),
                Some('1') if single => Ok((Literal(UnbasedUnsized('1')), span)),
                _ => Err(DiagBuilder2::fatal("Unbased unsized literal may only be '0, '1, 'x, or 'z").span(span)),
            }
        }
        CatTokenKind::Symbol('?') => {
            self.bump()?;
            span.expand(sp);
            Ok((Literal(UnbasedUnsized('z')), span))
        }
        _ => Ok((Apostrophe, span)),
    }
}
/// Consume the digits of a number and append them to `into`.
///
/// Digit entries are always accepted and underscores are consumed without
/// being appended. Text entries and the `?` digit are only accepted when
/// `allow_alphabetic` is set, which is how the body of a based number is
/// read. In a plain decimal number a `?` is left in the input, so that e.g.
/// `1?a:b` lexes as the number `1` followed by the ternary operator.
///
/// `span` is extended over every entry that is appended. Consumption stops at
/// the first entry that is not part of the number.
///
/// # Errors
///
/// Returns a fatal diagnostic if a text entry is encountered while
/// `allow_alphabetic` is `false`, and forwards errors raised while advancing
/// the input.
fn eat_number_body_into(&mut self, into: &mut String, span: &mut Span, allow_alphabetic: bool) -> DiagResult2<()> {
    loop {
        match self.peek[0] {
            (CatTokenKind::Digits, sp) => {
                into.push_str(&sp.extract());
                span.expand(sp);
            },
            (CatTokenKind::Text, sp) => {
                if !allow_alphabetic {
                    return Err(DiagBuilder2::fatal("Unsigned number or size of literal must be a decimal and thus cannot contain any letters").span(sp));
                }
                into.push_str(&sp.extract());
                span.expand(sp);
            },
            (CatTokenKind::Symbol('_'), _) => (),
            (CatTokenKind::Symbol('?'), sp) if allow_alphabetic => {
                into.push('?');
                span.expand(sp);
            },
            _ => break
        }
        self.bump()?;
    }
    Ok(())
}
}
/// Iterate over the tokens of the input. Iteration ends when `next_token`
/// reports `Eof`; errors are yielded as items and do not end the iteration.
impl<'a> Iterator for Lexer<'a> {
    type Item = DiagResult2<TokenAndSpan>;

    fn next(&mut self) -> Option<Self::Item> {
        let result = self.next_token();
        if let Ok((Eof, _)) = result {
            return None;
        }
        Some(result)
    }
}
#[cfg(test)]
mod tests {
use super::*;
use moore_common::source::*;
use moore_common::name::*;
use preproc::*;
/// Lex `input` and assert that the resulting tokens equal `expected`.
///
/// Each call registers `input` with the source manager under a fresh
/// `test_<n>.sv` name, numbered by a thread-local counter. Panics if the
/// lexer reports an error.
fn check(input: &str, expected: &[Token]) {
    use std::cell::Cell;
    thread_local!(static INDEX: Cell<usize> = Cell::new(0));

    let idx = INDEX.with(|counter| {
        let current = counter.get();
        counter.set(current + 1);
        current
    });
    let file_name = format!("test_{}.sv", idx);

    let sm = get_source_manager();
    let source = sm.add(&file_name, input);
    let lexer = Lexer::new(Preprocessor::new(source, &[]));

    let mut actual = Vec::new();
    for item in lexer {
        actual.push(item.unwrap().0);
    }
    assert_eq!(actual, expected);
}
/// Lex `input` and assert that it consists of exactly the token `expected`.
fn check_single(input: &str, expected: Token) {
    let expected_tokens = [expected];
    check(input, &expected_tokens);
}
/// Intern `n` in the name table and return the resulting `Name`.
fn name(n: &str) -> Name {
    let table = get_name_table();
    table.intern(n, true)
}
/// Simple identifiers, including ones that contain `_`, digits and `$`.
#[test]
fn idents() {
    let source = "shiftreg_a busa_index error_condition merge_ab _bus3 n$657";
    let expected = [
        Ident(name("shiftreg_a")),
        Ident(name("busa_index")),
        Ident(name("error_condition")),
        Ident(name("merge_ab")),
        Ident(name("_bus3")),
        Ident(name("n$657")),
    ];
    check(source, &expected);
}
/// Escaped identifiers run from the backslash up to the next whitespace.
#[test]
fn esc_idents() {
    let source = "\\busa+index \\-clock \\***error-condition*** \\net1/\\net2 \\{a,b} \\a*(b+c)";
    let expected = [
        EscIdent(name("busa+index")),
        EscIdent(name("-clock")),
        EscIdent(name("***error-condition***")),
        EscIdent(name("net1/\\net2")),
        EscIdent(name("{a,b}")),
        EscIdent(name("a*(b+c)")),
    ];
    check(source, &expected);
}
/// System identifiers; the leading `$` is not part of the interned name.
#[test]
fn sys_idents() {
    let source = "$display $finish $01_ad$as3_";
    let expected = [
        SysIdent(name("display")),
        SysIdent(name("finish")),
        SysIdent(name("01_ad$as3_")),
    ];
    check(source, &expected);
}
/// Unbased unsized literals; `X`/`Z` are normalized to lower case and `?`
/// is reported as `z`.
#[test]
fn unbased_unsized_literal() {
    let cases = [
        ("'0", '0'),
        ("'1", '1'),
        ("'X", 'x'),
        ("'x", 'x'),
        ("'Z", 'z'),
        ("'z", 'z'),
        ("'?", 'z'),
    ];
    for &(input, value) in cases.iter() {
        check_single(input, Literal(UnbasedUnsized(value)));
    }
}
/// Numbers without a size prefix: plain decimal and based.
#[test]
fn unsized_literal_constant_numbers() {
    let source = "659; 'h 837FF; 'o7460";
    let expected = [
        Literal(UnsignedInteger(name("659"))),
        Semicolon,
        Literal(BasedInteger(None, false, 'h', name("837FF"))),
        Semicolon,
        Literal(BasedInteger(None, false, 'o', name("7460"))),
    ];
    check(source, &expected);
}
/// Letters in a plain decimal number must be rejected by the lexer.
#[test]
#[should_panic(expected = "Unsigned number or size of literal must be a decimal")]
fn unsized_literal_constant_numbers_illegal() {
    let source = "4af";
    check(source, &[]);
}
/// Based numbers with a size prefix, with and without whitespace around the
/// apostrophe and base.
#[test]
fn sized_literal_constant_numbers() {
    let source = "4'b1001; 5 'D 3; 3'b01x; 12'hx; 16'hz";
    let expected = [
        Literal(BasedInteger(Some(name("4")), false, 'b', name("1001"))),
        Semicolon,
        Literal(BasedInteger(Some(name("5")), false, 'd', name("3"))),
        Semicolon,
        Literal(BasedInteger(Some(name("3")), false, 'b', name("01x"))),
        Semicolon,
        Literal(BasedInteger(Some(name("12")), false, 'h', name("x"))),
        Semicolon,
        Literal(BasedInteger(Some(name("16")), false, 'h', name("z"))),
    ];
    check(source, &expected);
}
/// Based numbers carrying the `s` sign marker.
#[test]
fn signed_literal_constant_numbers() {
    let source = "4 'shf; 16'sd?";
    let expected = [
        Literal(BasedInteger(Some(name("4")), true, 'h', name("f"))),
        Semicolon,
        Literal(BasedInteger(Some(name("16")), true, 'd', name("?"))),
    ];
    check(source, &expected);
}
/// Underscores inside numbers are dropped from the interned digits.
#[test]
fn underscores_in_literal_constant_numbers() {
    let source = "27_195_000; 16'b0011_0101_0001_1111; 32 'h 12ab_f001";
    let expected = [
        Literal(UnsignedInteger(name("27195000"))),
        Semicolon,
        Literal(BasedInteger(Some(name("16")), false, 'b', name("0011010100011111"))),
        Semicolon,
        Literal(BasedInteger(Some(name("32")), false, 'h', name("12abf001"))),
    ];
    check(source, &expected);
}
/// A backslash-newline inside a string literal continues the string on the
/// next line without contributing a character.
#[test]
fn multiline_string_literal() {
    let source = "$display(\"Humpty Dumpty sat on a wall. \\\nHumpty Dumpty had a great fall.\")";
    let expected = [
        SysIdent(name("display")),
        OpenDelim(Paren),
        Literal(Str(name("Humpty Dumpty sat on a wall. Humpty Dumpty had a great fall."))),
        CloseDelim(Paren),
    ];
    check(source, &expected);
}
}