use crate::cat::CatTokenKind;
use crate::preproc::*;
pub use crate::token::*;
use moore_common::errors::*;
use moore_common::name::*;
use moore_common::source::*;
type CatTokenAndSpan = (CatTokenKind, Span);
pub type TokenAndSpan = (Token, Span);
pub struct Lexer<'a> {
input: Preprocessor<'a>,
peek: [CatTokenAndSpan; 4],
}
impl<'a> Lexer<'a> {
pub fn new(input: Preprocessor<'a>) -> Lexer {
Lexer {
input: input,
peek: [(CatTokenKind::Eof, INVALID_SPAN); 4],
}
}
pub fn bump(&mut self) -> DiagResult2<()> {
self.peek[0] = self.peek[1];
self.peek[1] = self.peek[2];
self.peek[2] = self.peek[3];
self.peek[3] = match self.input.next() {
Some(Err(e)) => return Err(e),
Some(Ok(x)) => x,
None => (CatTokenKind::Eof, self.peek[2].1),
};
Ok(())
}
pub fn next_token(&mut self) -> DiagResult2<TokenAndSpan> {
if self.peek[0].0 == CatTokenKind::Eof {
self.bump()?;
self.bump()?;
self.bump()?;
self.bump()?;
}
let name_table = get_name_table();
loop {
self.skip_noise()?;
if let (
CatTokenKind::Symbol(c0),
CatTokenKind::Symbol(c1),
CatTokenKind::Symbol(c2),
CatTokenKind::Symbol(c3),
) = (
self.peek[0].0,
self.peek[1].0,
self.peek[2].0,
self.peek[3].0,
) {
let sym = match (c0, c1, c2, c3) {
('<', '<', '<', '=') => Some(Operator(Op::AssignArithShL)),
('>', '>', '>', '=') => Some(Operator(Op::AssignArithShR)),
_ => None,
};
if let Some(tkn) = sym {
let sp = Span::union(self.peek[0].1, self.peek[3].1);
self.bump()?;
self.bump()?;
self.bump()?;
self.bump()?;
return Ok((tkn, sp));
}
}
if let (CatTokenKind::Symbol(c0), CatTokenKind::Symbol(c1), CatTokenKind::Symbol(c2)) =
(self.peek[0].0, self.peek[1].0, self.peek[2].0)
{
let sym = match (c0, c1, c2) {
('<', '<', '=') => Some(Operator(Op::AssignLogicShL)),
('>', '>', '=') => Some(Operator(Op::AssignLogicShR)),
('=', '=', '=') => Some(Operator(Op::CaseEq)),
('!', '=', '=') => Some(Operator(Op::CaseNeq)),
('=', '=', '?') => Some(Operator(Op::WildcardEq)),
('!', '=', '?') => Some(Operator(Op::WildcardNeq)),
('<', '-', '>') => Some(Operator(Op::LogicEquiv)),
('<', '<', '<') => Some(Operator(Op::ArithShL)),
('>', '>', '>') => Some(Operator(Op::ArithShR)),
('|', '-', '>') => Some(Operator(Op::SeqImplOl)),
('|', '=', '>') => Some(Operator(Op::SeqImplNol)),
('#', '-', '#') => Some(Operator(Op::SeqFollowOl)),
('#', '=', '#') => Some(Operator(Op::SeqFollowNol)),
_ => None,
};
if let Some(tkn) = sym {
let sp = Span::union(self.peek[0].1, self.peek[2].1);
self.bump()?;
self.bump()?;
self.bump()?;
return Ok((tkn, sp));
}
}
if let (CatTokenKind::Symbol(c0), CatTokenKind::Symbol(c1)) =
(self.peek[0].0, self.peek[1].0)
{
let sym = match (c0, c1) {
('+', '=') => Some(Operator(Op::AssignAdd)),
('-', '=') => Some(Operator(Op::AssignSub)),
('*', '=') => Some(Operator(Op::AssignMul)),
('/', '=') => Some(Operator(Op::AssignDiv)),
('%', '=') => Some(Operator(Op::AssignMod)),
('&', '=') => Some(Operator(Op::AssignBitAnd)),
('|', '=') => Some(Operator(Op::AssignBitOr)),
('^', '=') => Some(Operator(Op::AssignBitXor)),
('+', '+') => Some(Operator(Op::Inc)),
('-', '-') => Some(Operator(Op::Dec)),
('*', '*') => Some(Operator(Op::Pow)),
('<', '=') => Some(Operator(Op::Leq)),
('>', '=') => Some(Operator(Op::Geq)),
('=', '=') => Some(Operator(Op::LogicEq)),
('!', '=') => Some(Operator(Op::LogicNeq)),
('-', '>') => Some(Operator(Op::LogicImpl)),
('|', '|') => Some(Operator(Op::LogicOr)),
('&', '&') => Some(Operator(Op::LogicAnd)),
('~', '&') => Some(Operator(Op::BitNand)),
('~', '|') => Some(Operator(Op::BitNor)),
('~', '^') => Some(Operator(Op::BitNxor)),
('^', '~') => Some(Operator(Op::BitXnor)),
('<', '<') => Some(Operator(Op::LogicShL)),
('>', '>') => Some(Operator(Op::LogicShR)),
(':', ':') => Some(Namespace),
('+', ':') => Some(AddColon),
('-', ':') => Some(SubColon),
('#', '#') => Some(DoubleHashtag),
_ => None,
};
if let Some(tkn) = sym {
let sp = Span::union(self.peek[0].1, self.peek[1].1);
self.bump()?;
self.bump()?;
return Ok((tkn, sp));
}
}
if let CatTokenKind::Symbol(c0) = self.peek[0].0 {
let sym = match c0 {
'=' => Some(Operator(Op::Assign)),
'+' => Some(Operator(Op::Add)),
'-' => Some(Operator(Op::Sub)),
'*' => Some(Operator(Op::Mul)),
'/' => Some(Operator(Op::Div)),
'%' => Some(Operator(Op::Mod)),
'<' => Some(Operator(Op::Lt)),
'>' => Some(Operator(Op::Gt)),
'!' => Some(Operator(Op::LogicNot)),
'~' => Some(Operator(Op::BitNot)),
'&' => Some(Operator(Op::BitAnd)),
'|' => Some(Operator(Op::BitOr)),
'^' => Some(Operator(Op::BitXor)),
'(' => Some(OpenDelim(Paren)),
')' => Some(CloseDelim(Paren)),
'[' => Some(OpenDelim(Brack)),
']' => Some(CloseDelim(Brack)),
'{' => Some(OpenDelim(Brace)),
'}' => Some(CloseDelim(Brace)),
'#' => Some(Hashtag),
',' => Some(Comma),
'.' => Some(Period),
':' => Some(Colon),
';' => Some(Semicolon),
'?' => Some(Ternary),
'@' => Some(At),
_ => None,
};
if let Some(tkn) = sym {
let sp = self.peek[0].1;
self.bump()?;
return Ok((tkn, sp));
}
}
match self.peek[0] {
(CatTokenKind::Text, _) | (CatTokenKind::Symbol('_'), _) => {
let (m, msp) = self.match_ident()?;
return match find_keyword(&m) {
Some(Kw::Begin) => Ok((OpenDelim(Bgend), msp)),
Some(Kw::End) => Ok((CloseDelim(Bgend), msp)),
Some(kw) => Ok((Keyword(kw), msp)),
None => Ok((Ident(name_table.intern(&m, true)), msp)),
};
}
(CatTokenKind::Symbol('$'), sp) => {
self.bump()?;
return match self.peek[0].0 {
CatTokenKind::Text
| CatTokenKind::Digits
| CatTokenKind::Symbol('_')
| CatTokenKind::Symbol('$') => {
let (m, msp) = self.match_ident()?;
Ok((SysIdent(name_table.intern(&m, true)), Span::union(sp, msp)))
}
_ => Ok((Dollar, sp)),
};
}
(CatTokenKind::Symbol('\\'), mut sp) => {
let mut s = String::new();
loop {
self.bump()?;
if self.peek[0].0 == CatTokenKind::Whitespace
|| self.peek[0].0 == CatTokenKind::Newline
|| self.peek[0].0 == CatTokenKind::Eof
{
break;
}
sp.expand(self.peek[0].1);
s.push_str(&self.peek[0].1.extract());
}
if s.is_empty() {
return Err(DiagBuilder2::fatal(
"Expected escaped identifier after backslash '\\'",
)
.span(sp));
}
return Ok((EscIdent(name_table.intern(&s, true)), sp));
}
(CatTokenKind::Symbol('\''), sp) => {
self.bump()?;
return self.match_based_number(None, sp);
}
(CatTokenKind::Digits, mut sp) => {
let value = {
let mut s = String::new();
s.push_str(&sp.extract());
self.bump()?;
self.eat_number_body_into(&mut s, &mut sp, false)?;
name_table.intern(&s, true)
};
let frac = if self.peek[0].0 == CatTokenKind::Symbol('.') {
let mut s = String::new();
self.bump()?;
self.eat_number_body_into(&mut s, &mut sp, false)?;
Some(name_table.intern(&s, true))
} else {
None
};
if let Some(unit) = self.try_time_unit() {
sp.expand(self.peek[0].1);
self.bump()?;
return Ok((Literal(Time(value, frac, unit)), sp));
}
if self.peek[0].0 == CatTokenKind::Text {
return Err(DiagBuilder2::fatal(format!(
"number literal `{}` may not directly be followed by letters `{}`",
sp.extract(),
self.peek[0].1.extract(),
))
.span(sp));
}
if frac.is_some() {
return Ok((Literal(Number(value, frac)), sp));
}
self.skip_noise()?;
match (self.peek[0].0, self.peek[1].0) {
(CatTokenKind::Symbol('\''), CatTokenKind::Text)
| (CatTokenKind::Symbol('\''), CatTokenKind::Digits) => {
self.bump()?;
return self.match_based_number(Some(value), sp);
}
_ => return Ok((Literal(Number(value, None)), sp)),
}
}
(CatTokenKind::Symbol('"'), mut span) => {
self.bump()?;
let mut s = String::new();
loop {
match self.peek[0] {
(CatTokenKind::Symbol('"'), sp) => {
span.expand(sp);
self.bump()?;
break;
}
(CatTokenKind::Symbol('\\'), sp) => {
span.expand(sp);
self.bump()?;
match self.peek[0] {
(CatTokenKind::Symbol('\\'), sp) => {
span.expand(sp);
s.push('\\');
}
(CatTokenKind::Newline, sp) => {
span.expand(sp);
}
(CatTokenKind::Symbol('"'), sp) => {
span.expand(sp);
s.push('"');
}
(CatTokenKind::Text, sp) => {
span.expand(sp);
s.push_str(&sp.extract());
}
_ => {
return Err(DiagBuilder2::fatal(
"Unknown escape sequence in string",
)
.span(span))
}
}
}
(CatTokenKind::Newline, sp) => {
return Err(DiagBuilder2::fatal(
"String literals cannot contain unescaped newlines",
)
.span(sp))
}
(_, sp) => {
span.expand(sp);
s.push_str(&sp.extract());
}
}
self.bump()?;
}
return Ok((Literal(Str(name_table.intern(&s, true))), span));
}
(CatTokenKind::Eof, sp) => return Ok((Eof, sp)),
(tkn, sp) => {
return Err(DiagBuilder2::fatal(format!("Unknown token {:?}", tkn)).span(sp))
}
}
}
}
fn skip_noise(&mut self) -> DiagResult2<()> {
loop {
match (self.peek[0].0, self.peek[1].0) {
(CatTokenKind::Symbol('/'), CatTokenKind::Symbol('/')) => {
self.bump()?;
self.bump()?;
loop {
match self.peek[0].0 {
CatTokenKind::Eof => break,
CatTokenKind::Newline => {
self.bump()?;
break;
}
_ => self.bump()?,
}
}
}
(CatTokenKind::Symbol('/'), CatTokenKind::Symbol('*')) => {
self.bump()?;
self.bump()?;
loop {
match (self.peek[0].0, self.peek[1].0) {
(CatTokenKind::Eof, _) => break,
(CatTokenKind::Symbol('*'), CatTokenKind::Symbol('/')) => {
self.bump()?;
self.bump()?;
break;
}
_ => self.bump()?,
}
}
}
(CatTokenKind::Symbol('('), CatTokenKind::Symbol('*'))
if self.peek[2].0 != CatTokenKind::Symbol(')') =>
{
self.bump()?;
self.bump()?;
loop {
match (self.peek[0].0, self.peek[1].0) {
(CatTokenKind::Eof, _) => break,
(CatTokenKind::Symbol('*'), CatTokenKind::Symbol(')')) => {
self.bump()?;
self.bump()?;
break;
}
_ => self.bump()?,
}
}
}
_ => (),
}
match self.peek[0].0 {
CatTokenKind::Whitespace | CatTokenKind::Newline | CatTokenKind::Comment => {
self.bump()?
}
_ => return Ok(()),
}
}
}
fn match_ident(&mut self) -> DiagResult2<(String, Span)> {
let mut s = String::new();
let mut sp = self.peek[0].1;
loop {
match self.peek[0] {
(CatTokenKind::Text, this_sp)
| (CatTokenKind::Digits, this_sp)
| (CatTokenKind::Symbol('_'), this_sp)
| (CatTokenKind::Symbol('$'), this_sp) => {
s.push_str(&this_sp.extract());
sp.expand(this_sp);
self.bump()?;
}
_ => break,
}
}
if s.is_empty() {
return Err(DiagBuilder2::fatal("Could not match an identifier here").span(sp));
}
assert!(!s.is_empty());
Ok((s, sp))
}
fn match_based_number(
&mut self,
size: Option<Name>,
mut span: Span,
) -> DiagResult2<TokenAndSpan> {
match self.peek[0] {
(CatTokenKind::Text, sp) => {
self.bump()?;
let text = sp.extract();
span.expand(sp);
let mut chars = text.chars();
let mut c = chars.next();
let signed = match c {
Some('s') | Some('S') => {
c = chars.next();
true
}
Some('z') | Some('Z') if text.len() == 1 => {
return Ok((Literal(UnbasedUnsized('z')), span))
}
Some('x') | Some('X') if text.len() == 1 => {
return Ok((Literal(UnbasedUnsized('x')), span))
}
_ => false,
};
let base = match c {
Some('d') | Some('D') => 'd',
Some('b') | Some('B') => 'b',
Some('o') | Some('O') => 'o',
Some('h') | Some('H') => 'h',
Some(x) => {
return Err(DiagBuilder2::fatal(format!(
"`{}` is not a valid number base",
x
))
.span(span))
}
None => return Err(DiagBuilder2::fatal("Missing number base").span(span)),
};
c = chars.next();
let mut body = String::new();
if let Some(c) = c {
body.push(c);
body.push_str(chars.as_str());
} else {
self.skip_noise()?;
}
self.eat_number_body_into(&mut body, &mut span, true)?;
return Ok((
Literal(BasedInteger(
size,
signed,
base,
get_name_table().intern(&body, true),
)),
span,
));
}
(CatTokenKind::Digits, sp) if size.is_none() => {
self.bump()?;
let value = sp.extract();
span.expand(sp);
match value.chars().next() {
Some('0') if value.len() == 1 => {
return Ok((Literal(UnbasedUnsized('0')), span))
}
Some('1') if value.len() == 1 => {
return Ok((Literal(UnbasedUnsized('1')), span))
}
_ => {
return Err(DiagBuilder2::fatal(
"Unbased unsized literal may only be '0, '1, 'x, or 'z",
)
.span(span))
}
}
}
(CatTokenKind::Symbol('?'), sp) => {
self.bump()?;
span.expand(sp);
return Ok((Literal(UnbasedUnsized('z')), span));
}
_ => return Ok((Apostrophe, span)),
}
}
fn eat_number_body_into(
&mut self,
into: &mut String,
span: &mut Span,
allow_alphabetic: bool,
) -> DiagResult2<()> {
loop {
match self.peek[0] {
(CatTokenKind::Digits, sp) | (CatTokenKind::Text, sp) => {
if self.peek[0].0 == CatTokenKind::Text && !allow_alphabetic {
break;
}
into.push_str(&sp.extract());
span.expand(sp);
}
(CatTokenKind::Symbol('_'), _) => (),
(CatTokenKind::Symbol('?'), sp) => {
into.push('?');
span.expand(sp);
}
_ => break,
}
self.bump()?;
}
Ok(())
}
fn try_time_unit(&mut self) -> Option<TimeUnit> {
if self.peek[0].0 == CatTokenKind::Text {
match self.peek[0].1.extract().as_str() {
"s" => Some(TimeUnit::Second),
"ms" => Some(TimeUnit::MilliSecond),
"us" => Some(TimeUnit::MicroSecond),
"ns" => Some(TimeUnit::NanoSecond),
"ps" => Some(TimeUnit::PicoSecond),
"fs" => Some(TimeUnit::FemtoSecond),
_ => None,
}
} else {
None
}
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = DiagResult2<TokenAndSpan>;
fn next(&mut self) -> Option<Self::Item> {
match self.next_token() {
Ok((Eof, _)) => None,
x => Some(x),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn check(input: &str, expected: &[Token]) {
use std::cell::Cell;
thread_local!(static INDEX: Cell<usize> = Cell::new(0));
let sm = get_source_manager();
let idx = INDEX.with(|i| {
let v = i.get();
i.set(v + 1);
v
});
let source = sm.add(&format!("test_{}.sv", idx), input);
let pp = Preprocessor::new(source, &[], &[]);
let lexer = Lexer::new(pp);
let actual: Vec<_> = lexer.map(|x| x.unwrap().0).collect();
assert_eq!(actual, expected);
}
fn check_single(input: &str, expected: Token) {
check(input, &[expected]);
}
fn name(n: &str) -> Name {
get_name_table().intern(n, true)
}
#[test]
fn idents() {
check(
"shiftreg_a busa_index error_condition merge_ab _bus3 n$657",
&vec![
Ident(name("shiftreg_a")),
Ident(name("busa_index")),
Ident(name("error_condition")),
Ident(name("merge_ab")),
Ident(name("_bus3")),
Ident(name("n$657")),
],
);
}
#[test]
fn esc_idents() {
check(
"\\busa+index \\-clock \\***error-condition*** \\net1/\\net2 \\{a,b} \\a*(b+c)",
&vec![
EscIdent(name("busa+index")),
EscIdent(name("-clock")),
EscIdent(name("***error-condition***")),
EscIdent(name("net1/\\net2")),
EscIdent(name("{a,b}")),
EscIdent(name("a*(b+c)")),
],
);
}
#[test]
fn sys_idents() {
check(
"$display $finish $01_ad$as3_",
&vec![
SysIdent(name("display")),
SysIdent(name("finish")),
SysIdent(name("01_ad$as3_")),
],
);
}
#[test]
fn unbased_unsized_literal() {
check_single("'0", Literal(UnbasedUnsized('0')));
check_single("'1", Literal(UnbasedUnsized('1')));
check_single("'X", Literal(UnbasedUnsized('x')));
check_single("'x", Literal(UnbasedUnsized('x')));
check_single("'Z", Literal(UnbasedUnsized('z')));
check_single("'z", Literal(UnbasedUnsized('z')));
check_single("'?", Literal(UnbasedUnsized('z')));
}
#[test]
fn unsized_literal_constant_numbers() {
check(
"659; 'h 837FF; 'o7460",
&[
Literal(Number(name("659"), None)),
Semicolon,
Literal(BasedInteger(None, false, 'h', name("837FF"))),
Semicolon,
Literal(BasedInteger(None, false, 'o', name("7460"))),
],
);
}
#[test]
#[should_panic(expected = "number literal `4` may not directly be followed by letters `af`")]
fn unsized_literal_constant_numbers_illegal() {
check("4af", &vec![]);
}
#[test]
fn sized_literal_constant_numbers() {
check(
"4'b1001; 5 'D 3; 3'b01x; 12'hx; 16'hz",
&[
Literal(BasedInteger(Some(name("4")), false, 'b', name("1001"))),
Semicolon,
Literal(BasedInteger(Some(name("5")), false, 'd', name("3"))),
Semicolon,
Literal(BasedInteger(Some(name("3")), false, 'b', name("01x"))),
Semicolon,
Literal(BasedInteger(Some(name("12")), false, 'h', name("x"))),
Semicolon,
Literal(BasedInteger(Some(name("16")), false, 'h', name("z"))),
],
);
}
#[test]
fn signed_literal_constant_numbers() {
check(
"4 'shf; 16'sd?",
&[
Literal(BasedInteger(Some(name("4")), true, 'h', name("f"))),
Semicolon,
Literal(BasedInteger(Some(name("16")), true, 'd', name("?"))),
],
);
}
#[test]
#[ignore]
fn underscores_in_literal_constant_numbers() {
check(
"27_195_000; 16'b0011_0101_0001_1111; 32 'h 12ab_f001",
&[
Literal(Number(name("27195000"), None)),
Semicolon,
Literal(BasedInteger(
Some(name("16")),
false,
'b',
name("0011010100011111"),
)),
Semicolon,
Literal(BasedInteger(Some(name("32")), false, 'h', name("12abf001"))),
],
);
}
#[test]
fn multiline_string_literal() {
check(
"$display(\"Humpty Dumpty sat on a wall. \\\nHumpty Dumpty had a great fall.\")",
&[
SysIdent(name("display")),
OpenDelim(Paren),
Literal(Str(name(
"Humpty Dumpty sat on a wall. Humpty Dumpty had a great fall.",
))),
CloseDelim(Paren),
],
);
}
#[test]
fn time_literal() {
check(
"42s 14.3ms 16.32us 9ns 0.1ps 8123fs",
&[
Literal(Time(name("42"), None, TimeUnit::Second)),
Literal(Time(name("14"), Some(name("3")), TimeUnit::MilliSecond)),
Literal(Time(name("16"), Some(name("32")), TimeUnit::MicroSecond)),
Literal(Time(name("9"), None, TimeUnit::NanoSecond)),
Literal(Time(name("0"), Some(name("1")), TimeUnit::PicoSecond)),
Literal(Time(name("8123"), None, TimeUnit::FemtoSecond)),
],
);
}
#[test]
fn number_literal() {
check(
"42 4.2",
&[
Literal(Number(name("42"), None)),
Literal(Number(name("4"), Some(name("2")))),
],
);
}
}