use Symbol::*;
use TokenKind::*;
use super::Base;
/// A single lexed token: what it is and how much input it covers.
pub(crate) struct Token {
/// Classification of the token.
pub kind: TokenKind,
/// Number of bytes the lexer had consumed from its input when the token was produced.
pub len: usize,
}
/// Classification of a token produced by the lexer.
#[derive(Debug, PartialEq, Eq)]
pub enum TokenKind {
/// `(`
OpenParen,
/// `)`
CloseParen,
/// `;`
Semicolon,
/// An operator symbol; see [`Symbol`].
Sym(Symbol),
/// A numeric literal together with the layout details recorded while lexing it.
Num(Num),
/// A type annotation such as `u8` or `'q10.50`.
Ty(Ty),
/// An identifier: alphabetic characters, ASCII digits and `_`, not starting with a digit.
Ident,
/// A run of one or more whitespace characters.
Whitespace,
/// Input that could not be recognised as any other token.
Invalid,
/// End of input.
Eof,
}
/// Operator symbols recognised by the lexer.
#[derive(Debug, PartialEq, Eq)]
pub enum Symbol {
/// `+`
Plus,
/// `-`
Minus,
/// `*`
Star,
/// `**`
Star2,
/// `/`
Slash,
/// `%`
Percentage,
/// `&`
Ampersand,
/// `|`
Pipe,
/// `^`
Caret,
/// `!`
Bang,
/// `=`
Eq,
/// `<<`
Lt2,
/// `>>`
Gt2,
}
/// Integer type used for the positions stored in [`Num`] (`point`, `repeat`, `exp`).
///
/// `u16` in regular builds.
#[cfg(not(any(test, small_max)))]
pub type W = u16;
/// Integer type used for the positions stored in [`Num`] (`point`, `repeat`, `exp`).
///
/// `u8` under `cfg(test)` or `cfg(small_max)` — presumably so the position-overflow
/// paths can be reached with short inputs (TODO confirm intent).
#[cfg(any(test, small_max))]
pub type W = u8;
/// Layout of a numeric literal as recorded by the lexer.
///
/// All positions are byte offsets measured from the start of the lexer's input.
#[derive(Debug, PartialEq, Eq)]
pub struct Num {
/// Base of the digits; decimal unless a base prefix selected another one.
pub base: Base,
/// Whether the literal started with `0` followed by a base-prefix character.
pub prefixed: bool,
/// Whether the prefix was followed by `(d)`, where `d` is the base's maximum digit.
pub infinite_digit: bool,
/// Offset of the `.`; `Some(0)` when the literal starts with `.`.
pub point: Option<W>,
/// Offset of the `(` opening the parenthesised digit group after the point.
pub repeat: Option<W>,
/// Offset of the `e`/`E` exponent marker (only lexed for decimal literals).
pub exp: Option<W>,
/// Whether a trailing `..` followed the fractional part.
pub truncated: bool,
}
/// A type annotation token.
#[derive(Debug, PartialEq, Eq)]
pub struct Ty {
/// Whether the token started with `'`.
pub has_apostrophe: bool,
/// The recognised letter prefix (`i`, `u`, `q`, `uq`), or `None` if there was none.
pub prefix: Option<TyPrefix>,
}
/// Letter prefix of a type annotation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TyPrefix {
    /// Signed integer.
    Integer,
    /// Unsigned integer.
    UnsignedInteger,
    /// Signed fixed-point.
    FixedPoint,
    /// Unsigned fixed-point.
    UnsignedFixedPoint,
}

impl TyPrefix {
    /// Returns `2` for [`TyPrefix::UnsignedFixedPoint`] and `1` for every other prefix.
    pub(crate) fn len(self) -> usize {
        match self {
            Self::UnsignedFixedPoint => 2,
            Self::Integer | Self::UnsignedInteger | Self::FixedPoint => 1,
        }
    }

    /// Returns `true` for the signed prefixes (`Integer`, `FixedPoint`).
    #[must_use]
    pub fn signed(self) -> bool {
        !matches!(self, Self::UnsignedInteger | Self::UnsignedFixedPoint)
    }

    /// Returns `true` for the fixed-point prefixes (`FixedPoint`, `UnsignedFixedPoint`).
    #[must_use]
    pub fn fixed_point(self) -> bool {
        matches!(self, Self::FixedPoint | Self::UnsignedFixedPoint)
    }
}
/// Character-level lexer over a borrowed input string.
pub(crate) struct Lexer<'a> {
/// Iterator over the characters that have not been consumed yet.
chars: std::str::Chars<'a>,
/// The complete input the lexer was created with.
src: &'a str,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Lexer<'a> {
Lexer {
chars: input.chars(),
src: input,
}
}
/// Lexes one token.
///
/// Returns `Eof` when no characters remain. The reported `len` is the total number
/// of bytes consumed from this lexer's input so far.
pub fn next_token(&mut self) -> Token {
    let kind = match self.eat() {
        Some(first) => self.token(first),
        None => Eof,
    };
    // Measured only after the token has been fully consumed.
    let len = self.len();
    Token { kind, len }
}
/// Returns the next character without consuming it, or `'\0'` at end of input.
fn peek(&self) -> char {
    let upcoming = self.chars.as_str().chars().next();
    upcoming.unwrap_or('\0')
}
/// Consumes and returns the next character, or `None` once the input is exhausted.
fn eat(&mut self) -> Option<char> {
self.chars.next()
}
/// Number of bytes consumed so far: total input length minus what is still unread.
fn len(&self) -> usize {
    let total = self.src.len();
    let unread = self.chars.as_str().len();
    total - unread
}
/// Returns the part of the input that has already been consumed.
fn src(&self) -> &str {
    let consumed = self.len();
    &self.src[..consumed]
}
/// Consumes characters for as long as `predicate` accepts the next one.
///
/// The predicate is always evaluated first (on `'\0'` at end of input); consumption
/// stops as soon as it rejects a character or no input remains.
fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
    loop {
        let accepted = predicate(self.peek());
        if !accepted || self.chars.as_str().is_empty() {
            break;
        }
        self.eat();
    }
}
/// Classifies the token that starts with `first`, which has already been consumed.
///
/// Multi-character tokens consume their remaining characters here or in the
/// `eat_*` helpers. Anything unrecognised yields `Invalid`.
fn token(&mut self, first: char) -> TokenKind {
    match first {
        ws if ws.is_whitespace() => {
            self.eat_while(char::is_whitespace);
            Whitespace
        }
        '(' => OpenParen,
        ')' => CloseParen,
        ';' => Semicolon,
        '+' => Sym(Plus),
        '-' => Sym(Minus),
        // `**` takes precedence over a single `*`.
        '*' if self.peek() == '*' => {
            self.eat();
            Sym(Star2)
        }
        '*' => Sym(Star),
        '/' => Sym(Slash),
        '%' => Sym(Percentage),
        '&' => Sym(Ampersand),
        '|' => Sym(Pipe),
        '^' => Sym(Caret),
        '!' => Sym(Bang),
        '=' => Sym(Eq),
        // Only the doubled forms `<<` and `>>` are symbols; a lone `<` or `>`
        // falls through to `Invalid` below.
        '<' if self.peek() == '<' => {
            self.eat();
            Sym(Lt2)
        }
        '>' if self.peek() == '>' => {
            self.eat();
            Sym(Gt2)
        }
        // A leading `.` starts a number only when a digit or `(` follows.
        '.' if self.peek().is_ascii_digit() || self.peek() == '(' => self.eat_num(first),
        '0'..='9' => self.eat_num(first),
        '\'' => self.eat_ty(true),
        start if start == '_' || start.is_alphabetic() => self.eat_ident(),
        _ => Invalid,
    }
}
/// Lexes the rest of a numeric literal whose first character `first` (a decimal digit
/// or `.`) has already been consumed.
///
/// Recognised parts, in order: optional base prefix (optionally followed by `(d)` with
/// `d` the base's maximum digit), integer digits, `.` with fraction digits, a
/// parenthesised digit group, a decimal exponent, and a trailing `..`.
/// Returns `Invalid` when a part is malformed, when a recorded position does not fit
/// in `W`, or when no digit was seen at all.
fn eat_num(&mut self, first: char) -> TokenKind {
let mut base = Base::Dec;
let mut prefixed = false;
let mut infinite_digit = false;
// Base prefix: `0` followed by a character accepted by `Base::from_prefix`.
if first == '0' {
if let Some(b) = Base::from_prefix(self.peek()) {
self.eat(); base = b;
prefixed = true;
// `(d)` directly after the prefix; only attempted when the radix has a single
// bit set (i.e. is a power of two). `d` must equal `base.max_digit()`.
if self.peek() == '(' && base.radix().count_ones() == 1 {
self.eat(); if self.peek() == base.max_digit() {
self.eat(); if self.peek() == ')' {
self.eat(); infinite_digit = true;
} else {
return Invalid;
}
} else {
return Invalid;
}
}
}
};
// The leading character counts as a digit only when it was not part of a prefix
// (and is not `.`).
let mut has_digits = !prefixed && matches!(first, '0'..='9');
// Integer digits — or the fraction digits when the literal started with `.`.
self.eat_digits(base, &mut has_digits);
let point = if first == '.' {
Some(0)
} else if self.peek() == '.' {
// Record the offset of `.` before consuming it; bail out if it overflows `W`.
let pos = if let Ok(pos) = self.len().try_into() {
Some(pos)
} else {
return Invalid;
};
self.eat(); self.eat_digits(base, &mut has_digits);
pos
} else {
None
};
// Parenthesised digit group; only allowed after a point. The group may be empty
// but must be closed with `)`.
let repeat = if point.is_some() && self.peek() == '(' {
let pos = if let Ok(pos) = self.len().try_into() {
Some(pos)
} else {
return Invalid;
};
self.eat(); self.eat_digits(base, &mut has_digits);
if self.peek() == ')' {
self.eat(); } else {
return Invalid;
};
pos
} else {
None
};
// Exponent: decimal literals only, `e`/`E`, optional `-`, then at least one digit.
let exp = if matches!(base, Base::Dec) && matches!(self.peek(), 'e' | 'E') {
let pos = if let Ok(pos) = self.len().try_into() {
Some(pos)
} else {
return Invalid;
};
self.eat(); if self.peek() == '-' {
self.eat(); }
// Shadows the outer flag: the exponent needs digits of its own.
let mut has_digits = false;
self.eat_digits(Base::Dec, &mut has_digits);
if has_digits {
pos
} else {
return Invalid;
}
} else {
None
};
// Trailing `..`: only after a point and without an exponent. A single extra `.`
// makes the literal invalid.
let truncated = if point.is_some() && exp.is_none() && self.peek() == '.' {
self.eat(); if self.peek() == '.' {
self.eat();
true
} else {
return Invalid;
}
} else {
false
};
// A literal needs at least one digit, unless it carries the `(d)` group.
if has_digits || infinite_digit {
Num(Num {
base,
prefixed,
infinite_digit,
point,
repeat,
exp,
truncated,
})
} else {
Invalid
}
}
/// Consumes a run of digits of `base` and `_` separators.
///
/// Sets `*has_digits` to `true` if at least one real digit is seen; underscores alone
/// leave it untouched.
fn eat_digits(&mut self, base: Base, has_digits: &mut bool) {
    self.eat_while(|c| {
        let is_digit = base.is_digit(c);
        *has_digits |= is_digit;
        is_digit || c == '_'
    });
}
/// Lexes an identifier, or a type annotation when the leading letters are exactly
/// `i`, `u`, `q` or `uq`.
fn eat_ident(&mut self) -> TokenKind {
    // First take only letters and underscores so the prefix check sees no digits.
    self.eat_while(|c| c == '_' || c.is_alphabetic());
    let is_ty_prefix = matches!(self.src(), "i" | "u" | "q" | "uq");
    if is_ty_prefix {
        return self.eat_ty(false);
    }
    // Ordinary identifier: digits are allowed from here on.
    self.eat_while(|c| c == '_' || c.is_ascii_digit() || c.is_alphabetic());
    Ident
}
/// Lexes a type annotation.
///
/// With `has_apostrophe` the leading `'` has been consumed and the letters are read
/// here; otherwise the letters consumed so far are the prefix. Decimal digits follow,
/// and a `.` with more digits is accepted unless the prefix is a non-fixed-point one.
/// Yields `Invalid` when there is neither a recognised prefix nor any digit.
fn eat_ty(&mut self, has_apostrophe: bool) -> TokenKind {
    if has_apostrophe {
        self.eat_while(char::is_alphabetic);
    }
    let consumed = self.src();
    // Skip the one-byte `'` when present.
    let letters = if has_apostrophe { &consumed[1..] } else { consumed };
    let prefix = match letters {
        "uq" => Some(TyPrefix::UnsignedFixedPoint),
        "q" => Some(TyPrefix::FixedPoint),
        "u" => Some(TyPrefix::UnsignedInteger),
        "i" => Some(TyPrefix::Integer),
        _ => None,
    };

    let mut has_digits = false;
    self.eat_digits(Base::Dec, &mut has_digits);

    // A missing prefix is treated like a fixed-point one for the `.` part.
    let fraction_allowed = prefix.map_or(true, TyPrefix::fixed_point);
    if fraction_allowed && self.peek() == '.' {
        self.eat();
        self.eat_digits(Base::Dec, &mut has_digits);
    }

    if prefix.is_none() && !has_digits {
        return Invalid;
    }
    Ty(Ty {
        has_apostrophe,
        prefix,
    })
}
}
pub fn tokens(src: &str) -> impl Iterator<Item = (TokenKind, super::Span)> + '_ {
let mut pos = 0;
let mut empty = false;
std::iter::from_fn(move || {
if empty {
None
} else {
let token = Lexer::new(&src[pos..]).next_token();
let span = super::Span::new(pos, pos + token.len);
pos += token.len;
empty = matches!(token.kind, Eof);
Some((token.kind, span))
}
})
}
#[cfg(test)]
mod test {
use crate::Base::*;
use super::Num;
use super::Symbol::*;
use super::TokenKind::*;
use super::Ty;
// Builder-style helpers for spelling out expected `Num` values in the tests.
impl Num {
    // Plain literal in `base` with every optional part absent.
    fn new(base: super::Base, prefixed: bool) -> Self {
        Self {
            base,
            prefixed,
            infinite_digit: false,
            point: None,
            repeat: None,
            exp: None,
            truncated: false,
        }
    }

    // Unprefixed decimal.
    fn num() -> Self {
        Self::new(Dec, false)
    }

    // Prefixed literals, one per base.
    fn bin() -> Self {
        Self::new(Bin, true)
    }

    fn oct() -> Self {
        Self::new(Oct, true)
    }

    fn dec() -> Self {
        Self::new(Dec, true)
    }

    fn hex() -> Self {
        Self::new(Hex, true)
    }

    // Modifiers: each sets one field and hands the value back.
    fn inf(mut self) -> Self {
        self.infinite_digit = true;
        self
    }

    fn pt(mut self, point: super::W) -> Self {
        self.point = Some(point);
        self
    }

    fn rp(mut self, repeat: super::W) -> Self {
        self.repeat = Some(repeat);
        self
    }

    fn exp(mut self, exp: super::W) -> Self {
        self.exp = Some(exp);
        self
    }

    fn tr(mut self) -> Self {
        self.truncated = true;
        self
    }
}
// Shorthand constructors for expected `Ty` values in the tests.
// The `c*` variants are the apostrophe forms.
impl Ty {
    fn new(has_apostrophe: bool, prefix: Option<super::TyPrefix>) -> Self {
        Self {
            prefix,
            has_apostrophe,
        }
    }

    // Annotation without a leading apostrophe.
    fn unquoted(prefix: super::TyPrefix) -> Self {
        Self::new(false, Some(prefix))
    }

    // Annotation introduced by an apostrophe.
    fn quoted(prefix: super::TyPrefix) -> Self {
        Self::new(true, Some(prefix))
    }

    fn i() -> Self {
        Self::unquoted(super::TyPrefix::Integer)
    }

    fn u() -> Self {
        Self::unquoted(super::TyPrefix::UnsignedInteger)
    }

    fn q() -> Self {
        Self::unquoted(super::TyPrefix::FixedPoint)
    }

    fn uq() -> Self {
        Self::unquoted(super::TyPrefix::UnsignedFixedPoint)
    }

    fn ci() -> Self {
        Self::quoted(super::TyPrefix::Integer)
    }

    fn cu() -> Self {
        Self::quoted(super::TyPrefix::UnsignedInteger)
    }

    fn cq() -> Self {
        Self::quoted(super::TyPrefix::FixedPoint)
    }

    fn cuq() -> Self {
        Self::quoted(super::TyPrefix::UnsignedFixedPoint)
    }

    // Apostrophe form with no letter prefix.
    fn cw() -> Self {
        Self::new(true, None)
    }
}
// Yields the kind of every token in `src`, ending with `Eof`.
// A fresh lexer is started at the current offset for each token.
pub(crate) fn tokenize(src: &str) -> impl Iterator<Item = super::TokenKind> + '_ {
    let mut offset = 0;
    let mut done = false;
    std::iter::from_fn(move || {
        if done {
            return None;
        }
        let super::Token { kind, len } = super::Lexer::new(&src[offset..]).next_token();
        offset += len;
        done = matches!(kind, Eof);
        Some(kind)
    })
}
/// `super::tokens` reports contiguous byte spans and ends with an empty `Eof` span.
#[test]
fn tokens() {
use crate::Span;
assert_eq!(
super::tokens("(8 / 4) * 2").collect::<Vec<_>>(),
&[
(OpenParen, Span::new(0, 1)),
(Num(Num::num()), Span::new(1, 2)),
(Whitespace, Span::new(2, 3)),
(Sym(Slash), Span::new(3, 4)),
(Whitespace, Span::new(4, 5)),
(Num(Num::num()), Span::new(5, 6)),
(CloseParen, Span::new(6, 7)),
(Whitespace, Span::new(7, 8)),
(Sym(Star), Span::new(8, 9)),
(Whitespace, Span::new(9, 10)),
(Num(Num::num()), Span::new(10, 11)),
(Eof, Span::new(11, 11))
]
);
}
/// Asserts that lexing `$src` yields exactly the listed token kinds followed by `Eof`.
///
/// The `Eof` is appended automatically. The optional leading identifier (`$st`) is
/// accepted by the pattern but not used in the expansion.
macro_rules! test_lex {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)]
let actual = tokenize($src).collect::<Vec<_>>();
let expected = vec![$($($token),*,)? Eof];
assert_eq!(actual, expected, "{}", $src);
};
}
/// Simple arithmetic expressions: numbers, whitespace, operators and parentheses.
#[test]
fn basic() {
test_lex!(
"1 + 2",
Num(Num::num()),
Whitespace,
Sym(Plus),
Whitespace,
Num(Num::num()),
);
test_lex!(
"(8 / 4) * 2",
OpenParen,
Num(Num::num()),
Whitespace,
Sym(Slash),
Whitespace,
Num(Num::num()),
CloseParen,
Whitespace,
Sym(Star),
Whitespace,
Num(Num::num()),
);
}
/// Lines made of `=`-separated values in several bases, including a truncated binary
/// fraction, lex cleanly.
#[test]
fn output() {
test_lex!(
"251 (= -5) = 0b11111011 = 0o373 = 0xfb",
Num(Num::num()),
Whitespace,
OpenParen,
Sym(Eq),
Whitespace,
Sym(Minus),
Num(Num::num()),
CloseParen,
Whitespace,
Sym(Eq),
Whitespace,
Num(Num::bin()),
Whitespace,
Sym(Eq),
Whitespace,
Num(Num::oct()),
Whitespace,
Sym(Eq),
Whitespace,
Num(Num::hex()),
);
test_lex!(
"= 0b1010.1101001010..",
Sym(Eq),
Whitespace,
Num(Num::bin().pt(6).tr()),
);
}
/// A mixed run of spaces, newlines, carriage returns and tabs is one `Whitespace` token.
#[test]
fn whitespace() {
test_lex!(" \n\r \t\n", Whitespace);
}
/// Empty input yields only `Eof`.
#[test]
fn empty() {
test_lex!("");
}
/// Integer literals in every base, with either hex digit case, and a very long
/// underscore-separated literal.
#[test]
fn int() {
test_lex!("22", Num(Num::num()));
test_lex!("0d23", Num(Num::dec()));
test_lex!("0b1101", Num(Num::bin()));
test_lex!("0o7", Num(Num::oct()));
test_lex!("0xcc", Num(Num::hex()));
test_lex!("0xCc", Num(Num::hex()));
test_lex!(
"0x1_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000",
Num(Num::hex()),
);
}
/// Underscores are allowed inside and after digits; a leading underscore makes an
/// identifier instead.
#[test]
fn int_underscore() {
test_lex!("100_000", Num(Num::num()));
test_lex!("_100_000", Ident);
test_lex!("1______", Num(Num::num()));
}
/// A base prefix with no valid digit after it is `Invalid`.
#[test]
fn int_invalid() {
test_lex!("0o", Invalid);
test_lex!("0oC", Invalid, Ident);
test_lex!("0x_", Invalid);
}
/// Integer literals followed by `i`/`u` type annotations; those prefixes do not
/// absorb a following `.` part.
#[test]
fn int_ty() {
test_lex!("100u8", Num(Num::num()), Ty(Ty::u()));
test_lex!("0b101i3", Num(Num::bin()), Ty(Ty::i()));
test_lex!("0x7fff_ffffu31", Num(Num::hex()), Ty(Ty::u()));
test_lex!("1u", Num(Num::num()), Ty(Ty::u()));
test_lex!("0b101i", Num(Num::bin()), Ty(Ty::i()));
test_lex!("1u.1", Num(Num::num()), Ty(Ty::u()), Num(Num::num().pt(0)));
test_lex!("1i1.1", Num(Num::num()), Ty(Ty::i()), Num(Num::num().pt(0)));
}
/// `**` is lexed as a single `Star2` symbol.
#[test]
fn pow() {
test_lex!("5**2", Num(Num::num()), Sym(Star2), Num(Num::num()));
}
/// A `(max-digit)` group right after a binary, octal or hex prefix sets `infinite_digit`.
#[test]
fn inf() {
test_lex!("0b(1)01", Num(Num::bin().inf()));
test_lex!("0o(7)01234567", Num(Num::oct().inf()));
test_lex!("0x(f)0123456789abcde", Num(Num::hex().inf()));
}
/// Malformed `(digit)` groups: unclosed, used with the decimal prefix, or holding a
/// digit other than the base's maximum.
#[test]
fn inf_invalid() {
test_lex!("0b(1", Invalid);
test_lex!("0d(9)", Invalid, OpenParen, Num(Num::num()), CloseParen);
test_lex!("0x(e)", Invalid, Ident, CloseParen);
test_lex!("0b(2)", Invalid, Num(Num::num()), CloseParen);
}
/// Apostrophe type annotations, with or without a letter prefix and digits.
#[test]
fn cast() {
test_lex!("1'1", Num(Num::num()), Ty(Ty::cw()));
test_lex!("1'i", Num(Num::num()), Ty(Ty::ci()),);
test_lex!("1'u", Num(Num::num()), Ty(Ty::cu()));
test_lex!("1'u4", Num(Num::num()), Ty(Ty::cu()));
test_lex!("1'i600", Num(Num::num()), Ty(Ty::ci()),);
}
/// An apostrophe with neither a recognised prefix nor any digit is `Invalid`.
#[test]
fn cast_invalid() {
test_lex!("1'", Num(Num::num()), Invalid);
test_lex!("1'.", Num(Num::num()), Invalid);
}
/// Apostrophe annotations with a `.` part: fixed-point prefixes and the prefix-less form.
#[test]
fn cast_f() {
test_lex!("0x20'q10.50", Num(Num::hex()), Ty(Ty::cq()));
test_lex!("10'uq.50", Num(Num::num()), Ty(Ty::cuq()));
test_lex!("10'.50", Num(Num::num()), Ty(Ty::cw()));
}
/// Literals with a point; `pt(n)` is the byte offset of the `.` within the literal.
#[test]
fn fix() {
test_lex!(".50", Num(Num::num().pt(0)));
test_lex!("1.5", Num(Num::num().pt(1)));
test_lex!("1.", Num(Num::num().pt(1)));
test_lex!("0xa.", Num(Num::hex().pt(3)));
test_lex!("0b.1", Num(Num::bin().pt(2)));
test_lex!("0o377.773", Num(Num::oct().pt(5)));
}
/// Underscores are accepted in the fractional part, even directly after the point.
#[test]
fn fix_underscore() {
test_lex!("1.0_1", Num(Num::num().pt(1)));
test_lex!("1._1", Num(Num::num().pt(1)));
test_lex!("1._", Num(Num::num().pt(1)));
}
/// Parenthesised digit groups after the point; `rp(n)` is the byte offset of the `(`.
/// The group may hold only `_` or be empty.
#[test]
fn fix_repeat() {
test_lex!(".0(1)", Num(Num::num().pt(0).rp(2)));
test_lex!(".(1)", Num(Num::num().pt(0).rp(1)));
test_lex!("0x0.(71c)", Num(Num::hex().pt(3).rp(4)));
test_lex!("1.0(_)", Num(Num::num().pt(1).rp(3)));
test_lex!("1.0()", Num(Num::num().pt(1).rp(3)));
}
/// An unclosed group, or one interrupted by a non-digit, is `Invalid`.
#[test]
fn fix_repeat_invalid() {
test_lex!("1.0(1", Invalid);
test_lex!("0x1.0(0x1)", Invalid, Ident, CloseParen);
}
/// Literals with a point followed by type annotations, including fixed-point ones
/// with their own `.` part.
#[test]
fn fix_ty() {
test_lex!(".50u89", Num(Num::num().pt(0)), Ty(Ty::u()));
test_lex!("0d.1q0.5", Num(Num::dec().pt(2)), Ty(Ty::q()));
test_lex!("1uq", Num(Num::num()), Ty(Ty::uq()));
test_lex!("1.5q.5", Num(Num::num().pt(1)), Ty(Ty::q()));
}
/// A type annotation takes decimal digits only; the `x8` left over becomes an identifier.
#[test]
fn fix_ty_invalid() {
test_lex!(".50u0x8", Num(Num::num().pt(0)), Ty(Ty::u()), Ident);
}
/// The point offset must fit in `W`: `W::MAX` is accepted, one more is `Invalid`
/// (and the left-over `.` is `Invalid` too).
#[test]
fn fix_point_long() {
test_lex!(
&format!("{}.", "0".repeat(usize::from(super::W::MAX))),
Num(Num::num().pt(super::W::MAX))
);
test_lex!(
&format!("{}.", "0".repeat(usize::from(super::W::MAX) + 1)),
Invalid,
Invalid,
);
}
/// The group offset must fit in `W`: `W::MAX` is accepted, one more makes the literal
/// `Invalid` and the group is lexed as separate tokens.
#[test]
fn fix_repeat_long() {
test_lex!(
&format!(".{}(101)", "0".repeat(usize::from(super::W::MAX) - 1)),
Num(Num::num().pt(0).rp(super::W::MAX))
);
test_lex!(
&format!(".{}(101)", "0".repeat(usize::from(super::W::MAX))),
Invalid,
OpenParen,
Num(Num::num()),
CloseParen,
);
}
/// The exponent offset must fit in `W`: `W::MAX` is accepted, one more makes the
/// literal `Invalid` and the `e12` is lexed as an identifier.
#[test]
fn exp_long() {
test_lex!(
&format!("{}e12", "0".repeat(usize::from(super::W::MAX))),
Num(Num::num().exp(super::W::MAX))
);
test_lex!(
&format!("{}e12", "0".repeat(usize::from(super::W::MAX) + 1)),
Invalid,
Ident,
);
}
/// A point with no digit anywhere in the literal is `Invalid`.
#[test]
fn fix_invalid() {
test_lex!("0b.", Invalid);
test_lex!("0o.", Invalid);
test_lex!("0x.", Invalid);
test_lex!(".", Invalid);
}
/// Decimal exponents; `exp(n)` is the byte offset of the `e`/`E`. A `-` sign and
/// underscores among the exponent digits are accepted.
#[test]
fn exp() {
test_lex!("1e1", Num(Num::num().exp(1)));
test_lex!("1E1", Num(Num::num().exp(1)));
test_lex!("0d1e1", Num(Num::dec().exp(3)));
test_lex!("1e0", Num(Num::num().exp(1)));
test_lex!("0_1_e2", Num(Num::num().exp(4)));
test_lex!("0.01e3", Num(Num::num().pt(1).exp(4)));
test_lex!("5e30", Num(Num::num().exp(1)));
test_lex!("8e-100", Num(Num::num().exp(1)));
test_lex!("1.5(123)e3", Num(Num::num().pt(1).rp(3).exp(8)));
test_lex!("1e_1", Num(Num::num().exp(1)));
test_lex!("1e1_", Num(Num::num().exp(1)));
test_lex!("1e-_1", Num(Num::num().exp(1)));
}
/// Exponents are decimal-only and need at least one digit; the sign must come
/// directly after the `e`.
#[test]
fn exp_invalid() {
test_lex!("0o1e3", Num(Num::oct()), Ident);
test_lex!("1e", Invalid);
test_lex!("1e-", Invalid);
test_lex!("1e_-1", Invalid, Sym(Minus), Num(Num::num()));
}
/// A trailing `..` after the fractional part marks the literal as truncated.
#[test]
fn truncate() {
test_lex!("5.123..", Num(Num::num().pt(1).tr()));
test_lex!("0...", Num(Num::num().pt(1).tr()));
}
/// A single extra `.` after the fractional part makes the whole literal `Invalid`.
#[test]
fn truncate_invalid() {
test_lex!("0..", Invalid);
test_lex!("0.01.", Invalid);
}
/// A lone `<` or `>` is `Invalid`; only the doubled forms are symbols.
#[test]
fn shift_invalid() {
test_lex!("<>", Invalid, Invalid);
test_lex!("><", Invalid, Invalid);
}
/// Identifiers: letters (including non-ASCII), digits and underscores. Names that
/// merely start with a type-prefix letter are still identifiers.
#[test]
fn ident() {
test_lex!("a", Ident);
test_lex!("abc", Ident);
test_lex!("a0", Ident);
test_lex!("abc0123456789", Ident);
test_lex!("абв", Ident);
test_lex!("ii128", Ident);
test_lex!("i_128", Ident);
test_lex!("iu", Ident);
test_lex!("a_b_c", Ident);
test_lex!("a_1_c", Ident);
test_lex!("_a", Ident);
test_lex!("_1", Ident);
test_lex!("_", Ident);
}
}