use std::convert::TryInto;
use std::iter::{Enumerate, Peekable};
use std::str::Chars;
use crate::num::parse_f64_hex;
/// A value of type `T` paired with the source position it was read from.
///
/// The fields are private; use [`Spanned::inner`], [`Spanned::pos`],
/// [`Spanned::tuple`] or [`Spanned::into_tuple`] to access them.
///
/// `Debug` and `Clone` are derived so that spanned tokens can be printed in
/// diagnostics and duplicated whenever the wrapped value supports it.
#[derive(Debug, Clone)]
pub struct Spanned<T> {
    /// The wrapped value.
    inner: T,
    /// Where the wrapped value starts in the source text.
    pos: Pos,
}
/// A source position stored as `(line, column)`.
///
/// The tokenizer in this file fills both fields with zero-based values and
/// measures the column in characters from the start of the line.
///
/// `PartialEq`/`Eq` are derived so positions can be compared directly (for
/// example in `assert_eq!`).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Pos(usize, usize);
impl<T> Spanned<T> {
pub fn tuple(&self) -> (Pos, &T) {
(self.pos, &self.inner)
}
pub fn into_tuple(self) -> (Pos, T) {
(self.pos, self.inner)
}
pub fn inner(&self) -> &T {
&self.inner
}
pub fn pos(&self) -> Pos {
self.pos
}
}
impl Pos {
pub fn line(&self) -> usize {
self.0
}
pub fn col(&self) -> usize {
self.1
}
}
/// A lexical token produced by the tokenizer.
///
/// `PartialEq` is derived so tokens can be compared directly (for example in
/// `assert_eq!`). `Eq` is not available because of the `f64` payload of
/// [`Token::Float`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// An identifier that is not one of the reserved words below.
    Ident(String),
    /// A string literal (quoted or long-bracket form) with escapes resolved.
    String(String),
    /// An integer numeral.
    Integer(i64),
    /// A floating-point numeral.
    Float(f64),

    // Reserved words.
    /// `and`
    And,
    /// `break`
    Break,
    /// `do`
    Do,
    /// `else`
    Else,
    /// `elseif`
    Elseif,
    /// `end`
    End,
    /// `false`
    False,
    /// `for`
    For,
    /// `function`
    Function,
    /// `goto`
    Goto,
    /// `if`
    If,
    /// `in`
    In,
    /// `local`
    Local,
    /// `nil`
    Nil,
    /// `not`
    Not,
    /// `or`
    Or,
    /// `repeat`
    Repeat,
    /// `return`
    Return,
    /// `then`
    Then,
    /// `true`
    True,
    /// `until`
    Until,
    /// `while`
    While,

    // Operators and punctuation.
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `%`
    Mod,
    /// `^`
    Pow,
    /// `#`
    Hash,
    /// `&`
    Amp,
    /// `~`
    Tilde,
    /// `|`
    Bar,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
    /// `//`
    FlDiv,
    /// `==`
    Eq,
    /// `~=`
    Neq,
    /// `<=`
    Leq,
    /// `>=`
    Geq,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `=` (a single equals sign)
    Is,
    /// `(`
    ParO,
    /// `)`
    ParC,
    /// `{`
    CurlO,
    /// `}`
    CurlC,
    /// `[`
    SqrO,
    /// `]`
    SqrC,
    /// `::` (a double colon)
    Ass,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `.`
    Point,
    /// `..`
    Conc,
    /// `...`
    Dots,
}
/// Streaming tokenizer over a borrowed source string.
///
/// Tokens are pulled one at a time through the `Iterator` implementation,
/// which yields `Spanned<Token>` values.
pub struct Tokenizer<'a> {
/// Characters of the input paired with their running character index,
/// with one item of look-ahead.
input: Peekable<Enumerate<Chars<'a>>>,
/// Number of line breaks recorded so far; used as the line of new tokens.
line: usize,
/// Character index just past the most recently recorded line break;
/// subtracted from a token's index to obtain its column.
line_offset: usize,
}
impl<'a> Tokenizer<'a> {
/// Creates a tokenizer positioned at the start of `input`.
///
/// Line and column tracking both start at zero.
pub fn read(input: &'a str) -> Self {
    let input = input.chars().enumerate().peekable();
    Self {
        input,
        line: 0,
        line_offset: 0,
    }
}
/// Records a line break located at character index `pos`.
///
/// The next line is considered to start at `pos + 1`.
fn line_inc(&mut self, pos: usize) {
    self.line_offset = pos + 1;
    self.line += 1;
}
/// Reads the next token, skipping whitespace and comments first.
///
/// Returns `None` once the input is exhausted.
///
/// # Panics
///
/// Panics on a character that cannot start any token, and whenever one of
/// the `consume_*` helpers rejects its input.
fn read_token(&mut self) -> Option<Spanned<Token>> {
    // Consumes the next character if it equals `expected` and reports
    // whether it did so.
    fn follows(input: &mut Peekable<Enumerate<Chars<'_>>>, expected: char) -> bool {
        match input.peek() {
            Some(&(_, c)) if c == expected => {
                input.next();
                true
            }
            _ => false,
        }
    }

    // Find the first character that is neither whitespace nor part of a
    // `--` comment.
    let (index, first) = loop {
        self.consume_whitespace();
        let (index, first) = self.input.next()?;
        if first == '-' && follows(&mut self.input, '-') {
            self.consume_comment();
            continue;
        }
        break (index, first);
    };

    let pos = Pos(self.line, index.saturating_sub(self.line_offset));

    let inner = match first {
        '+' => Token::Add,
        '-' => Token::Sub,
        '*' => Token::Mul,
        '/' => {
            if follows(&mut self.input, '/') {
                Token::FlDiv
            } else {
                Token::Div
            }
        }
        '%' => Token::Mod,
        '^' => Token::Pow,
        '#' => Token::Hash,
        '&' => Token::Amp,
        '~' => {
            if follows(&mut self.input, '=') {
                Token::Neq
            } else {
                Token::Tilde
            }
        }
        '|' => Token::Bar,
        '<' => {
            if follows(&mut self.input, '<') {
                Token::Shl
            } else if follows(&mut self.input, '=') {
                Token::Leq
            } else {
                Token::Lt
            }
        }
        '>' => {
            if follows(&mut self.input, '>') {
                Token::Shr
            } else if follows(&mut self.input, '=') {
                Token::Geq
            } else {
                Token::Gt
            }
        }
        '=' => {
            if follows(&mut self.input, '=') {
                Token::Eq
            } else {
                Token::Is
            }
        }
        '(' => Token::ParO,
        ')' => Token::ParC,
        '{' => Token::CurlO,
        '}' => Token::CurlC,
        '[' => {
            // `[[` or `[=` opens a long-bracket string; the second
            // character is left for `consume_long_string` to read.
            if matches!(self.input.peek(), Some(&(_, '[')) | Some(&(_, '='))) {
                self.consume_long_string()
            } else {
                Token::SqrO
            }
        }
        ']' => Token::SqrC,
        ':' => {
            if follows(&mut self.input, ':') {
                Token::Ass
            } else {
                Token::Colon
            }
        }
        ';' => Token::Semi,
        ',' => Token::Comma,
        '.' => {
            if follows(&mut self.input, '.') {
                if follows(&mut self.input, '.') {
                    Token::Dots
                } else {
                    Token::Conc
                }
            } else if matches!(self.input.peek(), Some(&(_, c)) if is_numeric_char(c)) {
                // A numeral may start with the radix point, e.g. `.5`.
                self.consume_numeric('.')
            } else {
                Token::Point
            }
        }
        '"' | '\'' => self.consume_string(first),
        c if is_start_name_char(c) => self.consume_name(c),
        c if is_numeric_char(c) => self.consume_numeric(c),
        other => panic!("{}", other),
    };

    Some(Spanned { inner, pos })
}
/// Skips over consecutive whitespace characters, recording a line break
/// for every `'\n'` that is passed.
fn consume_whitespace(&mut self) {
    while matches!(self.input.peek(), Some(&(_, c)) if is_whitespace_char(c)) {
        if let Some((index, '\n')) = self.input.next() {
            self.line_inc(index);
        }
    }
}
/// Reads the remainder of a name whose first character `start` has
/// already been consumed, and classifies it as a reserved word or an
/// identifier.
///
/// # Panics
///
/// Panics if `start` is not a valid first character of a name.
fn consume_name(&mut self, start: char) -> Token {
    assert!(is_start_name_char(start));

    let mut name = String::new();
    name.push(start);
    while let Some(&(_, c)) = self.input.peek() {
        if !is_name_char(c) {
            break;
        }
        name.push(c);
        self.input.next();
    }

    let keyword = match name.as_str() {
        "and" => Some(Token::And),
        "break" => Some(Token::Break),
        "do" => Some(Token::Do),
        "else" => Some(Token::Else),
        "elseif" => Some(Token::Elseif),
        "end" => Some(Token::End),
        "false" => Some(Token::False),
        "for" => Some(Token::For),
        "function" => Some(Token::Function),
        "goto" => Some(Token::Goto),
        "if" => Some(Token::If),
        "in" => Some(Token::In),
        "local" => Some(Token::Local),
        "nil" => Some(Token::Nil),
        "not" => Some(Token::Not),
        "or" => Some(Token::Or),
        "repeat" => Some(Token::Repeat),
        "return" => Some(Token::Return),
        "then" => Some(Token::Then),
        "true" => Some(Token::True),
        "until" => Some(Token::Until),
        "while" => Some(Token::While),
        _ => None,
    };
    // Anything that is not a reserved word is an ordinary identifier.
    keyword.unwrap_or_else(|| Token::Ident(name))
}
/// Reads a quoted string literal whose opening delimiter `delim` has
/// already been consumed, resolving escape sequences on the way.
///
/// Supported escapes: `\a \b \t \n \v \f \r \\ \" \'`, a backslash followed
/// by a line feed, `\z` (skip following whitespace), `\xXX` (two hex
/// digits), `\d`, `\dd`, `\ddd` (decimal, at most 255) and `\u{XXX}`.
/// Values from `\x` and decimal escapes are stored as the character with
/// that code point.
///
/// # Panics
///
/// Panics on an unescaped line feed, on an unknown or malformed escape
/// sequence, and when the input ends before the closing delimiter.
fn consume_string(&mut self, delim: char) -> Token {
    let mut string = String::new();
    loop {
        let next = match self.input.next() {
            Some((_, c)) => c,
            // Running out of input before the closing quote is as malformed
            // as every other error case here, so it must not be accepted
            // silently.
            None => panic!("unfinished string"),
        };
        match next {
            c if c == delim => break,
            '\n' => panic!(),
            '\\' => match self.input.next() {
                Some((pos, escaped)) => match escaped {
                    'a' => string.push('\x07'),
                    'b' => string.push('\x08'),
                    't' => string.push('\t'),
                    'n' => string.push('\n'),
                    'v' => string.push('\x0B'),
                    'f' => string.push('\x0C'),
                    'r' => string.push('\x0D'),
                    '\\' | '"' | '\'' => string.push(escaped),
                    '\n' => {
                        // An escaped line break stays in the string and
                        // still counts as a new source line.
                        self.line_inc(pos);
                        string.push('\n')
                    }
                    'z' => self.consume_whitespace(),
                    'x' => {
                        let d1 = match self.input.next() {
                            Some((_, d)) => d,
                            None => panic!(),
                        };
                        let d2 = match self.input.next() {
                            Some((_, d)) => d,
                            None => panic!(),
                        };
                        if !is_hex_digit_char(d1) || !is_hex_digit_char(d2) {
                            panic!()
                        }
                        let mut digits = String::new();
                        digits.push(d1);
                        digits.push(d2);
                        let value = u8::from_str_radix(&digits, 16).unwrap();
                        string.push(value.into());
                    }
                    c if is_numeric_char(c) => {
                        // One to three decimal digits in total.
                        let mut digits = String::new();
                        digits.push(c);
                        for _ in 0..2 {
                            match self.input.peek() {
                                Some(&(_, d)) if is_numeric_char(d) => {
                                    self.input.next();
                                    digits.push(d);
                                }
                                _ => break,
                            }
                        }
                        // Fails (and panics) for values above 255.
                        let value = digits.parse::<u8>().unwrap();
                        string.push(value.into());
                    }
                    'u' => {
                        match self.input.next() {
                            Some((_, '{')) => {}
                            _ => panic!(),
                        }
                        let mut digits = String::new();
                        loop {
                            match self.input.next() {
                                Some((_, c)) if is_hex_digit_char(c) => digits.push(c),
                                Some((_, '}')) => break,
                                _ => panic!(),
                            }
                        }
                        if digits.is_empty() {
                            panic!()
                        }
                        let value = u32::from_str_radix(&digits, 16).unwrap();
                        // Panics when the value is not a valid `char`.
                        string.push(value.try_into().unwrap());
                    }
                    _ => panic!(),
                },
                None => panic!(),
            },
            _ => string.push(next),
        }
    }
    Token::String(string)
}
/// Reads a long-bracket string (`[[...]]`, `[=[...]=]`, ...) whose first
/// `[` has already been consumed.
///
/// The string ends at the first closing bracket with the same number of
/// `=` signs as the opening bracket. A line break directly after the
/// opening bracket is dropped; every other `\n`, `\r`, `\r\n` or `\n\r`
/// sequence is stored as a single `'\n'`. The line counter advances for
/// every sequence that contains a `'\n'`.
///
/// # Panics
///
/// Panics if the opening bracket is malformed or the input ends before
/// the matching closing bracket.
fn consume_long_string(&mut self) -> Token {
    // Count the `=` signs between the two opening brackets.
    let mut len = 0;
    loop {
        match self.input.next() {
            Some((_, '[')) => break,
            Some((_, '=')) => len += 1,
            _ => panic!(),
        }
    }

    let mut string = String::new();
    // True only while nothing has been read after the opening bracket.
    let mut at_start = true;
    loop {
        let (pos, c) = match self.input.next() {
            Some(item) => item,
            None => panic!(),
        };
        match c {
            ']' => {
                // Look ahead without consuming the character after the
                // `=` run: if this is not the closing bracket, that
                // character (possibly a `]` starting the real closing
                // bracket, or a line break) is handled by the next
                // iteration.
                let mut level = 0;
                while let Some(&(_, '=')) = self.input.peek() {
                    self.input.next();
                    level += 1;
                }
                if level == len && matches!(self.input.peek(), Some(&(_, ']'))) {
                    self.input.next();
                    break;
                }
                string.push(']');
                string.extend(std::iter::repeat('=').take(level));
            }
            '\n' => {
                self.line_inc(pos);
                if let Some(&(_, '\r')) = self.input.peek() {
                    self.input.next();
                }
                if !at_start {
                    string.push('\n');
                }
            }
            '\r' => {
                if let Some(&(lf, '\n')) = self.input.peek() {
                    self.input.next();
                    self.line_inc(lf);
                }
                if !at_start {
                    string.push('\n');
                }
            }
            c => string.push(c),
        }
        at_start = false;
    }
    Token::String(string)
}
/// Reads a numeral whose first character `digit` (a decimal digit or a
/// leading `.`) has already been consumed.
///
/// Accepted forms are decimal and `0x`/`0X` hexadecimal numerals with an
/// optional fractional part and an optional exponent (`e`/`E` for decimal,
/// `p`/`P` for hexadecimal).
///
/// A numeral with a radix point or an exponent yields [`Token::Float`].
/// Otherwise it yields [`Token::Integer`], with two overflow rules:
/// a decimal numeral that does not fit in an `i64` becomes a float, and a
/// hexadecimal numeral wraps around modulo 2^64.
///
/// # Panics
///
/// Panics on malformed numerals such as a second radix point, an `x`
/// after a non-zero first digit, an exponent marker that does not match
/// the radix, or a numeral without digits.
fn consume_numeric(&mut self, digit: char) -> Token {
    let mut hex = false;
    let mut int = digit != '.';
    let mut digits = String::new();
    digits.push(digit);

    // A hexadecimal prefix is only valid directly after a leading `0`.
    if let Some(&(_, c)) = self.input.peek() {
        if matches!(c, 'X' | 'x') {
            self.input.next();
            if digit != '0' {
                panic!();
            }
            hex = true;
            digits.clear();
        }
    }

    while let Some(&(_, c)) = self.input.peek() {
        let is_digit = if hex {
            is_hex_digit_char(c)
        } else {
            is_numeric_char(c)
        };
        match c {
            _ if is_digit => {
                self.input.next();
                digits.push(c);
            }
            '.' => {
                if !int {
                    panic!()
                }
                self.input.next();
                digits.push(c);
                int = false;
            }
            'E' | 'e' | 'P' | 'p' => {
                // `p` introduces the exponent of a hexadecimal numeral and
                // `e` that of a decimal one; any other pairing is malformed.
                // (In hexadecimal mode `e` is a digit and never gets here.)
                let binary = matches!(c, 'P' | 'p');
                if binary != hex {
                    panic!()
                }
                self.input.next();
                int = false;
                digits.push(c);
                // Optional sign; an explicit `+` is dropped.
                match self.input.peek() {
                    Some(&(_, '-')) => {
                        self.input.next();
                        digits.push('-');
                    }
                    Some(&(_, '+')) => {
                        self.input.next();
                    }
                    _ => {}
                }
                while let Some(&(_, d)) = self.input.peek() {
                    if !is_numeric_char(d) {
                        break;
                    }
                    self.input.next();
                    digits.push(d);
                }
                break;
            }
            _ => break,
        }
    }

    if int {
        if hex {
            if digits.is_empty() {
                panic!("malformed number")
            }
            // Hexadecimal integers wrap around instead of overflowing.
            let value = digits.chars().fold(0u64, |acc, d| {
                let d = d.to_digit(16).expect("only hex digits were collected");
                acc.wrapping_mul(16).wrapping_add(u64::from(d))
            });
            // Reinterpreting the bits as signed is the intended wrap-around.
            Token::Integer(value as i64)
        } else {
            match digits.parse::<i64>() {
                Ok(value) => Token::Integer(value),
                // Too large for an integer: fall back to a float.
                Err(..) => Token::Float(digits.parse().unwrap()),
            }
        }
    } else {
        let value: f64 = if hex {
            parse_f64_hex(&format!("0x{}", digits)).unwrap()
        } else {
            digits.parse().unwrap()
        };
        Token::Float(value)
    }
}
/// Skips a comment whose leading `--` has already been consumed.
///
/// If the comment continues with a long opening bracket (`[[`, `[=[`,
/// ...), everything up to the closing bracket with the same number of `=`
/// signs is skipped. Otherwise the rest of the current line is skipped,
/// including its terminating `'\n'`.
///
/// Every `'\n'` that is passed advances the line counter. A long comment
/// that is never closed simply runs to the end of the input.
fn consume_comment(&mut self) {
    // Detect a long opening bracket by peeking, so that a line break
    // ending a short comment such as `--` or `--[=` is never swallowed
    // here.
    let mut long: Option<usize> = None;
    if let Some(&(_, '[')) = self.input.peek() {
        self.input.next();
        let mut len = 0;
        while let Some(&(_, '=')) = self.input.peek() {
            self.input.next();
            len += 1;
        }
        if let Some(&(_, '[')) = self.input.peek() {
            self.input.next();
            long = Some(len);
        }
    }

    // A `for` loop would keep `self.input` borrowed, but the body needs to
    // peek at it.
    #[allow(clippy::while_let_on_iterator)]
    while let Some((pos, c)) = self.input.next() {
        match c {
            ']' => {
                if let Some(open) = long {
                    // Only peek past the `=` run: when this is not the
                    // closing bracket, the following character (perhaps a
                    // `]` or a line break) is handled by the next iteration.
                    let mut level = 0;
                    while let Some(&(_, '=')) = self.input.peek() {
                        self.input.next();
                        level += 1;
                    }
                    if level == open && matches!(self.input.peek(), Some(&(_, ']'))) {
                        self.input.next();
                        break;
                    }
                }
            }
            '\n' => {
                self.line_inc(pos);
                if long.is_none() {
                    break;
                }
            }
            _ => {}
        }
    }
}
}
impl<'a> Iterator for Tokenizer<'a> {
type Item = Spanned<Token>;
fn next(&mut self) -> Option<Self::Item> {
self.read_token()
}
}
/// Returns `true` for a space and for the control characters from `'\t'`
/// through `'\r'` (tab, line feed, vertical tab, form feed, carriage
/// return).
#[inline]
fn is_whitespace_char(chr: char) -> bool {
    chr == ' ' || ('\t'..='\r').contains(&chr)
}
/// Returns `true` if `chr` may begin a name: an ASCII letter or `_`.
#[inline]
fn is_start_name_char(chr: char) -> bool {
    chr == '_' || chr.is_ascii_alphabetic()
}
/// Returns `true` if `chr` may appear inside a name: an ASCII letter, an
/// ASCII digit or `_`.
#[inline]
fn is_name_char(chr: char) -> bool {
    chr == '_' || chr.is_ascii_alphanumeric()
}
/// Returns `true` for the ASCII decimal digits `0` through `9`.
#[inline]
fn is_numeric_char(chr: char) -> bool {
    chr.is_ascii_digit()
}
/// Returns `true` for ASCII hexadecimal digits: `0`-`9`, `a`-`f` and
/// `A`-`F`.
#[inline]
fn is_hex_digit_char(chr: char) -> bool {
    chr.is_ascii_hexdigit()
}