use std::str::Chars;
/// A single lexical token; `value` borrows directly from the source text.
#[derive(Debug, PartialEq)]
pub struct Token<'a> {
/// The token's text, sliced out of the original source.
pub value: &'a str,
/// Byte offset of the token's first character in the source.
/// -1 is used as a sentinel by the EOF token (see `EOF_TOKEN`).
pub byte_index: i32,
/// Classification of the token (see `TokenType`).
pub token_type: TokenType,
}
/// Sentinel token returned by `LexerIter::next` once the input is exhausted.
/// `byte_index` is -1 because it does not correspond to any source position.
const EOF_TOKEN: Token = Token {
value: "EOF",
byte_index: -1,
token_type: TokenType::Eof,
};
/// The categories of token this lexer produces.
#[derive(Debug, PartialEq)]
pub enum TokenType {
Char,
Comment,
Dec,
Eof,
Flt,
Ident,
Int,
Keyword,
OneCharSym,
Str,
ThreeCharSym,
TwoCharSym,
Unknown,
}
/// A character together with its position in the source.
/// `char_index` counts characters; `byte_index` counts UTF-8 bytes, so the
/// two diverge as soon as a multi-byte character appears.
#[derive(Debug, Clone, Copy, PartialEq)]
struct LexerIndex {
char: char,
char_index: i32,
byte_index: i32,
}
/// Streaming lexer over `source` with a sliding window of up to three
/// positions: `current` plus two lazily-filled lookahead slots.
/// Invariant: `current_plus_2` is only ever `Some` while `current_plus_1` is.
struct LexerIter<'a> {
source: &'a str,
str_iter: Chars<'a>,
current: Option<LexerIndex>,
current_plus_1: Option<LexerIndex>,
current_plus_2: Option<LexerIndex>,
}
// All ASCII characters that terminate a token. MUST stay sorted by byte
// value: `index_of_delimiter` binary-searches this table.
const DELIMITERS: &[u8] = "!\"#%&'()*+,-./:;<=>@[]`{|}~".as_bytes();
// NOTE(review): this table is never read in the visible code — presumably a
// planned parallel of DELIMITERS (b' ' marking non-one-char entries); confirm
// before removing.
const ONE_CHAR_SYMBOLS: &[u8] = "!\"#% '()*+,-./:;<=>@[]`{ }~".as_bytes();
// Parallel to DELIMITERS: row i lists the (up to two) characters that may
// follow DELIMITERS[i] to form a two-char symbol; b' ' means "no symbol".
// e.g. row for '<' is [b'=', b':'] giving "<=" and "<:".
const TWO_CHAR_SYMBOLS: [[u8; 2]; 27] = [
[b'=', b' '], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b'&', b' '], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b'>', b' '], [b' ', b' '], [b' ', b' '], [b'=', b':'], [b' ', b' '], [b'=', b':'], [b'=', b':'], [b'=', b':'], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b' ', b' '], [b'|', b' '], [b' ', b' '], [b' ', b' '], ];
// Static messages carried by `LexerError`; tests compare against these.
const COMMENT_IS_MISSING_CLOSING_SEQUENCE: &str = "Comment is missing closing sequence '*/'";
const FLOATING_POINT_SUFFIX_MUST_BE_ONE_OF: &str = "Floating point suffix must be one of [fFdDmM]";
const INTEGER_SUFFIX_MUST_BE_ONE_OF: &str = "Integer suffix must be one of [lLmM]";
const INVALID_DECIMAL_NUMBER: &str = "Invalid decimal number";
const INVALID_FLOATING_POINT_NUMBER: &str = "Invalid floating point number";
const INVALID_HEXADECIMAL_NUMBER: &str = "Invalid hexadecimal number";
const INVALID_INTEGER_NUMBER: &str = "Invalid integer";
/// A lexing failure: a static message plus the position of the token that
/// was being lexed when the error was detected.
#[derive(Debug, PartialEq)]
pub struct LexerError {
message: &'static str,
index: LexerIndex,
}
impl<'a> LexerIter<'a> {
/// The lexer's current position, or `None` once input is exhausted.
fn current(&self) -> Option<LexerIndex> {
self.current
}
/// Pulls the next character from `str_iter` and derives its position from
/// `current` (the previously fetched position). The very first character of
/// the input gets char/byte index 0.
fn fetch_next_char(str_iter: &mut Chars, current: &Option<LexerIndex>) -> Option<LexerIndex> {
    let next_char = str_iter.next()?;
    Some(match current {
        Some(prev) => LexerIndex {
            char: next_char,
            char_index: prev.char_index + 1,
            // Byte position advances by the UTF-8 width of the previous char.
            byte_index: prev.byte_index + prev.char.len_utf8() as i32,
        },
        None => LexerIndex {
            char: next_char,
            char_index: 0,
            byte_index: 0,
        },
    })
}
fn index_of_delimiter(c: char) -> isize {
let mut left = 0;
let mut right = DELIMITERS.len() - 1;
while left <= right {
let mid = (left + right) / 2;
match DELIMITERS[mid].cmp(&(c as u8)) {
std::cmp::Ordering::Equal => return mid as isize,
std::cmp::Ordering::Less => left = mid + 1,
std::cmp::Ordering::Greater => right = mid - 1,
}
}
-1
}
/// True while a `/* ... */` comment is still open: both positions exist and
/// they do not form the closing "*/" pair. Returns false at EOF so the
/// caller can distinguish "closed" from "unterminated" afterwards.
fn is_block_comment_content(index1: Option<LexerIndex>, index2: Option<LexerIndex>) -> bool {
    match (index1, index2) {
        (Some(a), Some(b)) => !(a.char == '*' && b.char == '/'),
        _ => false,
    }
}
/// True when `c` is one of the DELIMITERS characters.
fn is_delimiter(c: char) -> bool {
Self::index_of_delimiter(c) > -1
}
/// True for an ASCII decimal digit ('0'..='9').
fn is_digit(c: char) -> bool {
    // Idiomatic stdlib form of the original `c >= '0' && c <= '9'`.
    c.is_ascii_digit()
}
/// True at end of input, or when the position holds a separator character
/// (whitespace or delimiter). Used to validate how literals terminate.
fn is_eof_or_separator(index: Option<LexerIndex>) -> bool {
    match index {
        None => true,
        Some(i) => Self::is_separator(i.char),
    }
}
/// True for an ASCII hexadecimal digit ([0-9a-fA-F]).
fn is_hex_digit(c: char) -> bool {
    // Idiomatic stdlib form of the original three-range comparison.
    c.is_ascii_hexdigit()
}
/// True while a `//` comment continues: the position exists and is not the
/// terminating newline. False at EOF or at '\n'.
fn is_line_comment_content(index: Option<LexerIndex>) -> bool {
    match index {
        Some(i) => i.char != '\n',
        None => false,
    }
}
/// True when `c` ends a token: either whitespace or a delimiter character.
fn is_separator(c: char) -> bool {
Self::is_whitespace(c) || Self::is_delimiter(c)
}
/// True when the position exists and holds exactly the character `c`.
fn is_some_char(index: Option<LexerIndex>, c: char) -> bool {
    match index {
        Some(i) => i.char == c,
        None => false,
    }
}
/// True when the position exists and holds an ASCII decimal digit.
fn is_some_digit(index: Option<LexerIndex>) -> bool {
    match index {
        Some(i) => Self::is_digit(i.char),
        None => false,
    }
}
/// The lexer's own notion of whitespace: ASCII control whitespace
/// (TAB..CR), space, NEL (U+0085), the Unicode line/paragraph separators
/// (U+2028/U+2029), and the LRM/RLM direction marks (U+200E/U+200F).
/// NOTE(review): U+200E/U+200F are direction marks, not whitespace in
/// Unicode terms — their inclusion here looks deliberate; confirm intent.
fn is_whitespace(c: char) -> bool {
    matches!(
        c,
        '\u{0009}'..='\u{000D}'
            | ' '
            | '\u{0085}'
            | '\u{200E}'
            | '\u{200F}'
            | '\u{2028}'
            | '\u{2029}'
    )
}
/// Builds the "invalid number" error for a failed numeric literal:
/// integer wording for `TokenType::Int`, decimal wording otherwise.
fn make_invalid_number_err(&self, start: LexerIndex, token_type: TokenType) -> LexerError {
    let message = match token_type {
        TokenType::Int => INVALID_INTEGER_NUMBER,
        _ => INVALID_DECIMAL_NUMBER,
    };
    LexerError {
        message,
        index: start,
    }
}
/// Builds a token whose text spans `start..=stop` (inclusive) in the source.
///
/// The slice's end is `stop.byte_index + stop.char.len_utf8()`. The previous
/// `+ 1` assumed the final character was one byte and would slice into the
/// middle of a multi-byte character — a panic — e.g. for a line comment whose
/// last character is non-ASCII. For ASCII terminators the result is unchanged.
fn make_token(
    &self,
    start: LexerIndex,
    stop: LexerIndex,
    token_type: TokenType,
) -> Token<'a> {
    let token_start = start.byte_index as usize;
    let token_stop = stop.byte_index as usize + stop.char.len_utf8();
    Token {
        value: &self.source[token_start..token_stop],
        byte_index: start.byte_index,
        token_type,
    }
}
fn new(source: &'a str) -> Self {
let mut str_iter = source.chars();
let current = Self::fetch_next_char(&mut str_iter, &None);
LexerIter {
source,
str_iter,
current,
current_plus_1: None,
current_plus_2: None,
}
}
/// Produces the next token, or `EOF_TOKEN` once input is exhausted.
/// Dispatch order matters: quote characters and comment starters are also
/// delimiters, so they must be routed to their own parsers before the
/// generic symbol path.
fn next(&mut self) -> Result<Token, LexerError> {
    self.skip_whitespace();
    let current = match self.current {
        None => return Ok(EOF_TOKEN),
        Some(index) => index,
    };
    if Self::is_digit(current.char) {
        return self.parse_num();
    }
    match current.char {
        '\'' => return self.parse_single_quoted_str(),
        '"' => return self.parse_double_quoted_str(),
        '`' => return self.parse_quoted_ident(),
        '/' => {
            // "//" and "/*" start comments; a lone '/' falls through to
            // the symbol path below.
            if let Some(p1) = self.peek_1() {
                match p1.char {
                    '/' => return self.parse_line_comment(),
                    '*' => return self.parse_block_comment(),
                    _ => {}
                }
            }
        }
        _ => {}
    }
    let index = Self::index_of_delimiter(current.char);
    if index > -1 {
        self.parse_symbol(index as usize)
    } else {
        self.parse_keyword_or_ident()
    }
}
/// Advances `current` by one character, shifting the lookahead window down
/// before falling back to the underlying iterator.
fn next_char(&mut self) {
    match self.current_plus_1.take() {
        Some(next) => {
            self.current = Some(next);
            // Slide the second lookahead (if any) into the first slot.
            self.current_plus_1 = self.current_plus_2.take();
        }
        None => {
            self.current = Self::fetch_next_char(&mut self.str_iter, &self.current);
        }
    }
}
/// Lexes a `/* ... */` comment; on entry `current` is the opening '/'.
/// The token includes both the opening and closing sequences. Errors if
/// EOF is reached before "*/".
fn parse_block_comment(&mut self) -> Result<Token<'a>, LexerError> {
let start = self.current.unwrap();
// Step past "/*" so the content loop starts inside the comment.
self.next_char();
self.next_char();
// Consume until EOF or until current/peek form the closing "*/".
while Self::is_block_comment_content(self.current, self.peek_1()) {
self.next_char();
}
// The loop also exits on EOF; that means the comment never closed.
if self.current.is_none() || self.current_plus_1.is_none() {
return Err(LexerError {
message: COMMENT_IS_MISSING_CLOSING_SEQUENCE,
index: start,
});
}
// Step onto the closing '/', record it as the token end, then move past it.
self.next_char();
let stop = self.current.unwrap();
self.next_char();
Ok(self.make_token(start, stop, TokenType::Comment))
}
/// Placeholder: double-quoted string literals are not implemented yet.
fn parse_double_quoted_str(&mut self) -> Result<Token<'a>, LexerError> {
panic!("Needs impl - parse double quoted string")
}
/// Lexes a float's exponent; on entry `current` is the 'e'/'E'.
/// Grammar: optional sign, one or more digits, optional [fFdD] suffix,
/// then EOF or a separator. `start` is the literal's first digit and is
/// used for both the token slice and error positions.
fn parse_fractional_exponent(&mut self, start: LexerIndex) -> Result<Token<'a>, LexerError> {
// Step past 'e'/'E'; a bare trailing exponent marker is an error.
self.next_char();
if self.current.is_none() {
return Err(LexerError {
message: INVALID_FLOATING_POINT_NUMBER,
index: start,
});
}
let current_char = self.current.unwrap().char;
// Optional sign, which must itself be followed by something.
if current_char == '-' || current_char == '+' {
self.next_char();
if self.current.is_none() {
return Err(LexerError {
message: INVALID_FLOATING_POINT_NUMBER,
index: start,
});
}
}
// At least one exponent digit is required.
if !Self::is_digit(self.current.unwrap().char) {
return Err(LexerError {
message: INVALID_FLOATING_POINT_NUMBER,
index: start,
});
}
// Consume remaining digits; `current` ends on the last one.
while Self::is_some_digit(self.peek_1()) {
self.next_char();
}
let last_exponent_digit = self.current.unwrap();
if Self::is_eof_or_separator(self.peek_1()) {
// Advance onto the separator so the next token starts from it.
if self.current_plus_1.is_some() {
self.next_char();
}
return Ok(self.make_token(start, last_exponent_digit, TokenType::Flt));
}
// Optional float suffix. NOTE(review): [mM] is accepted after a plain
// fraction (parse_fractional_part) but not after an exponent, although
// FLOATING_POINT_SUFFIX_MUST_BE_ONE_OF names [fFdDmM] — confirm intent.
let p1 = self.current_plus_1.unwrap().char;
if p1 == 'f' || p1 == 'F' || p1 == 'd' || p1 == 'D' {
self.next_char();
}
let stop = self.current.unwrap();
self.next_char();
// Anything else trailing the literal is an error.
if !Self::is_eof_or_separator(self.current) {
return Err(LexerError {
message: INVALID_FLOATING_POINT_NUMBER,
index: start,
});
}
Ok(self.make_token(start, stop, TokenType::Flt))
}
/// Lexes the digits after the decimal point; on entry `current` is the
/// first fractional digit (parse_num has already verified it exists).
/// Handles an optional exponent ('e'/'E') or a type suffix: [fFdD] -> Flt,
/// [mM] -> Dec.
fn parse_fractional_part(&mut self, start: LexerIndex) -> Result<Token<'a>, LexerError> {
// Consume fractional digits; `current` ends on the last one.
while Self::is_some_digit(self.peek_1()) {
self.next_char();
}
let last_fractional_digit = self.current.unwrap();
if Self::is_eof_or_separator(self.peek_1()) {
// Advance onto the separator so the next token starts from it.
if self.current_plus_1.is_some() {
self.next_char();
}
return Ok(self.make_token(start, last_fractional_digit, TokenType::Flt));
}
let possible_exponent_or_suffix = self.current_plus_1.unwrap();
// Exponent form: hand off with `current` moved onto the 'e'/'E'.
if possible_exponent_or_suffix.char == 'e' || possible_exponent_or_suffix.char == 'E' {
self.next_char();
return self.parse_fractional_exponent(start);
}
// Suffix decides the token type; anything else is an error.
let token_type = match possible_exponent_or_suffix.char {
'f' | 'F' | 'd' | 'D' => TokenType::Flt,
'm' | 'M' => TokenType::Dec,
_ => {
return Err(LexerError {
message: FLOATING_POINT_SUFFIX_MUST_BE_ONE_OF,
index: start,
})
}
};
// Step onto the suffix, then past it; the literal must then terminate.
self.next_char();
self.next_char();
if Self::is_eof_or_separator(self.current) {
Ok(self.make_token(start, possible_exponent_or_suffix, token_type))
} else {
Err(self.make_invalid_number_err(start, token_type))
}
}
/// Lexes the remainder of a hexadecimal integer; on entry `current` is the
/// 'x'/'X' of the "0x" prefix and `start` is the leading '0'.
///
/// Accepts one or more hex digits followed by EOF or a separator. The
/// previous version accepted exactly two digits (rejecting "0xA" and
/// "0xABC") and reported trailing garbage with the floating-point message
/// instead of INVALID_HEXADECIMAL_NUMBER; both are fixed here. All inputs
/// it previously accepted lex identically.
fn parse_hex_int(&mut self, start: LexerIndex) -> Result<Token<'a>, LexerError> {
    // At least one hex digit must follow the prefix.
    if self.peek_1().is_none() || !Self::is_hex_digit(self.current_plus_1.unwrap().char) {
        return Err(LexerError {
            message: INVALID_HEXADECIMAL_NUMBER,
            index: start,
        });
    }
    // Step onto the first digit, then consume any further hex digits.
    self.next_char();
    while self.peek_1().map_or(false, |i| Self::is_hex_digit(i.char)) {
        self.next_char();
    }
    let stop = self.current.unwrap();
    self.next_char();
    // The literal must end at EOF or a separator.
    if !Self::is_eof_or_separator(self.current) {
        return Err(LexerError {
            message: INVALID_HEXADECIMAL_NUMBER,
            index: start,
        });
    }
    Ok(self.make_token(start, stop, TokenType::Int))
}
/// Placeholder: keywords and identifiers are not implemented yet.
fn parse_keyword_or_ident(&mut self) -> Result<Token<'a>, LexerError> {
panic!("Needs impl - parse keyword or ident")
}
/// Lexes a `//` comment; on entry `current` is the first '/'.
/// The token ends at the last character before the newline (or EOF); the
/// newline itself is left for skip_whitespace on the next call.
fn parse_line_comment(&mut self) -> Result<Token<'a>, LexerError> {
let start = self.current.unwrap();
self.next_char();
// Consume until the lookahead is a newline or EOF.
while Self::is_line_comment_content(self.peek_1()) {
self.next_char();
}
let stop = self.current.unwrap();
self.next_char();
Ok(self.make_token(start, stop, TokenType::Comment))
}
/// Lexes a numeric literal; on entry `current` is its first digit.
/// Routes "0x"/"0X" to parse_hex_int and a '.' to the fractional parsers;
/// otherwise lexes an integer with an optional [lL] (Int) or [mM] (Dec)
/// suffix, terminated by EOF or a separator.
fn parse_num(&mut self) -> Result<Token<'a>, LexerError> {
let start = self.current.unwrap();
// Hexadecimal prefix check before consuming decimal digits.
if start.char == '0' && self.peek_1().is_some() {
let p1 = self.current_plus_1.unwrap().char;
if p1 == 'x' || p1 == 'X' {
self.next_char();
return self.parse_hex_int(start);
}
}
// Consume the integer digits; `current` ends on the last one.
while Self::is_some_digit(self.peek_1()) {
self.next_char();
}
// A '.' must be followed by at least one digit ("23." is an error).
if Self::is_some_char(self.peek_1(), '.') {
self.next_char();
self.next_char();
if self.current.is_none() || !Self::is_digit(self.current.unwrap().char) {
return Err(LexerError {
message: INVALID_FLOATING_POINT_NUMBER,
index: start,
});
}
return self.parse_fractional_part(start);
}
if Self::is_eof_or_separator(self.peek_1()) {
// Inline token build; `+ 1` is safe here because the final char is
// always an ASCII digit (one byte).
let token_start = start.byte_index as usize;
let token_stop = self.current.unwrap().byte_index as usize + 1;
// Advance onto the separator so the next token starts from it.
if self.current_plus_1.is_some() {
self.next_char();
}
return Ok(Token {
value: &self.source[token_start..token_stop],
byte_index: start.byte_index,
token_type: TokenType::Int,
});
}
// Suffix decides the token type; anything else is an error.
let possible_suffix = self.current_plus_1.unwrap();
let token_type = match possible_suffix.char {
'l' | 'L' => TokenType::Int,
'm' | 'M' => TokenType::Dec,
_ => {
return Err(LexerError {
message: INTEGER_SUFFIX_MUST_BE_ONE_OF,
index: start,
})
}
};
// Step onto the suffix, then past it; the literal must then terminate.
self.next_char();
self.next_char();
if Self::is_eof_or_separator(self.current) {
Ok(self.make_token(start, possible_suffix, token_type))
} else {
Err(self.make_invalid_number_err(start, token_type))
}
}
/// Placeholder: backtick-quoted identifiers are not implemented yet.
fn parse_quoted_ident(&mut self) -> Result<Token<'a>, LexerError> {
panic!("Needs impl - parse quoted ident")
}
/// Placeholder: single-quoted string literals are not implemented yet.
fn parse_single_quoted_str(&mut self) -> Result<Token<'a>, LexerError> {
panic!("Needs impl - parse single quoted string")
}
/// Lexes a one/two/three-char symbol; on entry `current` is the first
/// character and `index` is its position in DELIMITERS.
///
/// Fix: TWO_CHAR_SYMBOLS uses b' ' as a "no symbol here" placeholder, but
/// the old code compared the placeholder against the input, so a delimiter
/// followed by a real space (e.g. "! " or "& ") lexed as a bogus two-char
/// symbol containing the space. Placeholder entries are now skipped.
fn parse_symbol(&mut self, index: usize) -> Result<Token<'a>, LexerError> {
    let first_index = self.current.unwrap();
    self.next_char();
    if let Some(second_index) = self.current {
        // "..." is the only three-char symbol.
        if first_index.char == '.'
            && second_index.char == '.'
            && Self::is_some_char(self.peek_1(), '.')
        {
            self.next_char();
            let third_index = self.current.unwrap();
            self.next_char();
            return Ok(self.make_token(first_index, third_index, TokenType::ThreeCharSym));
        }
        let second_chars: [u8; 2] = TWO_CHAR_SYMBOLS[index];
        for &candidate in &second_chars {
            // b' ' marks an empty slot and must never match real input.
            if candidate != b' ' && second_index.char == candidate as char {
                self.next_char();
                return Ok(self.make_token(first_index, second_index, TokenType::TwoCharSym));
            }
        }
    }
    Ok(self.make_token(first_index, first_index, TokenType::OneCharSym))
}
/// Returns the character after `current`, fetching it on demand and caching
/// it in `current_plus_1`. Does not advance the lexer.
fn peek_1(&mut self) -> Option<LexerIndex> {
if self.current_plus_1.is_none() {
self.current_plus_1 = Self::fetch_next_char(&mut self.str_iter, &self.current);
}
self.current_plus_1
}
/// Returns the character two positions after `current`, filling
/// `current_plus_1` first if needed so the window invariant holds.
/// Does not advance the lexer.
fn peek_2(&mut self) -> Option<LexerIndex> {
if self.current_plus_1.is_none() {
self.current_plus_1 = Self::fetch_next_char(&mut self.str_iter, &self.current);
self.current_plus_2 = Self::fetch_next_char(&mut self.str_iter, &self.current_plus_1);
} else if self.current_plus_2.is_none() {
self.current_plus_2 = Self::fetch_next_char(&mut self.str_iter, &self.current_plus_1);
}
self.current_plus_2
}
/// Advances `current` past any run of whitespace (per `is_whitespace`).
fn skip_whitespace(&mut self) {
    while let Some(index) = self.current {
        if !Self::is_whitespace(index.char) {
            break;
        }
        self.next_char();
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Asserts that `source` lexes as exactly one token whose text equals the
/// whole input, followed by EOF. (`assert!` replaces the non-idiomatic
/// `assert_eq!(true, ...)` form.)
fn assert_parse_eq(source: &str) {
    let mut lexer_iter = LexerIter::new(source);
    let next_token = lexer_iter.next();
    assert!(next_token.is_ok());
    assert_eq!(source, next_token.unwrap().value);
    let next_token = lexer_iter.next();
    assert!(next_token.is_ok());
    assert_eq!(EOF_TOKEN, next_token.unwrap());
}
/// Every one-char symbol must lex individually even with no separators
/// between them. (Rewritten as a data-driven loop over the expected tokens.)
#[test]
fn test_adjacent_one_char_symbols() {
    let source = "!#%()*+,-./:;<>@[]{}~=";
    let mut lexer_iter = LexerIter::new(source);
    for &expected in [
        "!", "#", "%", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "<", ">", "@", "[",
        "]", "{", "}", "~", "=",
    ]
    .iter()
    {
        assert_eq!(expected, lexer_iter.next().unwrap().value);
    }
    assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
/// Every two-char symbol must lex individually even when run together.
/// (Rewritten as a data-driven loop over the expected tokens.)
#[test]
fn test_adjacent_two_char_symbols() {
    let source = "==!=<=>=&&->||:=::<:>:";
    let mut lexer_iter = LexerIter::new(source);
    for &expected in [
        "==", "!=", "<=", ">=", "&&", "->", "||", ":=", "::", "<:", ">:",
    ]
    .iter()
    {
        assert_eq!(expected, lexer_iter.next().unwrap().value);
    }
    assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
/// Block comments: whole-input comments (with and without newlines and
/// content), plus a comment preceded by another token. (The five
/// copy-pasted whole-input cases are collapsed into one loop.)
#[test]
fn test_block_comment() {
    for &source in ["/**/", "/*\n*/", "/*a*/", "/*\na*/", "/*a\n*/"].iter() {
        let mut lexer_iter = LexerIter::new(source);
        assert_eq!(source, lexer_iter.next().unwrap().value);
        assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
    }
    // A block comment after another token.
    let mut lexer_iter = LexerIter::new("1/*a\n*/");
    assert_eq!("1", lexer_iter.next().unwrap().value);
    assert_eq!("/*a\n*/", lexer_iter.next().unwrap().value);
    assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
/// Every character of the DELIMITERS set is a delimiter; a couple of
/// non-delimiters are rejected. (27 copy-pasted asserts collapsed into
/// loops over the same character sets.)
#[test]
fn test_delimiters() {
    for c in "!\"#%&'()*+,-./:;<=>@[]`{|}~".chars() {
        assert!(LexerIter::is_delimiter(c), "expected delimiter: {:?}", c);
    }
    for c in "?^".chars() {
        assert!(!LexerIter::is_delimiter(c), "expected non-delimiter: {:?}", c);
    }
}
// Hexadecimal literals with both prefix spellings lex as Int tokens.
#[test]
fn test_hex_int_with_small_x() {
let source = "0xAE";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("0xAE", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Int, n.token_type);
}
#[test]
fn test_hex_int_with_large_x() {
let source = "0Xae";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("0Xae", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Int, n.token_type);
}
/// Char/byte index bookkeeping across a multi-byte character: the 4-byte
/// crab advances byte_index by 4 but char_index by 1. (Rewritten as a
/// table-driven loop; `assert!` replaces `assert_eq!(true, ...)`.)
#[test]
fn test_iter_chars() {
    let crab = '🦀';
    assert_eq!(4, crab.len_utf8());
    let source = "1+🦀+2";
    let mut lexer_iter = LexerIter::new(source);
    // Expected (char, byte_index, char_index) at each step.
    for &(ch, byte_index, char_index) in [
        ('1', 0, 0),
        ('+', 1, 1),
        ('🦀', 2, 2),
        ('+', 6, 3),
        ('2', 7, 4),
    ]
    .iter()
    {
        let current = lexer_iter.current.expect("expected another char");
        assert_eq!(ch, current.char);
        assert_eq!(byte_index, current.byte_index);
        assert_eq!(char_index, current.char_index);
        lexer_iter.next_char();
    }
    assert!(lexer_iter.current.is_none());
}
/// Line comments: comments running to EOF, and comments terminated by a
/// newline with a second comment following. (Collapsed copy-pasted cases
/// into two data-driven loops; the original repeated the "//a" case twice,
/// the duplicate is dropped.)
#[test]
fn test_line_comment() {
    // A comment with no newline runs to end of input.
    for &source in ["//", "// ", "//a"].iter() {
        let mut lexer_iter = LexerIter::new(source);
        assert_eq!(source, lexer_iter.next().unwrap().value);
        assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
    }
    // A newline ends the comment; the next line lexes independently.
    for &(source, first) in [("//\n//", "//"), ("// \n//", "// "), ("//a\n//", "//a")].iter() {
        let mut lexer_iter = LexerIter::new(source);
        assert_eq!(first, lexer_iter.next().unwrap().value);
        assert_eq!("//", lexer_iter.next().unwrap().value);
        assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
    }
}
// A plain multi-digit integer lexes as a single Int token.
#[test]
fn test_multi_digit_int() {
let source = "23";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("23", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Int, n.token_type);
}
/// Each one-char symbol round-trips through assert_parse_eq on its own.
/// (23 copy-pasted calls collapsed into a loop over the same set.)
#[test]
fn test_one_char_symbols() {
    for c in "!#%()*+,-./:;<=>@[]{|}~".chars() {
        assert_parse_eq(&c.to_string());
    }
}
// A simple fractional literal lexes as a Flt token.
#[test]
fn test_real() {
let source = "23.1";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("23.1", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Flt, n.token_type);
}
// '/' directly after a float terminates the literal and lexes as its own
// symbol. NOTE(review): the trailing "13.2" is never consumed or asserted.
#[test]
fn test_real_division() {
let source = "23.1/13.2";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("23.1", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Flt, n.token_type);
let n = lexer_iter.next().unwrap();
assert_eq!("/", n.value);
assert_eq!(4, n.byte_index);
assert_eq!(TokenType::OneCharSym, n.token_type);
}
// Exponent floats on both sides of a division; the '/' terminates the
// first exponent and a full second literal is lexed afterwards.
#[test]
fn test_real_division_with_exponents() {
let source = "231.0e-1/132.0e-1";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("231.0e-1", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Flt, n.token_type);
let n = lexer_iter.next().unwrap();
assert_eq!("/", n.value);
assert_eq!(8, n.byte_index);
assert_eq!(TokenType::OneCharSym, n.token_type);
let n = lexer_iter.next().unwrap();
assert_eq!("132.0e-1", n.value);
assert_eq!(9, n.byte_index);
assert_eq!(TokenType::Flt, n.token_type);
}
// Negative single-digit exponent.
#[test]
fn test_real_with_one_digit_exponent() {
let source = "1.0e-1";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("1.0e-1", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Flt, n.token_type);
}
// Positive multi-digit exponent.
#[test]
fn test_real_with_two_digit_exponent() {
let source = "1.0e+12";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("1.0e+12", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Flt, n.token_type);
}
/// An unknown suffix after a fraction reports the suffix error.
/// (`assert!(r.is_err(), ...)` replaces the `if r.is_ok() { panic! }` form
/// in these three tests.)
#[test]
fn test_real_with_invalid_suffix_error() {
    let mut lexer_iter = LexerIter::new("23.0x");
    let r = lexer_iter.next();
    assert!(r.is_err(), "Error expected");
    assert_eq!(
        FLOATING_POINT_SUFFIX_MUST_BE_ONE_OF,
        r.err().unwrap().message
    );
}
/// A decimal point must be followed by a digit.
#[test]
fn test_real_with_trailing_period_error() {
    let mut lexer_iter = LexerIter::new("23.");
    let r = lexer_iter.next();
    assert!(r.is_err(), "Error expected");
    assert_eq!(INVALID_FLOATING_POINT_NUMBER, r.err().unwrap().message);
}
/// A suffix directly after the decimal point (no digits) is invalid.
#[test]
fn test_real_with_trailing_suffix_error() {
    let mut lexer_iter = LexerIter::new("23.f");
    let r = lexer_iter.next();
    assert!(r.is_err(), "Error expected");
    assert_eq!(INVALID_FLOATING_POINT_NUMBER, r.err().unwrap().message);
}
// A single digit lexes as an Int token.
#[test]
fn test_single_digit_int() {
let source = "1";
let mut lexer_iter = LexerIter::new(source);
let n = lexer_iter.next().unwrap();
assert_eq!("1", n.value);
assert_eq!(0, n.byte_index);
assert_eq!(TokenType::Int, n.token_type);
}
// "..." is the only three-char symbol.
#[test]
fn test_three_char_symbols() {
assert_parse_eq("...");
}
// Greedy matching: "===" lexes as "==" then "=".
#[test]
fn test_three_equals() {
let source = "===";
let mut lexer_iter = LexerIter::new(source);
assert_eq!("==", lexer_iter.next().unwrap().value);
assert_eq!("=", lexer_iter.next().unwrap().value);
assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
// Each two-char symbol round-trips on its own.
#[test]
fn test_two_char_symbols() {
assert_parse_eq("!=");
assert_parse_eq("&&");
assert_parse_eq("->");
assert_parse_eq(":=");
assert_parse_eq("::");
assert_parse_eq("<=");
assert_parse_eq("<:");
assert_parse_eq("==");
assert_parse_eq(">=");
assert_parse_eq(">:");
assert_parse_eq("||");
}
// ".." is not a symbol: it lexes as two one-char periods.
#[test]
fn test_two_periods() {
let source = "..";
let mut lexer_iter = LexerIter::new(source);
assert_eq!(".", lexer_iter.next().unwrap().value);
assert_eq!(".", lexer_iter.next().unwrap().value);
assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
// "--" is not a symbol: two one-char minuses.
#[test]
fn test_two_subtracts() {
let source = "--";
let mut lexer_iter = LexerIter::new(source);
assert_eq!("-", lexer_iter.next().unwrap().value);
assert_eq!("-", lexer_iter.next().unwrap().value);
assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
// Greedy matching: "...." lexes as "..." then ".".
#[test]
fn test_four_periods() {
let source = "....";
let mut lexer_iter = LexerIter::new(source);
assert_eq!("...", lexer_iter.next().unwrap().value);
assert_eq!(".", lexer_iter.next().unwrap().value);
assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
// Greedy matching: "....." lexes as "..." then "." then ".".
#[test]
fn test_five_periods() {
let source = ".....";
let mut lexer_iter = LexerIter::new(source);
assert_eq!("...", lexer_iter.next().unwrap().value);
assert_eq!(".", lexer_iter.next().unwrap().value);
assert_eq!(".", lexer_iter.next().unwrap().value);
assert_eq!(EOF_TOKEN, lexer_iter.next().unwrap());
}
}