use super::{token::*, LineNumber, MaxValue};
use std::convert::TryFrom;
#[cfg(test)]
#[path = "tests/lex_test.rs"]
mod lex_test;
pub fn lex(s: &str) -> (LineNumber, Vec<Token>) {
let mut tokens = Lexer::lex(s);
let line_number = take_line_number(&mut tokens);
trim_end(&mut tokens);
collapse_go(&mut tokens);
collapse_lt_gt_equal(&mut tokens);
if line_number.is_some() {
separate_words(&mut tokens);
upgrade_tokens(&mut tokens);
}
(line_number, tokens)
}
fn collapse_lt_gt_equal(tokens: &mut Vec<Token>) {
let mut locs: Vec<(usize, Token)> = vec![];
let mut tokens_iter = tokens.windows(2).enumerate();
while let Some((index, tt)) = tokens_iter.next() {
if tt[0] == Token::Operator(Operator::Equal) {
if tt[1] == Token::Operator(Operator::Greater) {
locs.push((index, Token::Operator(Operator::EqualGreater)));
tokens_iter.next();
}
if tt[1] == Token::Operator(Operator::Less) {
locs.push((index, Token::Operator(Operator::EqualLess)));
tokens_iter.next();
}
}
if tt[1] == Token::Operator(Operator::Equal) {
if tt[0] == Token::Operator(Operator::Greater) {
locs.push((index, Token::Operator(Operator::GreaterEqual)));
tokens_iter.next();
}
if tt[0] == Token::Operator(Operator::Less) {
locs.push((index, Token::Operator(Operator::LessEqual)));
tokens_iter.next();
}
}
if tt[0] == Token::Operator(Operator::Less) && tt[1] == Token::Operator(Operator::Greater) {
locs.push((index, Token::Operator(Operator::NotEqual)));
tokens_iter.next();
}
}
while let Some((index, token)) = locs.pop() {
tokens.splice(index..index + 2, Some(token));
}
}
/// Rewrites the three-token sequences `GO <whitespace> TO` and
/// `GO <whitespace> SUB` into the single words `Goto2` and `Gosub2`.
///
/// `GO` and `SUB` are matched as plain identifiers and `TO` as the `To` word.
fn collapse_go(tokens: &mut Vec<Token>) {
    // Reference tokens built once and compared against every window.
    let go = Token::Ident(Ident::Plain("GO".to_string()));
    let sub = Token::Ident(Ident::Plain("SUB".to_string()));
    let to = Token::Word(Word::To);

    let mut merges: Vec<(usize, Token)> = Vec::new();
    for (start, window) in tokens.windows(3).enumerate() {
        let spaced = match window[1] {
            Token::Whitespace(_) => true,
            _ => false,
        };
        if window[0] != go || !spaced {
            continue;
        }
        if window[2] == to {
            merges.push((start, Token::Word(Word::Goto2)));
        } else if window[2] == sub {
            merges.push((start, Token::Word(Word::Gosub2)));
        }
    }

    // Replace from the back so earlier indices stay valid.
    for (start, merged) in merges.into_iter().rev() {
        tokens.splice(start..start + 3, Some(merged));
    }
}
/// Replaces every `Print2`, `Goto2` and `Gosub2` word with its `Print1`,
/// `Goto1` or `Gosub1` counterpart. All other tokens are left untouched.
fn upgrade_tokens(tokens: &mut Vec<Token>) {
    for slot in tokens.iter_mut() {
        let upgraded = match slot {
            Token::Word(Word::Print2) => Word::Print1,
            Token::Word(Word::Goto2) => Word::Goto1,
            Token::Word(Word::Gosub2) => Word::Gosub1,
            _ => continue,
        };
        *slot = Token::Word(upgraded);
    }
}
/// Inserts a one-character whitespace token between every pair of adjacent
/// tokens for which `is_word()` is true on both sides.
fn separate_words(tokens: &mut Vec<Token>) {
    let gaps: Vec<usize> = tokens
        .windows(2)
        .enumerate()
        .filter(|(_, pair)| pair[0].is_word() && pair[1].is_word())
        .map(|(left, _)| left + 1)
        .collect();

    // Insert from the back so the earlier positions are not shifted.
    for at in gaps.into_iter().rev() {
        tokens.insert(at, Token::Whitespace(1));
    }
}
/// Removes trailing blank space from the token list.
///
/// A final whitespace token is dropped. If the token that is then last is a
/// `Token::Unknown`, trailing whitespace inside its text is trimmed as well.
fn trim_end(tokens: &mut Vec<Token>) {
    let ends_in_whitespace = match tokens.last() {
        Some(Token::Whitespace(_)) => true,
        _ => false,
    };
    if ends_in_whitespace {
        tokens.pop();
    }

    if let Some(Token::Unknown(text)) = tokens.last_mut() {
        // Shorten in place to the length of the right-trimmed text.
        let kept = text.trim_end().len();
        text.truncate(kept);
    }
}
/// Removes a leading line number from `tokens` and returns it.
///
/// A line number is a literal that is either the first token or the second
/// token after leading whitespace, that converts via
/// `LineNumber::try_from`, and whose value does not exceed
/// `LineNumber::max_value()`. On success the literal and anything before it
/// are drained, and exactly one column of the whitespace that follows is
/// consumed (a longer run is shortened by one).
///
/// Returns `None`, leaving `tokens` untouched, when no valid line number is
/// found.
fn take_line_number(tokens: &mut Vec<Token>) -> LineNumber {
    // The first token is checked on its own so that a literal directly
    // followed by another literal (e.g. `10"HI"`) still yields a line number.
    let pos = match (tokens.get(0), tokens.get(1)) {
        (Some(Token::Literal(_)), _) => 0,
        (Some(Token::Whitespace(_)), Some(Token::Literal(_))) => 1,
        _ => return None,
    };

    let line = match LineNumber::try_from(&tokens[pos]) {
        Ok(line) => line,
        Err(_) => return None,
    };
    let in_range = match line {
        Some(val) => val <= LineNumber::max_value(),
        None => false,
    };
    if !in_range {
        return None;
    }

    tokens.drain(0..=pos);

    // Consume the single separator column after the number, keeping the rest.
    let whitespace_len: usize = match tokens.first() {
        Some(Token::Whitespace(len)) => *len,
        _ => 0,
    };
    if whitespace_len == 1 {
        tokens.remove(0);
    } else if whitespace_len > 1 {
        tokens[0] = Token::Whitespace(whitespace_len - 1);
    }

    line
}
/// Returns `true` for the two characters treated as blank space: space and tab.
fn is_basic_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' => true,
        _ => false,
    }
}
/// Returns `true` only for the ASCII decimal digits `0` through `9`.
fn is_basic_digit(c: char) -> bool {
    match c {
        '0'..='9' => true,
        _ => false,
    }
}
/// Returns `true` only for the ASCII letters `A`-`Z` and `a`-`z`.
fn is_basic_alphabetic(c: char) -> bool {
    c.is_ascii_uppercase() || c.is_ascii_lowercase()
}
/// Character-level tokenizer state; yields `Token`s through its `Iterator`
/// implementation.
struct Lexer<'a> {
    /// Remaining input characters, length-limited by `Take` and wrapped in
    /// `Peekable` so the next character can be inspected before consuming it.
    chars: std::iter::Peekable<std::iter::Take<std::str::Chars<'a>>>,
    /// Set once a `Rem1` or `Rem2` word has been produced. While set, the
    /// next call to `next` returns all remaining input as one
    /// `Token::Unknown`.
    remark: bool,
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Produces the next token, or `None` once the input is exhausted.
    ///
    /// The kind of token is chosen from the next unread character: blank
    /// space, a digit or `.`, an ASCII letter, a double quote, or anything
    /// else. After a `Rem1` word (from the alphabetic path) or a `Rem2` word
    /// (from the catch-all path) has been returned, the remainder of the
    /// input is handed back verbatim as a single `Token::Unknown`.
    fn next(&mut self) -> Option<Self::Item> {
        let lookahead = *self.chars.peek()?;

        if self.remark {
            let rest: String = self.chars.by_ref().collect();
            return Some(Token::Unknown(rest));
        }

        if is_basic_whitespace(lookahead) {
            self.whitespace()
        } else if is_basic_digit(lookahead) || lookahead == '.' {
            self.number()
        } else if is_basic_alphabetic(lookahead) {
            let word = self.alphabetic();
            if word == Some(Token::Word(Word::Rem1)) {
                self.remark = true;
            }
            word
        } else if lookahead == '"' {
            self.string()
        } else {
            let token = self.minutia();
            if token == Some(Token::Word(Word::Rem2)) {
                self.remark = true;
            }
            token
        }
    }
}
impl<'a> Lexer<'a> {
fn lex(s: &str) -> Vec<Token> {
let mut take = s.len();
if s.ends_with("\r\n") {
take -= 2
} else if s.ends_with('\n') {
take -= 1
}
Lexer {
chars: s.chars().take(take).peekable(),
remark: false,
}
.collect()
}
fn whitespace(&mut self) -> Option<Token> {
let mut len = 0;
loop {
self.chars.next();
len += 1;
if let Some(pk) = self.chars.peek() {
if is_basic_whitespace(*pk) {
continue;
}
}
return Some(Token::Whitespace(len));
}
}
fn number(&mut self) -> Option<Token> {
let mut s = String::new();
let mut digits = 0;
let mut decimal = false;
let mut exp = false;
loop {
let mut ch = match self.chars.next() {
Some(c) => c,
None => {
debug_assert!(false, "Failed to tokenize number.");
return None;
}
};
if ch == 'e' {
ch = 'E'
}
if ch == 'd' {
ch = 'D'
}
s.push(ch);
if !exp && is_basic_digit(ch) {
digits += 1;
}
if ch == '.' {
decimal = true
}
if ch == 'D' {
digits += 8;
}
if ch == '!' {
return Some(Token::Literal(Literal::Single(s)));
}
if ch == '#' {
return Some(Token::Literal(Literal::Double(s)));
}
if ch == '%' {
return Some(Token::Literal(Literal::Integer(s)));
}
if let Some(pk) = self.chars.peek() {
if ch == 'E' || ch == 'D' {
exp = true;
if *pk == '+' || *pk == '-' {
continue;
}
}
if is_basic_digit(*pk) {
continue;
}
if !decimal && *pk == '.' {
continue;
}
if !exp && *pk == 'E' || *pk == 'e' || *pk == 'D' || *pk == 'd' {
continue;
}
if *pk == '!' || *pk == '#' || *pk == '%' {
continue;
}
}
break;
}
if digits > 7 {
return Some(Token::Literal(Literal::Double(s)));
}
if !exp && !decimal && s.parse::<i16>().is_ok() {
return Some(Token::Literal(Literal::Integer(s)));
}
Some(Token::Literal(Literal::Single(s)))
}
fn string(&mut self) -> Option<Token> {
let mut s = String::new();
self.chars.next();
loop {
if let Some(ch) = self.chars.next() {
if ch != '"' {
s.push(ch);
continue;
}
}
return Some(Token::Literal(Literal::String(s)));
}
}
fn alphabetic(&mut self) -> Option<Token> {
let mut s = String::new();
let mut digit = false;
loop {
let ch = match self.chars.next() {
Some(ch) => ch.to_ascii_uppercase(),
None => {
debug_assert!(false, "Failed to tokenize alphabetic.");
return None;
}
};
s.push(ch);
if is_basic_digit(ch) {
digit = true;
}
if let Some(token) = Token::from_string(&s) {
return Some(token);
}
if ch == '$' {
return Some(Token::Ident(Ident::String(s)));
}
if ch == '!' {
return Some(Token::Ident(Ident::Single(s)));
}
if ch == '#' {
return Some(Token::Ident(Ident::Double(s)));
}
if ch == '%' {
return Some(Token::Ident(Ident::Integer(s)));
}
if let Some(pk) = self.chars.peek() {
if is_basic_alphabetic(*pk) {
if digit {
break;
}
continue;
}
if is_basic_digit(*pk) || *pk == '$' || *pk == '!' || *pk == '#' || *pk == '%' {
continue;
}
}
break;
}
Some(Token::Ident(Ident::Plain(s)))
}
fn minutia(&mut self) -> Option<Token> {
let mut s = String::new();
loop {
if let Some(ch) = self.chars.next() {
s.push(ch);
if let Some(t) = Token::from_string(&s) {
return Some(t);
}
if let Some(pk) = self.chars.peek() {
if is_basic_alphabetic(*pk) {
break;
}
if is_basic_digit(*pk) {
break;
}
if is_basic_whitespace(*pk) {
break;
}
continue;
}
break;
}
}
Some(Token::Unknown(s))
}
}