use crate::error::*;
/// Category of a lexical token.
///
/// Variants are fieldless; the text a token was built from is stored
/// separately on the token itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenClass {
    /// A name: starts with an ASCII letter or `_`.
    Identifier,
    /// A numeric literal that parses as `i64`.
    Int,
    /// A numeric literal that parses as `f64` but not as `i64`.
    Float,
    /// `=`
    Assignment,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `==`
    Eq,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// A line break.
    Newline,
    /// `'`
    Prime,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
}
/// A single lexical token: a [`TokenClass`] paired with its text.
#[derive(Debug, Clone)]
pub struct Token {
    /// Category of the token.
    class: TokenClass,
    /// Text stored with the token (normally the characters it was lexed from).
    value: String,
}

impl Token {
    /// Builds a token from its class and text.
    pub fn new(class: TokenClass, value: String) -> Self {
        Token { class, value }
    }

    /// Returns the token's class (a `Copy` value).
    pub fn get_class(&self) -> TokenClass {
        self.class
    }

    /// Returns an owned copy of the token's text.
    pub fn get_value(&self) -> String {
        self.value.clone()
    }

    /// Returns `true` when this token's class equals `class`.
    pub fn check(&self, class: TokenClass) -> bool {
        class == self.get_class()
    }
}
/// Forward-only cursor over the characters of an input string.
pub struct CharStream {
    /// The input, split into `char`s so it can be indexed by position.
    characters: Vec<char>,
    /// Position of the next character to be read.
    index: usize,
}

impl CharStream {
    /// Creates a stream positioned at the first character of `input`.
    pub fn from(input: String) -> Self {
        Self {
            characters: input.chars().collect(),
            index: 0,
        }
    }

    /// Returns the current character and advances past it, or `None` once the
    /// input is exhausted (the position is then left unchanged).
    pub fn next(&mut self) -> Option<char> {
        let character = self.peek()?;
        self.index += 1;
        Some(character)
    }

    /// Returns the current character without consuming it, or `None` at end of
    /// input.
    pub fn peek(&self) -> Option<char> {
        self.characters.get(self.index).copied()
    }

    /// Returns the character `n` positions ahead of the current one without
    /// consuming anything (`lookahead(0)` is the same as `peek()`).
    ///
    /// Returns `None` when that position lies beyond the end of the input.
    pub fn lookahead(&self, n: usize) -> Option<char> {
        // Checked arithmetic plus `get` keeps any `n` from panicking.
        let target = self.index.checked_add(n)?;
        self.characters.get(target).copied()
    }

    /// Consumes and returns the longest run of upcoming characters that all
    /// occur in `superstring`. The result is empty when the current character
    /// is not in `superstring` or the input is exhausted.
    pub fn get(&mut self, superstring: &str) -> String {
        let mut current = String::new();
        while let Some(c) = self.peek() {
            if !superstring.contains(c) {
                break;
            }
            current.push(c);
            self.index += 1;
        }
        current
    }

    /// Skips any number of consecutive `//` line comments starting at the
    /// current position.
    ///
    /// Each comment is consumed up to and including its terminating `'\n'`; a
    /// comment that runs to the end of the input is consumed entirely. Does
    /// nothing when the stream is not positioned on `//`.
    pub fn skip_comments(&mut self) {
        while self.peek() == Some('/') && self.lookahead(1) == Some('/') {
            // `next()` yields `None` at end of input, so an unterminated
            // comment ends the loop instead of spinning forever.
            while let Some(c) = self.next() {
                if c == '\n' {
                    break;
                }
            }
        }
    }
}
/// Characters that may follow the first character of an identifier.
const IDENTIFIER: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
/// Characters that may continue a numeric literal: the decimal digits and `.`.
const NUMERIC: &str = "0123456789.";
/// Characters discarded between tokens.
const SEPARATORS: &str = " \t\n";
pub struct Tokenizer {
tokens: Vec<Token>,
index: usize,
}
impl Tokenizer {
pub fn from(input: String) -> Self {
let index = 0;
let mut charstream = CharStream::from(input);
let mut tokens = Vec::new();
charstream.skip_comments();
while let Some(c) = charstream.next() {
if SEPARATORS.contains(c) {
continue;
}
let token = match c {
'a'..='z' | 'A'..='Z' | '_' => {
let name = format!(
"{}{}",
c,
charstream.get(IDENTIFIER),
);
Token::new(TokenClass::Identifier, name)
},
'0'..='9' => {
let raw = format!(
"{}{}",
c,
charstream.get(NUMERIC),
);
let token = match str::parse::<i64>(&raw) {
Ok(_) => Token::new(TokenClass::Int, raw),
Err(_) => match str::parse::<f64>(&raw) {
Ok(_) => Token::new(TokenClass::Float, raw),
Err(_) => {
throw(CouldNotParseNumeric);
Token::new(TokenClass::Float, "0.0".to_string())
},
},
};
token
},
'=' => if charstream.peek() == Some('=') {
Token::new(TokenClass::Eq, "==".to_string())
} else if let Some(_) = charstream.peek() {
Token::new(TokenClass::Assignment, "=".to_string())
} else {
throw(UnexpectedEof);
Token::new(TokenClass::Newline, '\n'.to_string())
},
'\n' => Token::new(TokenClass::Newline, '\n'.to_string()),
'+' => Token::new(TokenClass::Plus, '+'.to_string()),
'-' => {
let chr = match charstream.peek() {
Some(p) => p,
None => {
throw(UnexpectedEof);
'\n'
},
};
if NUMERIC.contains(chr) {
let raw = format!(
"{}{}",
c,
charstream.get(NUMERIC),
);
let token = match str::parse::<i64>(&raw) {
Ok(_) => Token::new(TokenClass::Int, raw),
Err(_) => match str::parse::<f64>(&raw) {
Ok(_) => Token::new(TokenClass::Float, raw),
Err(_) => {
throw(CouldNotParseNumeric);
Token::new(TokenClass::Float, "0.0".to_string())
},
},
};
token
} else {
Token::new(TokenClass::Minus, '-'.to_string())
}
}
'*' => Token::new(TokenClass::Multiply, '*'.to_string()),
'/' => Token::new(TokenClass::Divide, '/'.to_string()),
';' => Token::new(TokenClass::Semicolon, ';'.to_string()),
'(' => Token::new(TokenClass::OpenParen, '('.to_string()),
')' => Token::new(TokenClass::CloseParen, ')'.to_string()),
'[' => Token::new(TokenClass::OpenBracket, '['.to_string()),
']' => Token::new(TokenClass::CloseBracket, ']'.to_string()),
',' => Token::new(TokenClass::Comma, ';'.to_string()),
'\'' => Token::new(TokenClass::Prime, '\''.to_string()),
_ => {
throw(UnexpectedEof);
Token::new(TokenClass::Newline, '\n'.to_string())
},
};
tokens.push(token);
charstream.skip_comments();
}
Self {
tokens,
index,
}
}
pub fn peek(&self) -> Option<Token> {
if self.index >= self.tokens.len() {
None
} else {
Some (self.tokens[self.index].to_owned())
}
}
pub fn next(&mut self) -> Option<Token> {
let token = self.peek();
self.index += 1;
token
}
pub fn get_tokens(&mut self) -> Vec<Token> {
self.tokens.to_owned()
}
pub fn chk_silent(&self) -> bool {
if self.tokens.len() != 0 {
self.tokens.len() != 0 && self.tokens[self.tokens.len() - 1].get_class() == TokenClass::Semicolon
} else {
true
}
}
pub fn get_next_precedence(&self) -> u8 {
if let Some(t) = self.peek() {
t.get_class().into()
} else {
0
}
}
}
/// Lexes two assignment lines and checks the classes and text of the
/// resulting tokens.
#[test]
fn tokenize_00() {
    let input: String = "x = 1.3\ny = 2.6".to_string();
    let mut tokenizer = Tokenizer::from(input);
    let tokens = tokenizer.get_tokens();
    println!("Tokens: {:#?}", tokens);

    // '\n' is listed in SEPARATORS, so no Newline token separates the lines.
    let classes: Vec<TokenClass> = tokens.iter().map(|t| t.get_class()).collect();
    assert_eq!(
        classes,
        vec![
            TokenClass::Identifier,
            TokenClass::Assignment,
            TokenClass::Float,
            TokenClass::Identifier,
            TokenClass::Assignment,
            TokenClass::Float,
        ]
    );

    let values: Vec<String> = tokens.iter().map(|t| t.get_value()).collect();
    assert_eq!(values, vec!["x", "=", "1.3", "y", "=", "2.6"]);
}