use crate::directives;
use crate::error;
use crate::inst::Instructions;
use crate::reg::Registers;
use crate::token::Token;
use crate::token::TokenType;
use std::{iter::Peekable, str::Chars};
/// Tokenizer over a piece of source text.
///
/// Holds the full text and produces tokens (punctuation, line breaks,
/// strings, numbers, registers, opcodes, directives and identifiers) on
/// demand via `scan_tokens`.
pub struct Scanner {
/// The complete source text to be tokenized.
source: String,
}
impl Scanner {
pub fn new(source: String) -> Scanner {
Scanner { source }
}
pub fn scan_tokens(&self) -> Result<Vec<Token>, String> {
let mut tokens = vec![];
let mut line_number = 0;
let mut find_end_of_line = false;
let mut chars = self.source.chars().peekable();
loop {
let c = chars.next();
let c = match c {
Some(c) => c,
None => break,
};
if find_end_of_line && c == '\n' {
find_end_of_line = false;
} else if find_end_of_line {
continue;
}
if c == '#' || c == ';' {
find_end_of_line = true;
} else if c == '(' {
tokens.push(Token::new(
c.to_string(),
TokenType::LeftParantheses,
line_number,
));
} else if c == ')' {
tokens.push(Token::new(
c.to_string(),
TokenType::RightParantheses,
line_number,
));
} else if c == ':' {
tokens.push(Token::new(c.to_string(), TokenType::Colon, line_number));
} else if c == ',' {
tokens.push(Token::new(c.to_string(), TokenType::Comma, line_number));
}
else if c == '\n' {
tokens.push(Token::new(c.to_string(), TokenType::LineBreak, line_number));
line_number += 1;
}
else if c.is_whitespace() {
continue;
}
else if c == '"' {
if let Ok(string) = Self::extract_string(c, &mut chars) {
tokens.push(Token::new(string, TokenType::String, line_number));
} else {
return Err(error::error(
line_number,
"Syntax error",
"Error in parsing string",
));
}
}
else if c.is_ascii_digit() || c == '-' {
if let Ok((string, number)) = Self::extract_number(c, &mut chars) {
tokens.push(Token::new_number(string, number, line_number));
} else {
return Err(error::error(
line_number,
"Syntax error",
"Error in parsing number",
));
}
}
else if c.is_ascii_alphabetic()
|| c == '_'
|| c == '.'
|| c == '%'
|| c == '$'
|| c == '@'
{
if let Ok(string) = Self::extract_identifier(c, &mut chars) {
if let Some(r) = Registers::get_number(&string.clone()) {
tokens.push(Token::new_reg(string, *r, line_number));
} else if Instructions::is_valid_op(&string) {
tokens.push(Token::new(string, TokenType::Opcode, line_number));
} else if directives::is_directive(&string) {
tokens.push(Token::new(string, TokenType::Directive, line_number));
} else {
tokens.push(Token::new(string, TokenType::Identifier, line_number));
}
} else {
return Err(error::error(
line_number,
"Syntax error",
"Error in parsing identifier",
));
}
}
else {
return Err(error::error(
line_number,
"Syntax error",
"Unexpected character",
));
}
}
Ok(tokens)
}
fn extract_string(_c: char, chars: &mut Peekable<Chars>) -> Result<String, ()> {
let mut string = "".to_string();
while let Some(&c) = chars.peek() {
if c == '\n' {
return Err(());
}
if c == '"' {
chars.next();
return Ok(string);
}
string += &c.to_string();
chars.next();
}
Err(())
}
fn extract_number(c: char, chars: &mut Peekable<Chars>) -> Result<(String, i32), ()> {
let mut string = c.to_string();
while let Some(&c) = chars.peek() {
if c.is_whitespace()
|| c == '#'
|| c == ';'
|| c == '('
|| c == ')'
|| c == ','
|| c == ':'
{
break;
}
if c.is_ascii_hexdigit() || c == 'x' || c == 'h' || c == 'q' || c == 'y' {
string += &c.to_string();
chars.next();
} else {
return Err(());
}
}
if let Ok(number) = imm_string_to_i32(&string) {
Ok((string, number))
} else {
Err(())
}
}
fn extract_identifier(c: char, chars: &mut Peekable<Chars>) -> Result<String, ()> {
let mut string = c.to_string();
while let Some(&c) = chars.peek() {
if c.is_whitespace()
|| c == '#'
|| c == ';'
|| c == '('
|| c == ')'
|| c == ','
|| c == ':'
{
break;
}
if c.is_ascii_alphabetic()
|| c.is_ascii_digit()
|| c == '_'
|| c == '.'
|| c == '$'
|| c == '@'
{
string += &c.to_string();
chars.next();
} else {
return Err(());
}
}
Ok(string)
}
}
/// Decodes an immediate literal into an `i32`.
///
/// The literal consists of an optional sign (`+` or `-`), an optional radix
/// prefix and at least one digit:
///
/// | prefix            | radix |
/// |-------------------|-------|
/// | `0x`, `0h`, `$`   | 16    |
/// | `0b`, `0y`        | 2     |
/// | `0o`, `0q`        | 8     |
/// | `0d` or no prefix | 10    |
///
/// A bare `0` (with or without sign) decodes to `0`; a leading `0` without a
/// radix letter is simply skipped, so `0200` is decimal 200.
///
/// # Errors
///
/// * `"Found empty string"` if `imm_string` is empty.
/// * `"Found empty literal"` if nothing follows the sign.
/// * `"String decode failed"` if the digits are missing, contain a character
///   that is not a digit of the selected radix (including a second sign), or
///   the value does not fit in an `i32`.
fn imm_string_to_i32(imm_string: &str) -> Result<i32, &'static str> {
    if imm_string.is_empty() {
        return Err("Found empty string");
    }

    // Optional leading sign.
    let (negative, body) = if let Some(rest) = imm_string.strip_prefix('-') {
        (true, rest)
    } else if let Some(rest) = imm_string.strip_prefix('+') {
        (false, rest)
    } else {
        (false, imm_string)
    };
    if body.is_empty() {
        return Err("Found empty literal");
    }
    if body == "0" {
        return Ok(0);
    }

    // Optional radix prefix.
    let (radix, digits) = if let Some(after_zero) = body.strip_prefix('0') {
        let mut rest = after_zero.chars();
        match rest.next() {
            Some('x') | Some('h') => (16, rest.as_str()),
            Some('b') | Some('y') => (2, rest.as_str()),
            Some('o') | Some('q') => (8, rest.as_str()),
            Some('d') => (10, rest.as_str()),
            // No radix letter: the leading zero is just a decimal padding digit.
            _ => (10, after_zero),
        }
    } else if let Some(hex_digits) = body.strip_prefix('$') {
        (16, hex_digits)
    } else {
        (10, body)
    };

    // `from_str_radix` would accept its own leading sign, which would let
    // inputs such as "--5" or "0x-5" through, so validate the digits here.
    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
        return Err("String decode failed");
    }

    // Parse the magnitude in a wider type so that `i32::MIN`, whose magnitude
    // does not fit in an `i32`, can still be represented.
    let magnitude = i64::from_str_radix(digits, radix).map_err(|_| "String decode failed")?;
    let value = if negative { -magnitude } else { magnitude };
    if value < i64::from(i32::MIN) || value > i64::from(i32::MAX) {
        return Err("String decode failed");
    }
    // Lossless: the range was checked just above.
    Ok(value as i32)
}
#[cfg(test)]
mod test {
    use super::{imm_string_to_i32, Scanner};
    use crate::token::TokenType;

    /// Scans `source` (which must scan successfully) and returns the type of
    /// every produced token, in order.
    fn token_types(source: &str) -> Vec<TokenType> {
        Scanner::new(source.to_string())
            .scan_tokens()
            .unwrap()
            .iter()
            .map(|token| token.get_type())
            .collect()
    }

    /// Asserts that `source` scans to exactly one token with the given type
    /// and lexeme.
    fn generic_type_test(source: String, token_type: TokenType, expected_lexeme: &str) {
        let tokens = Scanner::new(source).scan_tokens().unwrap();
        assert_eq!(tokens.len(), 1);
        match tokens.first() {
            None => panic!("Tokens empty"),
            Some(token) if token.get_type() != token_type => panic!("Incorrect token type!"),
            Some(token) => assert_eq!(token.get_lexeme(), expected_lexeme),
        }
    }

    #[test]
    fn empty_source() {
        assert_eq!(token_types("").len(), 0)
    }

    #[test]
    fn directive() {
        generic_type_test(".bss".to_string(), TokenType::Directive, ".bss")
    }

    #[test]
    fn register() {
        generic_type_test("t0".to_string(), TokenType::Register(5), "t0")
    }

    #[test]
    fn opcode() {
        generic_type_test("add".to_string(), TokenType::Opcode, "add")
    }

    #[test]
    fn identifier() {
        generic_type_test("fuad".to_string(), TokenType::Identifier, "fuad")
    }

    #[test]
    fn number() {
        generic_type_test("123".to_string(), TokenType::Number(123), "123")
    }

    #[test]
    fn string() {
        generic_type_test("\"fuad\"".to_string(), TokenType::String, "fuad")
    }

    #[test]
    fn new_line() {
        let types = token_types("fuad\nfuad");
        assert_eq!(types.len(), 3);
        assert_eq!(types[0], TokenType::Identifier);
        assert_eq!(types[2], TokenType::Identifier);
    }

    #[test]
    fn comment() {
        // A line holding nothing but a comment yields no tokens, whichever
        // comment character introduces it.
        for source in ["# fuad ismail", "; fuad ismail"].iter() {
            assert_eq!(token_types(source).len(), 0)
        }

        // A trailing comment is dropped, but the line break that ends it is
        // still reported.
        let types = token_types("123 # fuad ismail \n hello");
        assert_eq!(types.len(), 3);
        assert_eq!(types[0], TokenType::Number(123));
        assert_eq!(types[2], TokenType::Identifier);
    }

    #[test]
    fn all() {
        let source = "\n\
            add t0, 16(t1), 2 # operation statement\n\
            # with Register, MemAddr, and Number expressions\n\
            test: # label statement\n\
            .string \"success\" # operation statement with String expression\n";
        let types = token_types(source);
        assert_eq!(types.len(), 18);

        // (token index, expected type) spot checks across the token stream.
        let expected = [
            (1, TokenType::Opcode),
            (2, TokenType::Register(5)),
            (3, TokenType::Comma),
            (9, TokenType::Number(2)),
            (12, TokenType::Identifier),
            (13, TokenType::Colon),
            (15, TokenType::Directive),
            (16, TokenType::String),
            (17, TokenType::LineBreak),
        ];
        for (index, token_type) in expected.iter() {
            assert_eq!(&types[*index], token_type);
        }
    }

    #[test]
    fn test_imm_string_to_i32() {
        // (literal, expected value) for every supported radix notation.
        let cases = [
            ("200", 200),
            ("0200", 200),
            ("0d200", 200),
            ("$0c8", 0xc8),
            ("0xc8", 0xc8),
            ("0hc8", 0xc8),
            ("0o310", 0o310),
            ("0q310", 0o310),
            ("0b11001000", 0b11001000),
            ("0y11001000", 0b11001000),
        ];
        for &(imm_string, expected) in cases.iter() {
            assert_eq!(imm_string_to_i32(imm_string).unwrap(), expected);
        }
    }
}