use std::vec::Vec;
use std::string::String;
/// Token kinds produced by the lexer.
#[repr(u8)]
#[derive(Debug, PartialEq, Clone)]
pub enum Keyword {
/// End of input; always the final token in the stream.
EOF,
/// A `#` character.
HASH,
/// An `@` character.
AT,
/// An `x` character.
CROSS,
/// Free-form text, read up to a newline followed by a blank line or `#`.
TEXT,
/// Rest of the line immediately following a HASH token.
IDENT,
/// Word (up to the next space) immediately following a NUMBER token.
LABEL,
/// A run of digit and `.` characters.
NUMBER,
/// A single newline.
DELIMITER,
}
impl Keyword {
pub fn to_str(&self) -> &'static str {
match self {
Keyword::EOF => "EOF",
Keyword::HASH => "HASH",
Keyword::AT => "AT",
Keyword::CROSS => "CROSS",
Keyword::TEXT => "TEXT",
Keyword::IDENT => "IDENT",
Keyword::LABEL => "LABEL",
Keyword::NUMBER => "NUMBER",
Keyword::DELIMITER => "DELIMITER",
}
}
}
/// A single lexed token: its kind, the literal text captured for it, and
/// the byte range it occupied in the input.
#[derive(Debug, Clone)]
pub struct Token {
/// Which kind of token this is.
pub token_type: Keyword,
/// The raw text captured while scanning this token.
pub token_literal: String,
/// Byte offsets in the source: (start, position just past the token).
pub position: (usize, usize),
}
/// Iterator-like access to a pre-lexed token stream.
pub trait LexerTrait {
/// Returns the current token and advances the cursor. Once the stream
/// is exhausted, keeps returning the final (EOF) token.
fn next_token(&mut self) -> Token;
/// Returns the current token without advancing the cursor.
fn peek_token(&self) -> Token;
}
/// Eager lexer: construction tokenizes the entire input up front and the
/// trait methods replay the buffered tokens.
pub struct Lexer {
/// The raw source text. NOTE(review): indexed by byte throughout, so
/// multi-byte UTF-8 input would panic in get_char/get_peek — effectively
/// assumes ASCII; confirm against callers.
input: String,
/// Byte index of the current character.
cur_pos: usize,
/// Byte index of the lookahead character (clamped to cur_pos at the end).
peek_pos: usize,
/// All tokens produced by lex(), terminated by an EOF token.
tokens: Vec<Token>,
/// Read cursor into `tokens` used by next_token/peek_token.
tok_pos: usize,
}
impl LexerTrait for Lexer {
    /// Hands out the token under the cursor and steps forward; past the
    /// end of the stream it keeps yielding the final (EOF) token.
    fn next_token(&mut self) -> Token {
        match self.tokens.get(self.tok_pos) {
            Some(tok) => {
                let tok = tok.clone();
                self.tok_pos += 1;
                tok
            }
            None => self.tokens[self.tokens.len() - 1].clone(),
        }
    }

    /// Same lookup as `next_token`, but leaves the cursor untouched.
    fn peek_token(&self) -> Token {
        self.tokens
            .get(self.tok_pos)
            .unwrap_or_else(|| &self.tokens[self.tokens.len() - 1])
            .clone()
    }
}
impl Lexer {
pub fn new(input: &str) -> impl LexerTrait {
let mut l = Lexer {input: String::from(input), cur_pos: 0, peek_pos: 0, tokens: Vec::new(), tok_pos: 0};
l.init();
l.lex();
l
}
fn init(&mut self) {
if self.input.len() <= 1 {
self.peek_pos = self.cur_pos;
} else {
self.peek_pos = self.cur_pos + 1;
}
}
#[inline(always)]
fn get_char(&self) -> String {
String::from(&self.input[self.cur_pos..self.cur_pos+1])
}
#[inline(always)]
fn get_peek(&self) -> String {
String::from(&self.input[self.peek_pos..self.peek_pos+1])
}
fn read_char(&mut self) {
if self.cur_pos+1 > self.input.len() {
return
}
self.cur_pos += 1;
if self.peek_pos+1 >= self.input.len() {
self.peek_pos = self.cur_pos;
} else {
self.peek_pos += 1;
}
}
#[inline(always)]
fn eof(&self) -> bool {
return self.cur_pos >= self.input.len();
}
fn eat_whitespace(&mut self) {
while !self.eof() {
let ch = self.get_char();
if ch.as_str() != " " && ch.as_str() != "\t" {
break
}
self.read_char();
}
}
fn push(&mut self, tok: Token) {
self.tokens.push(tok);
}
fn parse_text(&mut self) -> Token {
let mut literal = String::new();
let start = self.cur_pos;
while !self.eof() {
let ch = self.get_char();
if ch.as_str() == "\n" && (self.get_peek().as_str() == "\n" || self.get_peek().as_str() == "#") {
literal += ch.as_str();
self.read_char();
break
}
literal += ch.as_str();
self.read_char();
}
return Token{token_type: Keyword::TEXT, token_literal: literal, position: (start, self.cur_pos)};
}
fn parse_ident(&mut self) -> Token {
let mut literal = String::new();
let start = self.cur_pos;
while !self.eof() {
let ch = self.get_char();
if ch.as_str() == "\n" {
break
}
literal += ch.as_str();
self.read_char();
}
return Token{token_type: Keyword::IDENT, token_literal: literal, position: (start, self.cur_pos)};
}
fn parse_label(&mut self) -> Token {
let mut literal = String::new();
let start = self.cur_pos;
while !self.eof() {
let ch = self.get_char();
if ch.as_str() == " " {
break
}
literal += ch.as_str();
self.read_char();
}
return Token{token_type: Keyword::LABEL, token_literal: literal, position: (start, self.cur_pos)};
}
fn parse_number(&mut self) -> Token {
let mut literal = String::new();
let start = self.cur_pos;
while !self.eof() {
let ch = self.get_char();
if !is_digit(ch.as_str()) && ch.as_str() != "." {
break
}
literal += ch.as_str();
self.read_char();
}
return Token{token_type: Keyword::NUMBER, token_literal: literal, position: (start, self.cur_pos)};
}
fn lex(&mut self){
while !self.eof() {
self.eat_whitespace();
let ch = self.get_char();
let start = self.cur_pos;
let tok = match ch.as_str() {
"#" => {
self.read_char();
Token{token_type: Keyword::HASH, token_literal: String::from(ch), position: (start, self.cur_pos)}
}
"x" => {
self.read_char();
Token{token_type: Keyword::CROSS, token_literal: String::from(ch), position: (start, self.cur_pos)}
}
"@" => {
self.read_char();
Token{token_type: Keyword::AT, token_literal: String::from(ch), position: (start, self.cur_pos)}
}
"\n" => {
self.read_char();
Token{token_type: Keyword::DELIMITER, token_literal: String::from(ch), position: (start, self.cur_pos)}
}
_ if self.tokens.len() > 0 && self.tokens[self.tokens.len()-1].token_type == Keyword::HASH => self.parse_ident(),
_ if self.tokens.len() > 0 && self.tokens[self.tokens.len()-1].token_type == Keyword::NUMBER => self.parse_label(),
_ if is_digit(ch.as_str()) => self.parse_number(),
_ => self.parse_text(),
};
self.push(tok);
}
self.push(Token{token_type: Keyword::EOF, token_literal: String::from(""), position: (self.cur_pos, self.cur_pos)});
}
}
/// Returns true when `s` is a non-empty run of ASCII digits.
///
/// BUG FIX: the previous implementation compared strings
/// lexicographically (`s >= "0" && s <= "9"`), which wrongly accepted
/// mixed strings such as "5x" (since "5x" < "9" in lexicographic order).
/// The lexer only ever passes one-character strings, so single-character
/// behavior is unchanged.
fn is_digit(s: &str) -> bool {
    !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit())
}