use crate::lexers::Token;
pub struct Lexer {
input: Vec<char>,
pub position: usize,
pub read_position: usize,
pub ch: char,
}
fn is_letter(ch: char) -> bool {
ch.is_alphabetic() || ch == '_'
}
impl Lexer {
pub fn new(input: Vec<char>) -> Self {
Self {
input,
position: 0,
read_position: 0,
ch: '\0',
}
}
pub fn read_char(&mut self) {
if self.read_position >= self.input.len() {
self.ch = '\0';
} else {
self.ch = self.input[self.read_position];
}
self.position = self.read_position;
self.read_position += 1;
}
pub fn next_token(&mut self) -> Token {
let read_identifier = |l: &mut Lexer| -> Vec<char> {
let position = l.position;
while l.position < l.input.len() && is_letter(l.ch) {
l.read_char();
if l.ch == '-' {
l.read_char();
}
}
l.input[position..l.position].to_vec()
};
let read_string = |l: &mut Lexer, ch: char| -> Vec<char> {
let position = l.position;
l.read_char();
while l.position < l.input.len() && l.ch != ch {
if l.ch == '\\' {
l.read_char();
}
l.read_char();
}
l.read_char();
if l.position > l.input.len() {
l.position -= 1;
l.read_position -= 1;
}
l.input[position..l.position].to_vec()
};
let read_number = |l: &mut Lexer| -> Vec<char> {
let position = l.position;
while l.position < l.input.len() && l.ch.is_numeric() {
l.read_char();
}
l.input[position..l.position].to_vec()
};
let tok: Token;
if self.ch == '$' {
let start_position = self.position;
while self.position < self.input.len()
&& !is_letter(self.ch)
&& !self.ch.is_numeric()
&& self.ch != '\n'
&& self.ch != ' '
{
self.read_char();
}
let identifier = self.input[start_position..self.position].to_vec();
return Token::IDENT(identifier);
}
if self.ch == '<' {
let next_ch = self.input[self.position + 1];
if self.position + 5 < self.input.len()
&& next_ch == '<'
&& self.input[self.position + 2] == 'E'
&& self.input[self.position + 3] == 'O'
&& self.input[self.position + 4] == 'F'
{
let mut comment = String::from("<<EOF").chars().collect::<Vec<_>>();
self.read_char();
self.read_char();
self.read_char();
self.read_char();
self.read_char();
let last_position = self.position;
while self.position < self.input.len() {
if self.ch == 'E' {
if self.input[self.position + 1] == 'O' {
if self.input[self.position + 2] == 'F' {
self.read_char();
self.read_char();
self.read_char();
break;
}
}
}
self.read_char();
}
comment.append(&mut self.input[last_position..self.position].to_vec());
return Token::STRING(comment);
}
}
if self.ch == '#' {
return Token::COMMENT(read_string(self, '\n'));
}
match self.ch {
'\n' => {
tok = Token::ENDL(self.ch);
}
'\0' => {
tok = Token::EOF;
}
'!' => {
tok = Token::KEYWORD(vec![self.ch]);
}
'[' => {
tok = Token::KEYWORD(vec![self.ch]);
}
']' => {
tok = Token::KEYWORD(vec![self.ch]);
}
'|' => {
tok = Token::KEYWORD(vec![self.ch]);
}
'-' => {
let next_ch = self.input[self.position + 1];
if is_letter(next_ch) || next_ch == '-' {
let mut identifier = vec![self.ch];
self.read_char();
if self.input[self.position + 1] == '-' {
self.read_char();
identifier.append(&mut vec![self.ch]);
}
identifier.append(&mut read_identifier(self));
return Token::KEYWORD(identifier);
}
tok = Token::CH(self.ch);
}
_ => {
return if is_letter(self.ch) {
#[allow(unused_variables)]
let start_position = self.position;
#[allow(unused_mut)]
let mut identifier: Vec<char> = read_identifier(self);
if self.ch.is_numeric() {
let position = self.position;
while self.position < self.input.len() {
if !self.ch.is_numeric() && !is_letter(self.ch) {
break;
}
self.read_char();
}
identifier.append(&mut self.input[position..self.position].to_vec());
}
match get_keyword_token(&identifier) {
Ok(keyword_token) => keyword_token,
Err(_) => {
if self.ch == '(' {
return Token::ENTITY(identifier);
} else if self.ch.is_whitespace() {
let mut position = self.position;
let mut ch = self.input[position];
while position < self.input.len() && ch.is_whitespace() {
position += 1;
if position < self.input.len() {
ch = self.input[position];
}
}
if ch == '(' {
return Token::ENTITY(identifier);
}
}
Token::IDENT(identifier)
}
}
} else if self.ch.is_numeric() {
let identifier: Vec<char> = read_number(self);
Token::INT(identifier)
} else if self.ch == '\'' {
if self.position - 1 > 0 && self.input[self.position - 1] == '/' {
tok = Token::CH(self.ch);
self.read_char();
return tok;
}
let str_value: Vec<char> = read_string(self, '\'');
Token::STRING(str_value)
} else if self.ch == '"' {
let str_value: Vec<char> = read_string(self, '"');
Token::STRING(str_value)
} else {
Token::ILLEGAL
}
}
}
self.read_char();
tok
}
}
pub fn get_keyword_token(identifier: &Vec<char>) -> Result<Token, String> {
let id: String = identifier.into_iter().collect();
match &id[..] {
"builtin" | "command" | "compgen" | "echo" | "eval" | "exit" | "false" | "hash"
| "kill" | "read" | "source" | "unset" | "test" | "true" | "printf" => {
Ok(Token::CONSTANT(identifier.clone()))
}
"alias" | "bg" | "bind" | "break" | "caller" | "case" | "cd" | "complete" | "compopt"
| "continue" | "coproc" | "declare" | "dirs" | "disown" | "enable" | "exec" | "export"
| "fc" | "fg" | "for" | "function" | "getopts" | "help" => {
Ok(Token::KEYWORD(identifier.clone()))
}
_ => Err(String::from("Not a keyword")),
}
}
pub fn render_html(input: Vec<char>) -> String {
let mut l = Lexer::new(input);
l.read_char();
let mut html = String::new();
let mut line = 1;
html.push_str("<table class=\"highlight-table\">\n");
html.push_str("<tbody>\n");
html.push_str("<tr>");
html.push_str(&format!(
"<td class=\"hl-num\" data-line=\"{}\"></td><td>",
line
));
loop {
let token = l.next_token();
if token == Token::EOF {
html.push_str("</td></tr>\n");
break;
}
match token {
Token::INT(value) => {
html.push_str(&format!(
"<span class=\"hl-c\">{}</span>",
value.iter().collect::<String>()
));
}
Token::IDENT(value) => {
html.push_str(&value.iter().collect::<String>());
}
Token::STRING(value) => {
let mut s = String::new();
for ch in value {
if ch == '<' {
s.push_str("<");
} else if ch == '>' {
s.push_str(">");
} else {
s.push(ch);
}
}
s = s.replace("<<", "<span class=\"hl-k\"><<</span>");
s = s.replace("EOF", "<span class=\"hl-k\">EOF</span>");
let split = s.split("\n");
let split_len = split.clone().collect::<Vec<&str>>().len();
let mut index = 0;
for val in split {
html.push_str(&format!("<span class=\"hl-s\">{}</span>", val));
index = index + 1;
if index != split_len {
line = line + 1;
html.push_str("</td></tr>\n");
html.push_str(&format!(
"<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
line
));
}
}
}
Token::CH(value) => {
html.push(value);
}
Token::ENTITY(value) => {
html.push_str(&format!(
"<span class=\"hl-en\">{}</span>",
value.iter().collect::<String>()
));
}
Token::CONSTANT(value) => {
html.push_str(&format!(
"<span class=\"hl-c\">{}</span>",
value.iter().collect::<String>()
));
}
Token::KEYWORD(value) => {
html.push_str(&format!(
"<span class=\"hl-k\">{}</span>",
value.iter().collect::<String>()
));
}
Token::COMMENT(value) => {
let mut lines = String::new();
for ch in value {
if ch == '<' {
lines.push_str("<");
} else if ch == '>' {
lines.push_str(">");
} else {
lines.push(ch);
}
}
let split = lines.split("\n");
let split_len = split.clone().collect::<Vec<&str>>().len();
let mut index = 0;
for val in split {
if val.len() > 1 {
html.push_str(&format!("<span class=\"hl-cmt\">{}</span>", val));
}
index = index + 1;
if index != split_len {
line = line + 1;
html.push_str("</td></tr>\n");
html.push_str(&format!(
"<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
line
));
}
}
}
Token::ENDL(_) => {
line = line + 1;
html.push_str("</td></tr>\n");
html.push_str(&format!(
"<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
line
));
}
_ => {
html.push(l.ch);
l.read_char();
}
}
}
html.push_str("</tbody>\n");
html.push_str("</table>");
html
}