use super::Value as Obj;
/// A lexical token produced from template source.
///
/// Text outside `-[ ... ]-` directives is carried as [`Token::HtmlContent`];
/// everything else is produced while lexing the inside of a directive.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    // --- Template structure keywords ---
    /// The `template` keyword.
    TemplateKeyword,
    /// The `insert` keyword.
    InsertKeyword,
    /// The `block` keyword.
    BlockKeyword,
    /// The `endblock` keyword.
    EndBlockKeyword,
    /// The `export` keyword.
    ExportKeyword,
    /// The `placeholder` keyword.
    PlaceholderKeyword,

    // --- Statement and control-flow keywords ---
    /// The `let` keyword.
    LetKeyword,
    /// The `for` keyword.
    ForKeyword,
    /// The `in` keyword.
    InKeyword,
    /// The `if` keyword.
    IfKeyword,
    /// The `match` keyword.
    MatchKeyword,
    /// The `case` keyword.
    CaseKeyword,
    /// The `output` keyword.
    OutputKeyword,
    /// The `endif` keyword.
    EndIfKeyword,
    /// The `endfor` keyword.
    EndForKeyword,
    /// The `endmatch` keyword.
    EndMatchKeyword,
    /// The `endcase` keyword.
    EndCaseKeyword,
    /// The `while` keyword.
    WhileKeyword,
    /// The `endwhile` keyword.
    EndWhileKeyword,
    /// The `del` keyword.
    DelKeyword,

    // --- Names, literals and raw text ---
    /// A non-keyword word; the lexer also uses this as the fallback for
    /// characters it does not recognise.
    Identifier(String),
    /// A literal value (string, number or boolean).
    Object(Obj),
    /// Raw text found outside of any directive.
    HtmlContent(String),

    /// `.`
    Dot,

    // --- Assignment operators ---
    /// `=`
    Assignment,
    /// `+=`
    PlusAssignment,
    /// `-=`
    MinusAssignment,
    /// `*=`
    MultiplyAssignment,
    /// `/=`
    DivideAssignment,
    /// `%=`
    ModulusAssignment,

    // --- Increment / decrement ---
    /// `++`
    Increment,
    /// `--`
    Decrement,

    // --- Arithmetic operators ---
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulus,
    /// `**`
    Exponent,

    // --- Comparison operators ---
    /// `==`
    EqualsEquals,
    /// `!=`
    NotEquals,
    /// `<`
    LessThan,
    /// `<=`
    LessThanEquals,
    /// `>`
    GreaterThan,
    /// `>=`
    GreaterThanEquals,

    // --- Logical operators ---
    /// `&&`
    LogicalAnd,
    /// `||`
    LogicalOr,
    /// `!`
    LogicalNot,

    // --- Brackets ---
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftSquareBracket,
    /// `]`
    RightSquareBracket,

    /// Marks the end of a directive (emitted when `]-` or end of input is reached).
    EndOfStatement,
}
/// Cursor over template source text.
///
/// Derives `Debug` and `Clone` so a lexer can be inspected in diagnostics and
/// snapshotted for look-ahead.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The complete source text being lexed.
    input: String,
    /// Byte offset of the cursor into `input`; it is only ever advanced by
    /// whole characters, so it always sits on a UTF-8 character boundary.
    pos: usize,
}
impl Lexer {
    /// Creates a lexer positioned at the start of `input`.
    pub fn new(input: String) -> Self {
        Lexer { input, pos: 0 }
    }

    /// Returns the character under the cursor without consuming it, or `None`
    /// at end of input.
    pub fn peek(&self) -> Option<char> {
        self.input[self.pos..].chars().next()
    }

    /// Returns `true` if the unread input starts with `s`.
    pub fn peek_str(&self, s: &str) -> bool {
        self.input[self.pos..].starts_with(s)
    }

    /// Consumes and returns the character under the cursor, or `None` at end
    /// of input.
    pub fn next_char(&mut self) -> Option<char> {
        let ch = self.peek()?;
        // Advance by the encoded width so `pos` stays on a char boundary.
        self.pos += ch.len_utf8();
        Some(ch)
    }

    /// Advances the cursor past any run of whitespace characters.
    pub fn skip_whitespace(&mut self) {
        while matches!(self.peek(), Some(ch) if ch.is_whitespace()) {
            self.next_char();
        }
    }

    /// Returns `true` if the character *after* the one under the cursor is an
    /// ASCII decimal digit.
    pub fn peek_next_is_digit(&self) -> bool {
        self.input[self.pos..]
            .chars()
            .nth(1)
            .map_or(false, |ch| ch.is_ascii_digit())
    }

    /// Lexes the inside of a directive, starting just after the opening `-[`.
    ///
    /// Tokens are collected until the closing `]-` (which is consumed) or the
    /// end of input. The returned vector always ends with
    /// [`Token::EndOfStatement`].
    pub fn lex_directive(&mut self) -> Vec<Token> {
        let mut tokens = Vec::new();
        loop {
            self.skip_whitespace();
            if self.pos >= self.input.len() {
                // Unterminated directive: stop at end of input.
                break;
            }
            if self.peek_str("]-") {
                self.pos += 2;
                break;
            }
            tokens.push(self.lex_directive_token());
        }
        tokens.push(Token::EndOfStatement);
        tokens
    }

    /// Lexes a single token from inside a directive.
    ///
    /// Leading whitespace is skipped. If the cursor is at `]-` or at the end
    /// of input, [`Token::EndOfStatement`] is returned and nothing is
    /// consumed. A `-` immediately followed by a digit is lexed as the sign of
    /// a numeric literal. Any character that is not otherwise recognised is
    /// returned as a one-character [`Token::Identifier`].
    pub fn lex_directive_token(&mut self) -> Token {
        self.skip_whitespace();
        if self.peek_str("]-") {
            return Token::EndOfStatement;
        }
        let ch = match self.peek() {
            Some(ch) => ch,
            None => return Token::EndOfStatement,
        };

        if ch == '"' {
            return self.lex_string();
        }
        if ch.is_ascii_digit() || (ch == '-' && self.peek_next_is_digit()) {
            return self.lex_number();
        }
        if ch.is_alphabetic() || ch == '_' {
            return self.lex_identifier_or_keyword();
        }
        // Two-character operators must be tried before their one-character
        // prefixes (`==` before `=`, `**` before `*`, ...).
        if let Some(operator) = self.lex_two_char_operator() {
            return operator;
        }

        // Consume the single character we already peeked.
        self.pos += ch.len_utf8();
        match ch {
            '=' => Token::Assignment,
            '+' => Token::Plus,
            '-' => Token::Minus,
            '*' => Token::Multiply,
            '/' => Token::Divide,
            '%' => Token::Modulus,
            '<' => Token::LessThan,
            '>' => Token::GreaterThan,
            '!' => Token::LogicalNot,
            '(' => Token::LeftParen,
            ')' => Token::RightParen,
            '[' => Token::LeftSquareBracket,
            ']' => Token::RightSquareBracket,
            '.' => Token::Dot,
            _ => Token::Identifier(ch.to_string()),
        }
    }

    /// Consumes and returns a two-character operator if one starts at the
    /// cursor; otherwise leaves the cursor untouched and returns `None`.
    fn lex_two_char_operator(&mut self) -> Option<Token> {
        // `get(..2)` yields `None` when fewer than two bytes remain or when
        // byte 2 is not a char boundary; every operator here is pure ASCII,
        // so neither case can be an operator.
        let token = match self.input[self.pos..].get(..2)? {
            "==" => Token::EqualsEquals,
            "!=" => Token::NotEquals,
            "<=" => Token::LessThanEquals,
            ">=" => Token::GreaterThanEquals,
            "+=" => Token::PlusAssignment,
            "-=" => Token::MinusAssignment,
            "*=" => Token::MultiplyAssignment,
            "/=" => Token::DivideAssignment,
            "%=" => Token::ModulusAssignment,
            "++" => Token::Increment,
            "--" => Token::Decrement,
            "**" => Token::Exponent,
            "&&" => Token::LogicalAnd,
            "||" => Token::LogicalOr,
            _ => return None,
        };
        self.pos += 2;
        Some(token)
    }

    /// Lexes a double-quoted string literal; the cursor must be on the
    /// opening quote.
    ///
    /// Recognised escapes are `\n`, `\t` and `\r`; any other escaped
    /// character (including `\\` and `\"`) stands for itself. If the input
    /// ends before the closing quote, the text collected so far is returned;
    /// a lone trailing backslash is dropped.
    pub fn lex_string(&mut self) -> Token {
        // Skip the opening quote.
        self.next_char();
        let mut s = String::new();
        while let Some(ch) = self.next_char() {
            match ch {
                '"' => break,
                '\\' => {
                    if let Some(escaped) = self.next_char() {
                        s.push(match escaped {
                            'n' => '\n',
                            't' => '\t',
                            'r' => '\r',
                            other => other,
                        });
                    }
                }
                _ => s.push(ch),
            }
        }
        Token::Object(Obj::Str(s))
    }

    /// Lexes a numeric literal: an optional leading `-`, digits, and at most
    /// one fractional part.
    ///
    /// A `.` is treated as part of the number only when a digit follows it.
    /// A trailing dot (as in `items.0.name`) is therefore left in the input
    /// to be lexed as [`Token::Dot`] instead of being swallowed by the
    /// literal.
    ///
    /// If the consumed text does not parse as `f64` it is returned as a
    /// [`Token::Identifier`].
    pub fn lex_number(&mut self) -> Token {
        let start = self.pos;
        if self.peek() == Some('-') {
            self.next_char();
        }

        let mut dot_encountered = false;
        while let Some(ch) = self.peek() {
            if ch.is_ascii_digit() {
                self.next_char();
            } else if ch == '.' && !dot_encountered && self.peek_next_is_digit() {
                dot_encountered = true;
                self.next_char();
            } else {
                break;
            }
        }

        let number_str = &self.input[start..self.pos];
        match number_str.parse::<f64>() {
            Ok(num) => Token::Object(Obj::Numerical(num)),
            Err(_) => Token::Identifier(number_str.to_string()),
        }
    }

    /// Lexes a run of alphanumeric/underscore characters and classifies it as
    /// a boolean literal, a keyword, or a plain identifier.
    pub fn lex_identifier_or_keyword(&mut self) -> Token {
        let start = self.pos;
        while matches!(self.peek(), Some(ch) if ch.is_alphanumeric() || ch == '_') {
            self.next_char();
        }

        match &self.input[start..self.pos] {
            "true" => Token::Object(Obj::Boolean(true)),
            "false" => Token::Object(Obj::Boolean(false)),
            "template" => Token::TemplateKeyword,
            "insert" => Token::InsertKeyword,
            "block" => Token::BlockKeyword,
            "endblock" => Token::EndBlockKeyword,
            "export" => Token::ExportKeyword,
            "placeholder" => Token::PlaceholderKeyword,
            "let" => Token::LetKeyword,
            "for" => Token::ForKeyword,
            "in" => Token::InKeyword,
            "if" => Token::IfKeyword,
            "output" => Token::OutputKeyword,
            "endif" => Token::EndIfKeyword,
            "endfor" => Token::EndForKeyword,
            "while" => Token::WhileKeyword,
            "endwhile" => Token::EndWhileKeyword,
            "del" => Token::DelKeyword,
            "match" => Token::MatchKeyword,
            "endmatch" => Token::EndMatchKeyword,
            "case" => Token::CaseKeyword,
            "endcase" => Token::EndCaseKeyword,
            word => Token::Identifier(word.to_string()),
        }
    }
}
/// Splits template source into a flat token stream.
///
/// Text outside directives becomes [`Token::HtmlContent`]. Each `-[ ... ]-`
/// directive is lexed into its tokens followed by [`Token::EndOfStatement`].
pub fn tokenize<S: Into<String>>(input: S) -> Vec<Token> {
    let mut lexer = Lexer::new(input.into());
    let mut tokens = Vec::new();

    while lexer.pos < lexer.input.len() {
        if lexer.peek_str("-[") {
            // Step over the opening delimiter, then lex the directive body.
            lexer.pos += 2;
            tokens.extend(lexer.lex_directive());
            continue;
        }

        // Everything up to the next directive opener (or end of input) is raw text.
        let rest = &lexer.input[lexer.pos..];
        let html_len = rest.find("-[").unwrap_or(rest.len());
        if html_len > 0 {
            tokens.push(Token::HtmlContent(rest[..html_len].to_string()));
        }
        lexer.pos += html_len;
    }

    tokens
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    /// Builds a numeric literal token.
    fn num(value: f64) -> Token {
        Token::Object(Obj::Numerical(value))
    }

    /// Builds a string literal token.
    fn text(value: &str) -> Token {
        Token::Object(Obj::Str(value.to_string()))
    }

    /// Trims every `HtmlContent` token and drops the ones that were only
    /// whitespace, so assertions do not depend on template indentation.
    fn significant(tokens: Vec<Token>) -> Vec<Token> {
        tokens
            .into_iter()
            .filter_map(|token| match token {
                Token::HtmlContent(html) => {
                    let trimmed = html.trim();
                    if trimmed.is_empty() {
                        None
                    } else {
                        Some(Token::HtmlContent(trimmed.to_string()))
                    }
                }
                other => Some(other),
            })
            .collect()
    }

    #[test]
    fn test_tokenize() {
        let input = r#"
            -[ template "template.html" ]-
            -[ block header ]-
            <script src="pmine.org"></script>
            -[ endblock ]-
            -[ block body ]-
            -[ let a = 1 ]-
            -[ for str in list ]-
            -[ if (a % 2 == 0) ]-
            -[ output str ]-
            -[ endif ]-
            -[ a = a + 1 ]-
            -[ endfor ]-
            -[ endblock ]-
        "#;

        let expected = vec![
            // -[ template "template.html" ]-
            Token::TemplateKeyword,
            text("template.html"),
            Token::EndOfStatement,
            // -[ block header ]-
            Token::BlockKeyword,
            ident("header"),
            Token::EndOfStatement,
            Token::HtmlContent(r#"<script src="pmine.org"></script>"#.to_string()),
            // -[ endblock ]-
            Token::EndBlockKeyword,
            Token::EndOfStatement,
            // -[ block body ]-
            Token::BlockKeyword,
            ident("body"),
            Token::EndOfStatement,
            // -[ let a = 1 ]-
            Token::LetKeyword,
            ident("a"),
            Token::Assignment,
            num(1.0),
            Token::EndOfStatement,
            // -[ for str in list ]-
            Token::ForKeyword,
            ident("str"),
            Token::InKeyword,
            ident("list"),
            Token::EndOfStatement,
            // -[ if (a % 2 == 0) ]-
            Token::IfKeyword,
            Token::LeftParen,
            ident("a"),
            Token::Modulus,
            num(2.0),
            Token::EqualsEquals,
            num(0.0),
            Token::RightParen,
            Token::EndOfStatement,
            // -[ output str ]-
            Token::OutputKeyword,
            ident("str"),
            Token::EndOfStatement,
            // -[ endif ]-
            Token::EndIfKeyword,
            Token::EndOfStatement,
            // -[ a = a + 1 ]-
            ident("a"),
            Token::Assignment,
            ident("a"),
            Token::Plus,
            num(1.0),
            Token::EndOfStatement,
            // -[ endfor ]-
            Token::EndForKeyword,
            Token::EndOfStatement,
            // -[ endblock ]-
            Token::EndBlockKeyword,
            Token::EndOfStatement,
        ];

        assert_eq!(significant(tokenize(input)), expected);
    }

    #[test]
    fn test_tokenize_plain_html() {
        assert_eq!(
            tokenize("<p>hi</p>"),
            vec![Token::HtmlContent("<p>hi</p>".to_string())]
        );
        assert_eq!(tokenize(""), Vec::<Token>::new());
    }

    #[test]
    fn test_tokenize_compound_operator() {
        assert_eq!(
            tokenize("-[ a += 1 ]-"),
            vec![
                ident("a"),
                Token::PlusAssignment,
                num(1.0),
                Token::EndOfStatement,
            ]
        );
    }

    #[test]
    fn test_tokenize_string_escapes() {
        assert_eq!(
            tokenize(r#"-[ output "a\"b\n" ]-"#),
            vec![Token::OutputKeyword, text("a\"b\n"), Token::EndOfStatement]
        );
    }
}