use regex::Regex;
use crate::{token::{Token, TokenType}, GsblError};
/// Returns `true` for characters the tokenizer discards between tokens:
/// space, newline, carriage return, tab, and `;`.
///
/// `\r` is included so that sources with CRLF line endings do not produce a
/// stray unrecognized character at the end of every line.
fn is_skippable(c: char) -> bool {
    matches!(c, ' ' | '\n' | '\r' | '\t' | ';')
}
pub(crate) fn tokenize(src: &str) -> Result<Vec<Token>, GsblError> {
let mut result = Vec::new();
let mut remaining = src;
let patterns = vec![
(TokenType::Float, Regex::new(r"^\d+\.\d+").unwrap()),
(TokenType::Int, Regex::new(r"^\d+").unwrap()),
(TokenType::Bool, Regex::new(r"^(true|false)").unwrap()),
(TokenType::Identifier, Regex::new(r"^[a-zA-Z_]\w*").unwrap()),
(TokenType::Comma, Regex::new(r"^,").unwrap()),
(TokenType::Equals, Regex::new(r"^\=").unwrap()),
(TokenType::Comment, Regex::new(r"^\/\/.*").unwrap()),
(TokenType::MultilineComment, Regex::new(r"(?s)^\/\*.*\*\/").unwrap()),
(TokenType::OpenBrace, Regex::new(r"^\{").unwrap()),
(TokenType::CloseBrace, Regex::new(r"^\}").unwrap()),
(TokenType::OpenBracket, Regex::new(r"^\[").unwrap()),
(TokenType::CloseBracket, Regex::new(r"^\]").unwrap()),
(TokenType::String, Regex::new(r#"^(['"])(.*?)(['"])"#).unwrap()),
];
while !remaining.is_empty() {
if let Some(pos) = remaining.find(|c| !is_skippable(c)) {
remaining = &remaining[pos..];
} else {
break;
}
let mut matched = false;
for (token_type, regex) in &patterns {
if let Some(mat) = regex.find(remaining) {
let value = mat.as_str().to_string();
result.push(Token {
token_type: token_type.clone(),
value: if *token_type != TokenType::String {
value
} else {
value[1..value.len() - 1].to_string()
}
});
remaining = &remaining[mat.end()..];
matched = true;
break;
}
}
if !matched {
result.push(Token {
token_type: TokenType::Unknown,
value: remaining.chars().next().unwrap().to_string()
});
remaining = &remaining[1..];
}
}
result.push(Token {
token_type: TokenType::EndOfFile,
value: "EOF".to_string(),
});
Ok(result)
}