#[cfg(test)]
mod tests;
mod utils;
use utils::*;
/// A single lexical unit produced by the lexer in this module.
///
/// Only [`Token::Comment`] borrows from the input buffer (lifetime `'a`); every
/// other payload is owned.
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    /// A bracket character: one of `{`, `[`, `(`, `}`, `]`, `)`.
    Paren(char),
    /// A single-character operator: one of `^`, `$`, `!`, `&`, `|`.
    Symbol(char),
    /// An integer literal made of decimal digits.
    Number(i64),
    /// A `/pattern/flags` regex literal: the pattern text, then the flag characters.
    Regex(String, String),
    /// The text following a `#`, borrowed directly from the source buffer.
    Comment(&'a str),
    /// A name made of ASCII letters, digits and underscores.
    Identifier(String),
    /// A quoted string literal. The lexer sets the flag to `true` for
    /// double-quoted literals and `false` for single-quoted ones.
    String(String, bool),
    /// A statement separator (`\n`, `\r` or `;`).
    Newline,
    /// A `,` separator.
    Comma,
}
impl Token<'_> {
    /// Reports whether this token is kept by [`lex`].
    ///
    /// Comments and newlines are the only insignificant tokens. The match is
    /// deliberately exhaustive (no wildcard arm) so that adding a variant forces
    /// a decision here.
    fn significant(&self) -> bool {
        match self {
            Token::Comment(_) | Token::Newline => false,
            Token::Number(_)
            | Token::Paren(_)
            | Token::Regex(_, _)
            | Token::Identifier(_)
            | Token::String(_, _)
            | Token::Symbol(_)
            | Token::Comma => true,
        }
    }
}
/// Lexes `buf` and returns only the significant tokens.
///
/// Runs [`full_lex`] and then drops every token for which
/// `Token::significant` is `false`, preserving the order of the rest.
///
/// # Errors
///
/// Propagates any error returned by [`full_lex`] unchanged.
pub fn lex(buf: &str) -> Result<Vec<Token>, String> {
    let mut kept = full_lex(buf)?;
    // `retain` filters in place and keeps the relative order of survivors.
    kept.retain(|token| token.significant());
    Ok(kept)
}
/// Lexes `buf` into every token it contains, including comments and newlines.
///
/// Dispatch is on the first character of each token:
///
/// * `0`-`9` starts a [`Token::Number`].
/// * A bracket (`{ [ ( } ] )`) becomes a [`Token::Paren`].
/// * Spaces and tabs are dropped.
/// * `\n`, `\r` and `;` are consumed via `chomp` and recorded as one [`Token::Newline`].
/// * `#` starts a [`Token::Comment`] that runs until a newline character.
/// * `,` becomes [`Token::Comma`].
/// * `/` starts a [`Token::Regex`]: a pattern, a closing `/`, then optional `i`/`U` flags.
/// * `"` and `'` start a [`Token::String`]; the flag is `true` for double quotes.
/// * `_` or an ASCII letter starts a [`Token::Identifier`].
/// * `^ $ ! & |` become a [`Token::Symbol`].
///
/// # Errors
///
/// Returns `Err` with a message when an unrecognised character is met, when a
/// regex pattern is not followed by its closing `/`, or when
/// `chomp_until_escaped` reports a failure.
pub fn full_lex(buf: &str) -> Result<Vec<Token>, String> {
    let mut tokens = Vec::new();
    let mut it = buf.chars().enumerate().peekable();

    // `it` reports *character* positions, while `str` slicing takes *byte*
    // offsets. The two only diverge when `buf` holds multi-byte characters, so
    // the translation table is built only in that case.
    let byte_offsets: Option<Vec<usize>> = if buf.is_ascii() {
        None
    } else {
        Some(buf.char_indices().map(|(offset, _)| offset).collect())
    };
    // Maps a character position to its byte offset; a position at or past the
    // last character maps to the end of the buffer.
    let byte_at = |char_pos: usize| -> usize {
        match &byte_offsets {
            None => char_pos,
            Some(offsets) => offsets.get(char_pos).copied().unwrap_or(buf.len()),
        }
    };

    let lower = 'a'..='z';
    let upper = 'A'..='Z';
    let under_score = &['_'];
    let newline_chars = &['\n', '\r', ';'];
    let number_chars = '0'..='9';
    let regexflag_chars = &['i', 'U'];
    let x = [&lower, &upper, &number_chars];
    let ident_chars = (Multi(&x), under_score);

    // Copy the peeked pair out so no borrow of `it` is held inside the arms.
    while let Some(&(start, ch)) = it.peek() {
        match ch {
            '0'..='9' => {
                let end = chomp(&number_chars, &mut it);
                let digits = &buf[byte_at(start)..byte_at(end)];
                tokens.push(Token::Number(get_number(digits)));
            }
            '{' | '[' | '(' | '}' | ']' | ')' => {
                it.next();
                tokens.push(Token::Paren(ch));
            }
            ' ' | '\t' => {
                it.next();
            }
            '\n' | '\r' | ';' => {
                chomp(&newline_chars, &mut it);
                tokens.push(Token::Newline);
            }
            '#' => {
                it.next();
                let end = chomp_until(&newline_chars, &mut it);
                // `start + 1` skips the leading `#`.
                tokens.push(Token::Comment(&buf[byte_at(start + 1)..byte_at(end)]));
            }
            ',' => {
                it.next();
                tokens.push(Token::Comma);
            }
            '/' => {
                it.next();
                let pattern = chomp_until_escaped(
                    &mut it,
                    '/',
                    &[
                        '{', '}', '[', ']', '.', '^', '$', '*', '+', '?', '|', '(', ')', 'd', 'D',
                        's', 'S', 'w', 'W', 'p', 'P', 'b', 'B', 'A', 'z', 'a', 'f', 't', 'n', 'r',
                        'v', 'x', 'u', 'U', '\\',
                    ],
                )?;
                // The pattern must be closed explicitly before flags are read.
                if !matches!(it.next(), Some((_, '/'))) {
                    return Err("expected character: '/'".to_string());
                }
                let flags = chomp_str(&regexflag_chars, &mut it);
                tokens.push(Token::Regex(pattern, flags));
            }
            '"' => {
                it.next();
                let content = chomp_until_escaped(&mut it, '"', &['$'])?;
                // NOTE(review): the closing quote is discarded without being checked;
                // confirm `chomp_until_escaped` rejects unterminated input.
                it.next();
                tokens.push(Token::String(content, true));
            }
            '\'' => {
                it.next();
                let content = chomp_until_escaped(&mut it, '\'', &[])?;
                // NOTE(review): same unchecked closing quote as the double-quoted arm.
                it.next();
                tokens.push(Token::String(content, false));
            }
            '_' | 'a'..='z' | 'A'..='Z' => {
                let content = chomp_str(&ident_chars, &mut it);
                tokens.push(Token::Identifier(content));
            }
            '^' | '$' | '!' | '&' | '|' => {
                it.next();
                tokens.push(Token::Symbol(ch));
            }
            other => {
                return Err(format!("unknown character: '{}'", other));
            }
        }
    }
    Ok(tokens)
}