/// The lexical category of a [`Token`].
///
/// All variants are fieldless, so the type is trivially `Copy` and usable
/// as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokKind {
    /// A single command- or control-prefix character (`^` / `~` by default).
    Leader,
    /// A single field-delimiter character (`,` by default).
    Comma,
    /// A run of ordinary characters; ends only at a special character.
    Value,
    /// A single `\n`.
    Newline,
    /// A run of non-newline ASCII whitespace.
    Whitespace,
    /// A `;` comment running to (but not including) the end of the line.
    Comment,
}
#[derive(Debug)]
pub struct Token<'a> {
pub kind: TokKind,
pub text: &'a str,
pub start: usize,
pub end: usize,
}
/// Tokenizes `input` with the default configuration: `^` as the command
/// prefix, `~` as the control prefix, and `,` as the field delimiter.
///
/// Equivalent to calling [`tokenize_with_config`] with those characters.
pub fn tokenize(input: &str) -> Vec<Token<'_>> {
    const CMD_PREFIX: char = '^';
    const CTRL_PREFIX: char = '~';
    const DELIMITER: char = ',';
    tokenize_with_config(input, CMD_PREFIX, CTRL_PREFIX, DELIMITER)
}
/// Tokenizes `input` into a flat list of [`Token`]s.
///
/// Special characters:
/// * `cmd_prefix` / `ctrl_prefix` — each becomes a one-char [`TokKind::Leader`].
/// * `;` — starts a [`TokKind::Comment`] running to (but not including) the
///   next `\n`; the newline itself becomes a separate [`TokKind::Newline`].
/// * `delimiter` — becomes a one-char [`TokKind::Comma`].
/// * `\n` — becomes a [`TokKind::Newline`].
/// * other ASCII whitespace — coalesced into one [`TokKind::Whitespace`] per run.
///
/// Everything else accumulates into a [`TokKind::Value`], which ends only at
/// a delimiter, prefix, `\n`, or `;` — interior whitespace is kept inside the
/// value.
///
/// Token `start`/`end` are byte offsets, so `&input[tok.start..tok.end]`
/// always equals `tok.text`, even for multi-byte UTF-8 input.
pub fn tokenize_with_config(
    input: &str,
    cmd_prefix: char,
    ctrl_prefix: char,
    delimiter: char,
) -> Vec<Token<'_>> {
    let mut toks = Vec::new();
    // Iterate over characters, not bytes. The previous byte-wise scan compared
    // `byte as char` (a Latin-1 reinterpretation) against the `char` config
    // parameters: with a non-ASCII prefix/delimiter, a UTF-8 lead byte could
    // falsely match and the following `&input[start..i]` slice would land on a
    // non-char boundary and panic. `char_indices` keeps every offset on a
    // boundary; behavior for ASCII configuration characters is unchanged.
    let mut chars = input.char_indices().peekable();
    while let Some((start, c)) = chars.next() {
        let mut end = start + c.len_utf8();
        let kind = if c == cmd_prefix || c == ctrl_prefix {
            TokKind::Leader
        } else if c == ';' {
            // Consume to end of line; the '\n' itself is left for the next pass.
            while let Some(&(i, ch)) = chars.peek() {
                if ch == '\n' {
                    break;
                }
                end = i + ch.len_utf8();
                chars.next();
            }
            TokKind::Comment
        } else if c == delimiter {
            TokKind::Comma
        } else if c == '\n' {
            TokKind::Newline
        } else if c.is_ascii_whitespace() {
            // Coalesce a run of non-newline ASCII whitespace into one token.
            while let Some(&(i, ch)) = chars.peek() {
                if ch == '\n' || !ch.is_ascii_whitespace() {
                    break;
                }
                end = i + ch.len_utf8();
                chars.next();
            }
            TokKind::Whitespace
        } else {
            // Value: runs until the next special character. Note that interior
            // whitespace does NOT terminate a value (matches original behavior).
            while let Some(&(i, ch)) = chars.peek() {
                if ch == delimiter
                    || ch == cmd_prefix
                    || ch == ctrl_prefix
                    || ch == '\n'
                    || ch == ';'
                {
                    break;
                }
                end = i + ch.len_utf8();
                chars.next();
            }
            TokKind::Value
        };
        toks.push(Token {
            kind,
            text: &input[start..end],
            start,
            end,
        });
    }
    toks
}