/// A lexical token produced by [`tokenize`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
/// A bare or quoted word; quoting allows embedded whitespace.
Term(String),
/// The `=` key/value separator.
Eq,
/// An opening brace `{`.
Open,
/// A closing brace `}`.
Close,
/// End-of-input marker; always the final token in the stream.
Eof,
}
/// Lexes `input` into a flat token stream, always terminated by [`Token::Eof`].
///
/// Rules:
/// - `#` starts a comment that runs to end of line.
/// - `"..."` produces a single [`Token::Term`] that may contain whitespace
///   (and may be empty).
/// - `=`, `{`, `}` are single-character tokens and also terminate a bare term.
/// - Bare terms are any other runs of non-whitespace characters.
///
/// An unterminated quoted string at end of input yields its partial contents
/// as a term instead of silently discarding them.
#[must_use]
pub fn tokenize(input: &str) -> Vec<Token> {
enum State {
Whitespace, // between tokens
Comment,    // after `#`, until newline
Term,       // accumulating a bare word
Quoted,     // inside `"..."`
}

// Emit the buffered bare term (if any) and reset the buffer.
fn flush(buf: &mut String, tokens: &mut Vec<Token>) {
let term = buf.trim();
if !term.is_empty() {
tokens.push(Token::Term(term.to_owned()));
}
buf.clear();
}

let mut tokens = Vec::new();
let mut state = State::Whitespace;
let mut buf = String::new();

for ch in input.chars() {
match state {
State::Whitespace => match ch {
'#' => state = State::Comment,
'"' => state = State::Quoted,
' ' | '\t' | '\r' | '\n' => {}
'=' => tokens.push(Token::Eq),
'{' => tokens.push(Token::Open),
'}' => tokens.push(Token::Close),
_ => {
buf.push(ch);
state = State::Term;
}
},
State::Quoted => {
if ch == '"' {
// Quoted terms are emitted unconditionally, so `""` is a
// valid empty term. `mem::take` hands off the buffer
// without an extra allocation.
tokens.push(Token::Term(std::mem::take(&mut buf)));
state = State::Whitespace;
} else {
buf.push(ch);
}
}
State::Comment => {
if ch == '\n' {
state = State::Whitespace;
}
}
State::Term => match ch {
'#' | '\n' | ' ' | '\t' | '\r' => {
flush(&mut buf, &mut tokens);
state = if ch == '#' {
State::Comment
} else {
State::Whitespace
};
}
'=' => {
flush(&mut buf, &mut tokens);
tokens.push(Token::Eq);
state = State::Whitespace;
}
'{' => {
flush(&mut buf, &mut tokens);
tokens.push(Token::Open);
state = State::Whitespace;
}
'}' => {
flush(&mut buf, &mut tokens);
tokens.push(Token::Close);
state = State::Whitespace;
}
_ => buf.push(ch),
},
}
}
// Final flush: a trailing bare term, or the contents of an unterminated
// quoted string (previously the latter was silently dropped).
if matches!(state, State::Term | State::Quoted) {
flush(&mut buf, &mut tokens);
}
tokens.push(Token::Eof);
tokens
}
#[cfg(test)]
mod tests {
use super::*;

/// Shorthand for building an owned `Term` token from a literal.
fn t(s: &str) -> Token {
Token::Term(s.to_owned())
}

#[test]
fn basic_kv() {
assert_eq!(
tokenize("key = value"),
[t("key"), Token::Eq, t("value"), Token::Eof]
);
}

#[test]
fn quoted_value() {
assert_eq!(
tokenize(r#"key = "hello world""#),
[t("key"), Token::Eq, t("hello world"), Token::Eof]
);
}

#[test]
fn comment_stripped() {
assert_eq!(
tokenize("# comment\nkey = val"),
[t("key"), Token::Eq, t("val"), Token::Eof]
);
}

#[test]
fn array() {
assert_eq!(
tokenize("k = { 1 2 3 }"),
[
t("k"),
Token::Eq,
Token::Open,
t("1"),
t("2"),
t("3"),
Token::Close,
Token::Eof,
]
);
}
}