#![allow(dead_code)]
/// Scanner state tracked by the line lexer while it walks over a line.
///
/// NOTE: the `Seperator` spelling is kept as-is because code in this file
/// refers to the variant by that name.
#[derive(PartialEq, Debug)]
enum LexState {
    /// Not inside a token: the starting state, and the state after a space or tab.
    Seperator,
    /// Inside a token: the last character seen was neither a separator nor a terminator.
    Token,
    /// Terminal state, meant for when a `;`, `\n` or `\r` ends the scan.
    Done,
}
/// Splits a single source line into its tokens.
///
/// A token is a maximal run of characters other than space, tab, `;`, `\n`
/// and `\r`. Space and tab separate tokens. Scanning stops at the first `;`,
/// `\n` or `\r`; that character and everything after it are ignored, so `;`
/// acts as a comment marker and only the first line of a multi-line string
/// is lexed.
///
/// The returned slices borrow from `line` and appear in source order.
/// Repeated tokens are all kept. An empty, all-whitespace or comment-only
/// line yields an empty vector.
pub fn lex_line(line: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    // Byte offset at which the token currently being scanned begins.
    let mut token_start = 0;
    let mut state = LexState::Seperator;

    // `char_indices` yields byte offsets, which is what `str` slicing needs;
    // counting characters instead would mis-slice (or panic on) non-ASCII input.
    for (offset, character) in line.char_indices() {
        match character {
            ';' | '\n' | '\r' => {
                if state == LexState::Token {
                    tokens.push(&line[token_start..offset]);
                }
                // Leaving the `Token` state here keeps the flush below from
                // emitting the token that was just pushed a second time.
                state = LexState::Done;
                break;
            }
            ' ' | '\t' => {
                if state == LexState::Token {
                    tokens.push(&line[token_start..offset]);
                }
                state = LexState::Seperator;
            }
            _ => {
                if state == LexState::Seperator {
                    token_start = offset;
                }
                state = LexState::Token;
            }
        }
    }

    // The input ended while still inside a token (no trailing separator or
    // terminator), so that last token has not been emitted yet.
    if state == LexState::Token {
        tokens.push(&line[token_start..]);
    }
    tokens
}
pub fn lex(source: &str) -> Vec<Vec<&str>> {
let mut v = Vec::new();
for line in source.lines() {
v.push(lex_line(line));
};
return v;
}
#[cfg(test)]
mod test_lex {
    use super::*;

    /// A tab separates tokens just like a space, `:` is an ordinary token
    /// character, and everything from `;` onwards is dropped.
    #[test]
    fn test_lex_line_with_comment() {
        let tokens = lex_line("ident1 ident2:ident2more\tident3; comment");
        let expected = vec!["ident1", "ident2:ident2more", "ident3"];
        assert_eq!(tokens, expected);
    }

    /// A newline inside the input ends lexing; later tokens are not returned.
    #[test]
    fn test_lex_line_lexes_only_one_line() {
        let tokens = lex_line("ident1 ident2 \n ident3");
        let expected = vec!["ident1", "ident2"];
        assert_eq!(tokens, expected);
    }

    /// `lex` yields one token list per input line, with comments stripped.
    #[test]
    fn test_lex_multiple_lines() {
        let lines = lex("l1i1 l1i2 ; line 1 comment\nl2i1 l2i2 l2i3 ; line 2 comment");
        let expected = vec![vec!["l1i1", "l1i2"], vec!["l2i1", "l2i2", "l2i3"]];
        assert_eq!(lines, expected);
    }
}