//! A minimal line-oriented lexer: splits source text into whitespace-separated
//! tokens, one token list per line, discarding everything from a `;` onwards.
#![allow(dead_code)]
/// Scanning state of a simple whitespace-delimited line scanner.
///
/// The enum carries no data, so it is cheap to copy and compare by value.
/// NOTE: the variant name `Seperator` is a misspelling of "separator"; it is
/// kept as-is because other code refers to it by this name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexState {
    /// Between tokens: the scanner is positioned on separator characters
    /// (also the state before any character has been read).
    Seperator,
    /// Inside a token: the scanner is positioned on a non-separator character.
    Token,
    /// Scanning has finished because a terminating character was reached.
    Done,
}
/// Splits a single source line into its tokens.
///
/// Scanning stops at the first `;` (start of a comment) or at the first `\n`
/// or `\r`, so at most one line of code is lexed and any comment is dropped.
/// Within that code portion, tokens are separated by runs of spaces and tabs;
/// every other character belongs to a token.
///
/// The returned slices borrow from `line`; nothing is copied. An empty,
/// blank, or comment-only line yields an empty vector. Repeated tokens are
/// preserved (`"add r1 r1"` yields three tokens) and non-ASCII text is handled
/// correctly because all slicing is done on byte offsets reported by `str`
/// search methods, which always fall on character boundaries.
pub fn lex_line(line: &str) -> Vec<&str> {
    // Cut the line at the first terminator. `find` yields a byte offset that
    // lies on a character boundary, so the slice can never panic.
    let code = match line.find(|c: char| matches!(c, ';' | '\n' | '\r')) {
        Some(end) => &line[..end],
        None => line,
    };

    // Adjacent, leading, or trailing separators produce empty pieces when
    // splitting; those are not tokens and are filtered out.
    code.split(|c: char| matches!(c, ' ' | '\t'))
        .filter(|token| !token.is_empty())
        .collect()
}
pub fn lex(source: &str) -> Vec<Vec<&str>> {
let mut v = Vec::new();
for line in source.lines() {
v.push(lex_line(line));
};
return v;
}
#[cfg(test)]
mod test_lex {
    use super::*;

    /// Spaces and tabs both separate tokens; text after `;` is dropped.
    #[test]
    fn test_lex_line_with_comment() {
        let tokens = lex_line("ident1 ident2:ident2more\tident3; comment");
        let expected = vec!["ident1", "ident2:ident2more", "ident3"];
        assert_eq!(tokens, expected);
    }

    /// A newline ends lexing, so tokens after it are not returned.
    #[test]
    fn test_lex_line_lexes_only_one_line() {
        let tokens = lex_line("ident1 ident2 \n ident3");
        let expected = vec!["ident1", "ident2"];
        assert_eq!(tokens, expected);
    }

    /// `lex` yields one token list per input line, each without its comment.
    #[test]
    fn test_lex_multiple_lines() {
        let source = "l1i1 l1i2 ; line 1 comment\nl2i1 l2i2 l2i3 ; line 2 comment";
        let expected = vec![vec!["l1i1", "l1i2"], vec!["l2i1", "l2i2", "l2i3"]];
        assert_eq!(lex(source), expected);
    }
}