/// Category of a [`Token`] produced by the lexer.
///
/// The type is a plain, field-less tag, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// A character that cannot start a valid token (a `'\r'` that is not
    /// followed by `'\n'`).
    Illegal,
    /// End of input; its literal is the empty string.
    EOF,
    /// The two-character line terminator `"\r\n"`.
    CrLf,
    /// A `':'` that is the very first character of the input.
    Colon,
    /// A single space character.
    Space,
    /// A run of characters ending before the next space, `'\r'`, or end of input.
    Word,
}
10
11pub struct Token {
12 pub token_type: TokenType,
13 pub literal: String,
14}
15
/// Streaming lexer over a borrowed input string.
///
/// The lexer keeps a one-character window (`current_char`) on the input and
/// hands out tokens one at a time. It is `Clone`, so a caller can snapshot
/// the state and resume from it later; cloning only copies the borrow and
/// the cursor fields, never the input text.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// Text being tokenized; borrowed for the lifetime of the lexer.
    input: &'a str,
    /// Character currently under the cursor, or `None` once the input is exhausted.
    current_char: Option<char>,
    /// Position in `input` of `current_char`.
    current_position: usize,
    /// Position in `input` of the next character to be read.
    read_position: usize,
}
22
23impl<'a> Lexer<'a> {
24 pub fn new(input: &'a str) -> Self {
25 let mut lexer = Lexer {
26 input,
27 current_char: None,
28 current_position: 0,
29 read_position: 0,
30 };
31
32 lexer.read_char();
33 lexer
34 }
35
36 fn read_char(&mut self) {
37 self.current_char = if self.read_position >= self.input.len() {
38 None
39 } else {
40 Some(self.input.chars().nth(self.read_position).unwrap())
41 };
42 self.current_position = self.read_position;
43 self.read_position += 1;
44 }
45
46 fn read_string(&mut self) -> String {
47 let start = self.current_position;
48 let mut next_char = self.peek_char();
49
50 while next_char != '\r' && next_char != ' ' && next_char != '\0' {
51 self.read_char();
52 next_char = self.peek_char();
53 }
54
55 self.input[start..=self.current_position].to_string()
56 }
57
58 fn peek_char(&self) -> char {
59 if self.read_position >= self.input.len() {
60 '\0'
61 } else {
62 self.input.chars().nth(self.read_position).unwrap()
63 }
64 }
65
66 pub fn next_token(&mut self) -> Token {
67 let token: Token;
68
69 match self.current_char {
70 Some(c) => match c {
71 ':' => {
72 if self.current_position == 0 && self.read_position == 1 {
73 token = Token {
75 token_type: TokenType::Colon,
76 literal: ":".to_string(),
77 };
78 } else {
79 token = Token {
80 token_type: TokenType::Word,
81 literal: self.read_string(),
82 };
83 }
84 }
85 ' ' => {
86 token = Token {
87 token_type: TokenType::Space,
88 literal: c.to_string(),
89 };
90 }
91 '\r' => {
92 if self.peek_char() == '\n' {
93 self.read_char();
94 token = Token {
95 token_type: TokenType::CrLf,
96 literal: "\r\n".to_string(),
97 };
98 } else {
99 token = Token {
100 token_type: TokenType::Illegal,
101 literal: c.to_string(),
102 };
103 }
104 }
105 _ => {
106 token = Token {
107 token_type: TokenType::Word,
108 literal: self.read_string(),
109 };
110 }
111 },
112 None => {
113 token = Token {
114 token_type: TokenType::EOF,
115 literal: "".to_string(),
116 };
117 }
118 }
119
120 self.read_char();
121
122 token
123 }
124}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh lexer over `input` and checks that the tokens it yields
    /// match `expected`, pair by pair and in order.
    fn check_tokens(input: &str, expected: &[(TokenType, &str)]) {
        let mut lexer = Lexer::new(input);

        for (want_type, want_literal) in expected {
            let got = lexer.next_token();
            assert_eq!(got.token_type, *want_type);
            assert_eq!(got.literal, *want_literal);
        }
    }

    /// A complete, well-formed line: leading colon, space-separated words,
    /// CRLF terminator, then end of input.
    #[test]
    fn test_next_token() {
        check_tokens(
            ":prefix COMMAND arg1 arg2\r\n",
            &[
                (TokenType::Colon, ":"),
                (TokenType::Word, "prefix"),
                (TokenType::Space, " "),
                (TokenType::Word, "COMMAND"),
                (TokenType::Space, " "),
                (TokenType::Word, "arg1"),
                (TokenType::Space, " "),
                (TokenType::Word, "arg2"),
                (TokenType::CrLf, "\r\n"),
                (TokenType::EOF, ""),
            ],
        );
    }

    /// A carriage return that is not followed by a line feed is reported as
    /// an illegal token.
    #[test]
    fn test_illegal_token() {
        check_tokens(
            "COMMAND arg1\r",
            &[
                (TokenType::Word, "COMMAND"),
                (TokenType::Space, " "),
                (TokenType::Word, "arg1"),
                (TokenType::Illegal, "\r"),
                (TokenType::EOF, ""),
            ],
        );
    }
}