1use crate::token::{Token, TokenKind};
2
/// Byte-at-a-time lexer over a borrowed source string.
///
/// The input is scanned as raw bytes (`as_bytes`), so multi-byte UTF-8
/// sequences are seen one byte at a time — NOTE(review): non-ASCII input
/// will currently surface as `Illegal` tokens byte-by-byte; confirm that
/// is acceptable for the target language.
pub struct Lexer<'a> {
    /// The full source text being tokenized.
    input: &'a str,
    /// Byte index of the character currently held in `ch`.
    position: usize,
    /// Byte index of the next character to read (`position + 1` after `read_char`).
    read_position: usize,
    /// Current byte under examination, or `None` once input is exhausted.
    ch: Option<u8>,
}
9
10impl<'a> Lexer<'a> {
11 pub fn new(src: &'a str) -> Self {
12 let mut lexer = Lexer {
13 input: src,
14 position: 0,
15 read_position: 0,
16 ch: None,
17 };
18
19 lexer.read_char();
20 lexer
21 }
22
23 pub fn next_token(&mut self) -> Token {
24 let token = match self.ch {
25 Some(b'{') => Token {
26 kind: TokenKind::LeftCurlyBrace,
27 literal: "{",
28 },
29 Some(b'}') => Token {
30 kind: TokenKind::RightCurlyBrace,
31 literal: "}",
32 },
33 Some(b'(') => Token {
34 kind: TokenKind::LeftParen,
35 literal: "(",
36 },
37 Some(b')') => Token {
38 kind: TokenKind::RightParen,
39 literal: ")",
40 },
41 Some(b'[') => Token {
42 kind: TokenKind::LeftSquareBracket,
43 literal: "[",
44 },
45 Some(b']') => Token {
46 kind: TokenKind::RightSquareBracket,
47 literal: "]",
48 },
49 Some(b',') => Token {
50 kind: TokenKind::Comma,
51 literal: ",",
52 },
53 Some(b';') => Token {
54 kind: TokenKind::Semicolon,
55 literal: ";",
56 },
57 Some(b'\n') => Token {
58 kind: TokenKind::NewLine,
59 literal: "<newline>",
60 },
61 Some(b'+') => Token {
62 kind: TokenKind::Plus,
63 literal: "+",
64 },
65 Some(b'-') => Token {
66 kind: TokenKind::Minus,
67 literal: "-",
68 },
69 Some(b'*') => Token {
70 kind: TokenKind::Asterisk,
71 literal: "*",
72 },
73 Some(b'%') => Token {
74 kind: TokenKind::Percent,
75 literal: "%",
76 },
77 Some(b'^') => Token {
78 kind: TokenKind::Caret,
79 literal: "^",
80 },
81 Some(b'!') => Token {
82 kind: TokenKind::ExclamationMark,
83 literal: "!",
84 },
85 Some(b'>') => Token {
86 kind: TokenKind::GreaterThan,
87 literal: ">",
88 },
89 Some(b'<') => Token {
90 kind: TokenKind::LessThan,
91 literal: "<",
92 },
93 Some(b'|') => Token {
94 kind: TokenKind::Pipe,
95 literal: "|",
96 },
97 Some(b'?') => Token {
98 kind: TokenKind::QuestionMark,
99 literal: "?",
100 },
101 Some(b':') => Token {
102 kind: TokenKind::Colon,
103 literal: ":",
104 },
105 Some(b'~') => Token {
106 kind: TokenKind::Tilde,
107 literal: "~",
108 },
109 Some(b'$') => Token {
110 kind: TokenKind::DollarSign,
111 literal: "$",
112 },
113 Some(b'=') => Token {
114 kind: TokenKind::Equal,
115 literal: "=",
116 },
117 None => Token {
118 kind: TokenKind::Eof,
119 literal: "",
120 },
121 _ => Token {
122 kind: TokenKind::Illegal,
123 literal: "<illegal>",
124 },
125 };
126
127 self.read_char();
128 token
129 }
130
131 fn read_char(&mut self) {
132 if self.read_position >= self.input.len() {
133 self.ch = None;
134 } else {
135 self.ch = Some(self.input.as_bytes()[self.read_position]);
136 }
137 self.position = self.read_position;
138 self.read_position += 1;
139 }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn next_left_curly_brace_token() {
        let mut lexer = Lexer::new("{");

        assert_eq!(
            Token {
                kind: TokenKind::LeftCurlyBrace,
                literal: "{",
            },
            lexer.next_token()
        );
    }

    #[test]
    fn next_right_curly_brace_token() {
        let mut lexer = Lexer::new("}");

        assert_eq!(
            Token {
                kind: TokenKind::RightCurlyBrace,
                literal: "}",
            },
            lexer.next_token()
        );
    }

    #[test]
    fn next_one_character_token() {
        // One of every single-character token, in source order, then EOF.
        let mut lexer = Lexer::new("{}()[],;\n+-*%^!><|?:~$=");

        let expected = [
            (TokenKind::LeftCurlyBrace, "{"),
            (TokenKind::RightCurlyBrace, "}"),
            (TokenKind::LeftParen, "("),
            (TokenKind::RightParen, ")"),
            (TokenKind::LeftSquareBracket, "["),
            (TokenKind::RightSquareBracket, "]"),
            (TokenKind::Comma, ","),
            (TokenKind::Semicolon, ";"),
            (TokenKind::NewLine, "<newline>"),
            (TokenKind::Plus, "+"),
            (TokenKind::Minus, "-"),
            (TokenKind::Asterisk, "*"),
            (TokenKind::Percent, "%"),
            (TokenKind::Caret, "^"),
            (TokenKind::ExclamationMark, "!"),
            (TokenKind::GreaterThan, ">"),
            (TokenKind::LessThan, "<"),
            (TokenKind::Pipe, "|"),
            (TokenKind::QuestionMark, "?"),
            (TokenKind::Colon, ":"),
            (TokenKind::Tilde, "~"),
            (TokenKind::DollarSign, "$"),
            (TokenKind::Equal, "="),
            (TokenKind::Eof, ""),
        ];

        for (kind, literal) in expected {
            assert_eq!(Token { kind, literal }, lexer.next_token());
        }
    }
}