1use crate::scanner::TokenType::Identifier;
2
/// Byte-oriented lexer state over a single source string.
///
/// `Default` yields a scanner with an empty `file`, empty `code`, and every
/// counter set to zero.
#[derive(Debug, Clone, Default)]
pub struct Scanner {
    /// Label stored alongside the source; presumably the originating file
    /// name or path (the scanner itself never reads it; confirm with callers).
    pub file: String,
    /// Complete source text being scanned.
    pub code: String,
    /// Line counter, bumped each time the scanner consumes a `\n`.
    pub cur_line: usize,
    /// Byte offset in `code` where the token currently being scanned begins.
    pub start_pos: usize,
    /// Byte offset in `code` of the next byte to be read.
    pub cur_pos: usize,
}
24
25#[derive(Debug, Clone)]
26pub struct Token {
27 pub token_type: TokenType,
28 pub line_num: usize,
29 pub lexeme: String,
30}
31
/// Every kind of token the scanner can emit.
///
/// The enum is field-less and `Copy`; `Eq` and `Hash` are derived so a token
/// kind can be used directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `;`
    Semicolon,
    /// `-`
    Minus,
    /// `-=`
    MinusAssign,
    /// `--`
    MinusMinus,
    /// `+`
    Plus,
    /// `+=`
    PlusAssign,
    /// `++`
    PlusPlus,
    /// `/`
    Slash,
    /// `/=`
    SlashAssign,
    /// `#` (a `#` at the very first byte of the source starts a skipped line
    /// instead and produces no token)
    HashTag,
    /// `*`
    Star,
    /// `*=`
    StarAssign,
    /// `!`
    Bang,
    /// `!=`
    BangEqual,
    /// `=`
    Equal,
    /// `==`
    EqualEqual,
    /// `>`
    Greater,
    /// `>=`
    GreaterEqual,
    /// `<`
    Less,
    /// `<=`
    LessEqual,
    /// `:`
    Colon,

    /// A name made of ASCII letters, digits and `_` that is not a keyword.
    Identifier,
    /// A double-quoted string literal; the lexeme includes both quotes.
    String,
    /// Digits with an optional `.digits` fraction and an optional trailing `f`.
    Number,

    /// Keyword `and`.
    And,
    /// Keyword `class`.
    Class,
    /// Keyword `else`.
    Else,
    /// Keyword `false`.
    False,
    /// Keyword `for`.
    For,
    /// Keyword `fun`.
    Fun,
    /// Keyword `if`.
    If,
    /// Keyword `nil`.
    Nil,
    /// Keyword `or`.
    Or,
    /// Keyword `print`.
    Print,
    /// Keyword `return`.
    Return,
    /// Keyword `super`.
    Super,
    /// Keyword `this`.
    This,
    /// Keyword `true`.
    True,
    /// Keyword `var`.
    Var,
    /// Keyword `while`.
    While,
    /// Scanning failure; the token's lexeme carries the error message.
    Error,
    /// End of the source text.
    EOF,
    /// Keyword `import`.
    Import,
}
94
95impl Scanner {
96 pub fn new(file: String, code: String, cur_line: usize) -> Scanner {
97 Scanner {
98 file,
99 code,
100 cur_line,
101 start_pos: 0,
102 cur_pos: 0,
103 }
104 }
105
106 fn create_token(&self, token_type: TokenType) -> Token {
107 Token {
109 token_type,
110 line_num: self.cur_line,
111 lexeme: if self.code.is_empty() {
112 "".to_string()
113 } else {
114 self.code[self.start_pos..self.cur_pos].to_string()
115 },
116 }
117 }
118
119 fn error_token(&self, message: &str) -> Token {
120 Token {
121 token_type: TokenType::Error,
122 line_num: self.cur_line,
123 lexeme: message.to_string(),
124 }
125 }
126
127 pub fn scan_token(&mut self) -> Token {
128 self.skip_whitespace();
129 self.start_pos = self.cur_pos;
130
131 if self.is_at_end() {
132 return self.create_token(TokenType::EOF);
133 }
134
135 let c = self.advance();
136
137 if Self::is_digit(c) {
138 return self.number();
139 }
140
141 if Self::is_alpha(c) {
142 return self.identifier();
143 }
144
145 match c {
146 b'(' => self.create_token(TokenType::LeftParen),
147 b')' => self.create_token(TokenType::RightParen),
148 b'{' => self.create_token(TokenType::LeftBrace),
149 b'}' => self.create_token(TokenType::RightBrace),
150 b'[' => self.create_token(TokenType::LeftBracket),
151 b']' => self.create_token(TokenType::RightBracket),
152 b',' => self.create_token(TokenType::Comma),
153 b'.' => self.create_token(TokenType::Dot),
154 b'-' => {
155 if self.match_char(b'=') {
156 self.create_token(TokenType::MinusAssign)
157 } else if self.match_char(b'-') {
158 self.create_token(TokenType::MinusMinus)
159 } else {
160 self.create_token(TokenType::Minus)
161 }
162 }
163 b'+' => {
164 if self.match_char(b'=') {
165 self.create_token(TokenType::PlusAssign)
166 } else if self.match_char(b'+') {
167 self.create_token(TokenType::PlusPlus)
168 } else {
169 self.create_token(TokenType::Plus)
170 }
171 }
172 b';' => self.create_token(TokenType::Semicolon),
173 b'*' => {
174 if self.match_char(b'=') {
175 self.create_token(TokenType::StarAssign)
176 } else {
177 self.create_token(TokenType::Star)
178 }
179 }
180 b'/' => {
181 if self.match_char(b'=') {
182 self.create_token(TokenType::SlashAssign)
183 } else {
184 self.create_token(TokenType::Slash)
185 }
186 }
187 b'#' => self.create_token(TokenType::HashTag),
188 b'!' => {
189 if self.match_char(b'=') {
190 self.create_token(TokenType::BangEqual)
191 } else {
192 self.create_token(TokenType::Bang)
193 }
194 }
195 b'=' => {
196 if self.match_char(b'=') {
197 self.create_token(TokenType::EqualEqual)
198 } else {
199 self.create_token(TokenType::Equal)
200 }
201 }
202 b'<' => {
203 if self.match_char(b'=') {
204 self.create_token(TokenType::LessEqual)
205 } else {
206 self.create_token(TokenType::Less)
207 }
208 }
209 b'>' => {
210 if self.match_char(b'=') {
211 self.create_token(TokenType::GreaterEqual)
212 } else {
213 self.create_token(TokenType::Greater)
214 }
215 }
216 b'"' => self.string(),
217 b':' => self.create_token(TokenType::Colon),
218 _ => self.error_token("Unexpected character."),
219 }
220 }
221
222 fn is_at_end(&self) -> bool {
223 self.cur_pos >= self.code.len()
224 }
225
226 fn is_digit(c: u8) -> bool {
227 c.is_ascii_digit()
228 }
229
230 fn is_alpha(c: u8) -> bool {
231 c.is_ascii_lowercase() || c.is_ascii_uppercase() || c == b'_'
232 }
233
234 fn can_peek_next(&self) -> bool {
235 self.cur_pos + 2 <= self.code.len()
236 }
237
238 fn advance(&mut self) -> u8 {
239 let ret = self.peek();
240 self.cur_pos += 1;
241 ret
242 }
243
244 fn match_char(&mut self, expected: u8) -> bool {
245 if self.is_at_end() || self.peek() != expected {
246 false
247 } else {
248 self.cur_pos += 1;
249 true
250 }
251 }
252
253 fn peek(&self) -> u8 {
254 if self.is_at_end() {
255 return b'\0';
256 }
257 self.code.as_bytes()[self.cur_pos]
258 }
259
260 fn peek_next(&self) -> u8 {
261 if self.is_at_end() {
262 return b'\0';
263 }
264 self.code.as_bytes()[self.cur_pos + 1]
265 }
266
267 fn skip_whitespace(&mut self) {
268 while !self.is_at_end() {
269 let next = self.peek();
270 if (next == b' ') || (next == b'\t') || (next == b'\r') {
271 self.advance();
272 } else if next == b'\n' {
273 self.advance();
274 self.cur_line += 1;
275 } else if next == b'/' {
276 if self.can_peek_next() && self.peek_next() == b'/' {
277 while !self.is_at_end() && self.peek() != b'\n' {
278 self.advance(); }
280
281 if !self.is_at_end() {
282 self.advance(); self.cur_line += 1;
284 }
285 } else {
286 return; }
288 } else if next == b'#' && self.cur_pos == 0 {
290 while !self.is_at_end() && self.peek() != b'\n' {
291 self.advance();
292 }
293
294 if !self.is_at_end() {
295 self.advance();
296 self.cur_line += 1;
297 }
298 } else {
299 return;
300 }
301 }
302 }
303
304 fn string(&mut self) -> Token {
305 while !self.is_at_end() && self.peek() != b'"' {
306 if self.peek() == b'\n' {
307 self.cur_line += 1;
308 }
309 self.advance();
310 }
311
312 if self.is_at_end() {
313 return self.error_token("Unterminated string.");
314 }
315
316 self.advance(); self.create_token(TokenType::String)
319 }
320
321 fn number(&mut self) -> Token {
322 if self.cur_pos == self.code.len() {
323 return self.error_token("Unexpected end of file.");
324 }
325 while Self::is_digit(self.peek()) {
326 self.advance();
327 }
328
329 if self.peek() == b'.' && Self::is_digit(self.peek_next()) {
330 self.advance(); while Self::is_digit(self.peek()) {
333 self.advance();
334 }
335 }
336
337 if self.peek() == b'f' {
339 self.advance();
340 }
341
342 self.create_token(TokenType::Number)
343 }
344
345 fn identifier(&mut self) -> Token {
346 if self.cur_pos == self.code.len() {
347 return self.error_token("Unexpected end of file.");
348 }
349 while Self::is_alpha(self.peek()) || Self::is_digit(self.peek()) {
350 self.advance();
351 }
352
353 self.create_token(self.identifier_type())
354 }
355
356 fn identifier_type(&self) -> TokenType {
357 let c = self.code.as_bytes()[self.start_pos];
358 return match c {
359 b'a' => self.check_for_keyword(1, 2, "nd", TokenType::And),
360 b'c' => self.check_for_keyword(1, 4, "lass", TokenType::Class),
361 b'e' => self.check_for_keyword(1, 3, "lse", TokenType::Else),
362 b'i' => {
363 if self.cur_pos - self.start_pos > 1 {
364 match self.code.as_bytes()[self.start_pos + 1] {
366 b'f' => TokenType::If,
367 b'm' => self.check_for_keyword(2, 4, "port", TokenType::Import),
368 _ => Identifier,
369 }
370 } else {
371 Identifier
372 }
373 }
374 b'n' => self.check_for_keyword(1, 2, "il", TokenType::Nil),
375 b'o' => self.check_for_keyword(1, 1, "r", TokenType::Or),
376 b'p' => self.check_for_keyword(1, 4, "rint", TokenType::Print),
377 b'r' => self.check_for_keyword(1, 5, "eturn", TokenType::Return),
378 b's' => self.check_for_keyword(1, 4, "uper", TokenType::Super),
379 b'v' => self.check_for_keyword(1, 2, "ar", TokenType::Var),
380 b'w' => self.check_for_keyword(1, 4, "hile", TokenType::While),
381 b'f' => {
382 if self.cur_pos - self.start_pos > 1 {
383 match self.code.as_bytes()[self.start_pos + 1] {
385 b'a' => self.check_for_keyword(2, 3, "lse", TokenType::False),
386 b'o' => self.check_for_keyword(2, 1, "r", TokenType::For),
387 b'u' => self.check_for_keyword(2, 1, "n", TokenType::Fun),
388 _ => Identifier,
389 }
390 } else {
391 Identifier
392 }
393 }
394 b't' => {
395 if self.cur_pos - self.start_pos > 1 {
396 match self.code.as_bytes()[self.start_pos + 1] {
398 b'h' => self.check_for_keyword(2, 2, "is", TokenType::This),
399 b'r' => self.check_for_keyword(2, 2, "ue", TokenType::True),
400 _ => Identifier,
401 }
402 } else {
403 Identifier
404 }
405 }
406 _ => Identifier,
407 };
408 }
409
410 fn check_for_keyword(
411 &self,
412 start: usize,
413 length: usize,
414 rest: &str,
415 keyword_type: TokenType,
416 ) -> TokenType {
417 if self.cur_pos - self.start_pos == start + length {
418 let begin = self.start_pos + start;
420 if &self.code[begin..begin + length] == rest {
421 return keyword_type;
422 }
423 }
424 Identifier
425 }
426}