1use crate::token::Token;
7
/// Character-level scanner that turns source text into `Token`s.
///
/// The lexer keeps a one-character window (`ch`) over the input together with
/// the bookkeeping needed to report where that window currently sits.
pub struct Lexer {
    /// The source text, stored as individual `char`s so it can be indexed
    /// by character rather than by byte.
    input: Vec<char>,
    /// Index into `input` of the character currently held in `ch`.
    position: usize,
    /// Index into `input` of the next character to be read (one past `position`).
    read_position: usize,
    /// The character under examination; `'\0'` stands in for end of input.
    ch: char,
    /// Line counter for the current character; starts at 1.
    line: usize,
    /// Column counter for the current character; reset to 0 on each newline.
    column: usize,
    /// Whether spaces, tabs or carriage returns were skipped immediately
    /// before the most recently scanned token.
    had_whitespace_before_token: bool,
}
18
19impl Lexer {
20 pub fn new(input: &str) -> Self {
22 let mut lexer = Lexer {
23 input: input.chars().collect(),
24 position: 0,
25 read_position: 0,
26 ch: '\0',
27 line: 1,
28 column: 0,
29 had_whitespace_before_token: false,
30 };
31 lexer.read_char(); lexer
33 }
34
35 pub fn line(&self) -> usize {
37 self.line
38 }
39
40 pub fn column(&self) -> usize {
42 self.column
43 }
44
45 pub fn had_whitespace(&self) -> bool {
47 self.had_whitespace_before_token
48 }
49
50 fn read_char(&mut self) {
52 if self.read_position >= self.input.len() {
53 self.ch = '\0'; } else {
55 self.ch = self.input[self.read_position];
56 }
57
58 if self.ch == '\n' {
60 self.line += 1;
61 self.column = 0;
62 } else {
63 self.column += 1;
64 }
65
66 self.position = self.read_position;
67 self.read_position += 1;
68 }
69
70 fn peek_char(&self) -> char {
72 if self.read_position >= self.input.len() {
73 '\0'
74 } else {
75 self.input[self.read_position]
76 }
77 }
78
79 fn peek_char_n(&self, n: usize) -> char {
81 let pos = self.position + n;
82 if pos >= self.input.len() {
83 '\0'
84 } else {
85 self.input[pos]
86 }
87 }
88
89 pub fn next_token(&mut self) -> Token {
91 let had_ws = self.skip_whitespace();
92 self.had_whitespace_before_token = had_ws;
93
94 let token = match self.ch {
95 '+' => Token::Plus,
97 '-' => {
98 if self.peek_char() == '>' {
99 self.read_char();
100 Token::Arrow
101 } else {
102 Token::Minus
103 }
104 }
105 '*' => Token::Multiply,
106 '/' => {
107 if self.peek_char() == '/' {
109 self.skip_line_comment();
110 return self.next_token();
111 } else if self.peek_char() == '*' {
112 self.skip_block_comment();
113 return self.next_token();
114 } else {
115 Token::Divide
116 }
117 }
118 '%' => Token::Modulo,
119
120 '=' => {
122 if self.peek_char() == '=' {
123 self.read_char();
124 Token::Equal
125 } else {
126 Token::Assign
127 }
128 }
129 '!' => {
130 if self.peek_char() == '=' {
131 self.read_char();
132 Token::NotEqual
133 } else {
134 Token::Not
135 }
136 }
137 '<' => {
138 if self.peek_char() == '=' {
139 self.read_char();
140 Token::LessEqual
141 } else {
142 Token::Less
143 }
144 }
145 '>' => {
146 if self.peek_char() == '=' {
147 self.read_char();
148 Token::GreaterEqual
149 } else {
150 Token::Greater
151 }
152 }
153 '&' => {
154 if self.peek_char() == '&' {
155 self.read_char();
156 Token::And
157 } else {
158 Token::Illegal('&')
159 }
160 }
161 '|' => {
162 if self.peek_char() == '|' {
163 self.read_char();
164 Token::Or
165 } else {
166 Token::Illegal('|')
167 }
168 }
169
170 '(' => Token::LeftParen,
172 ')' => Token::RightParen,
173 '{' => Token::LeftBrace,
174 '}' => Token::RightBrace,
175 '[' => Token::LeftBracket,
176 ']' => Token::RightBracket,
177 ',' => Token::Comma,
178 ':' => Token::Colon,
179 ';' => Token::Semicolon,
180
181 '"' => {
183 if self.peek_char() == '"' && self.peek_char_n(2) == '"' {
185 return self.read_multiline_string();
186 } else {
187 return self.read_string();
188 }
189 }
190
191 '\n' => Token::Newline,
193
194 '\0' => Token::EOF,
196
197 _ => {
199 if self.ch.is_alphabetic() || self.ch == '_' {
200 return self.read_identifier();
201 } else if self.ch.is_numeric() {
202 return self.read_number();
203 } else {
204 Token::Illegal(self.ch)
205 }
206 }
207 };
208
209 self.read_char();
210 token
211 }
212
213 fn skip_whitespace(&mut self) -> bool {
216 let mut skipped = false;
217 while self.ch == ' ' || self.ch == '\t' || self.ch == '\r' {
218 skipped = true;
219 self.read_char();
220 }
221 skipped
222 }
223
224 fn skip_line_comment(&mut self) {
226 while self.ch != '\n' && self.ch != '\0' {
227 self.read_char();
228 }
229 }
230
231 fn skip_block_comment(&mut self) {
233 self.read_char(); self.read_char(); while !(self.ch == '*' && self.peek_char() == '/') && self.ch != '\0' {
237 if self.ch == '\n' {
238 self.line += 1;
239 self.column = 0;
240 }
241 self.read_char();
242 }
243
244 if self.ch != '\0' {
245 self.read_char(); self.read_char(); }
248 }
249
250 fn read_identifier(&mut self) -> Token {
252 let start = self.position;
253
254 while self.ch.is_alphanumeric() || self.ch == '_' {
256 self.read_char();
257 }
258
259 let ident: String = self.input[start..self.position].iter().collect();
260 Token::lookup_keyword(&ident)
261 }
262
263 fn read_number(&mut self) -> Token {
265 let start = self.position;
266 let mut has_dot = false;
267
268 while self.ch.is_numeric() || (self.ch == '.' && !has_dot) {
269 if self.ch == '.' {
270 if !self.peek_char().is_numeric() {
272 break;
273 }
274 has_dot = true;
275 }
276 self.read_char();
277 }
278
279 let num_str: String = self.input[start..self.position].iter().collect();
280
281 if !has_dot && num_str.len() > 15 {
283 return Token::BigInteger(num_str);
284 }
285
286 match num_str.parse::<f64>() {
287 Ok(num) => Token::Number(num),
288 Err(_) => Token::Illegal('0'), }
290 }
291
292 fn read_string(&mut self) -> Token {
294 self.read_char(); let start = self.position;
296
297 while self.ch != '"' && self.ch != '\0' {
298 if self.ch == '\\' {
300 self.read_char(); if self.ch != '\0' {
302 self.read_char(); }
304 } else {
305 if self.ch == '\n' {
306 self.line += 1;
307 self.column = 0;
308 }
309 self.read_char();
310 }
311 }
312
313 if self.ch == '\0' {
314 return Token::Illegal('"'); }
316
317 let string: String = self.input[start..self.position].iter().collect();
318 self.read_char(); Token::String(self.process_escapes(&string))
322 }
323
324 fn read_multiline_string(&mut self) -> Token {
326 self.read_char(); self.read_char(); self.read_char(); let start = self.position;
332
333 loop {
335 if self.ch == '\0' {
336 return Token::Illegal('"'); }
338
339 if self.ch == '"' && self.peek_char() == '"' && self.peek_char_n(2) == '"' {
341 let string: String = self.input[start..self.position].iter().collect();
342
343 self.read_char(); self.read_char(); self.read_char(); return Token::String(self.process_escapes(&string));
350 }
351
352 if self.ch == '\n' {
354 self.line += 1;
355 self.column = 0;
356 }
357
358 self.read_char();
359 }
360 }
361
362 fn process_escapes(&self, s: &str) -> String {
364 let mut result = String::new();
365 let mut chars = s.chars();
366
367 while let Some(ch) = chars.next() {
368 if ch == '\\' {
369 match chars.next() {
370 Some('n') => result.push('\n'),
371 Some('t') => result.push('\t'),
372 Some('r') => result.push('\r'),
373 Some('\\') => result.push('\\'),
374 Some('"') => result.push('"'),
375 Some(c) => {
376 result.push('\\');
377 result.push(c);
378 }
379 None => result.push('\\'),
380 }
381 } else {
382 result.push(ch);
383 }
384 }
385
386 result
387 }
388}