1use std::str::Chars;
2use symboscript_types::lexer::{Token, TokenKind, TokenValue};
3use symboscript_utils::report_error;
4
/// Hand-written lexer that walks a source string and produces tokens.
///
/// The lexer borrows both the path and the source text for `'a`; it never
/// copies the input. Cloning a `Lexer` is cheap (two string slices, a
/// character iterator and a flag) and yields an independent cursor positioned
/// at the same place in the input.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// Path of the file being lexed; handed to `report_error` together with
    /// the source when an unexpected token is found.
    path: &'a str,

    /// Complete source text. Token `start`/`end` values are byte offsets into
    /// this string.
    source: &'a str,

    /// Cursor over the characters of `source` that have not been consumed yet.
    chars: Chars<'a>,

    /// When `true`, comments are emitted as tokens; when `false`, they are
    /// skipped.
    comment: bool,
}
18
19impl<'a> Lexer<'a> {
20 pub fn new(path: &'a str, source: &'a str, comment: bool) -> Self {
21 Self {
22 path,
23 source,
24 chars: source.chars(),
25 comment,
26 }
27 }
28
29 pub fn tokenize(&mut self) -> Vec<Token> {
30 let mut tokens = Vec::new();
31
32 loop {
33 let token = self.next_token();
34 if token.kind == TokenKind::Eof {
35 break;
36 }
37 tokens.push(token);
38 }
39
40 tokens
41 }
42
43 pub fn next_token(&mut self) -> Token {
44 self.skip_trivia();
45 let start = self.offset();
46 let mut kind = self.next_kind();
47 let end = self.offset();
48
49 if kind == TokenKind::Skip {
50 return self.next_token();
51 }
52
53 let s = self.source[start..end].to_owned();
54
55 let mut value = TokenValue::None;
56
57 match kind {
58 TokenKind::Number => {
59 value = TokenValue::Number(s.parse::<f64>().unwrap_or_default());
60 }
61
62 TokenKind::Identifier => {
63 kind = self.match_keyword(&s);
64
65 if kind == TokenKind::Identifier {
66 value = TokenValue::Identifier(s);
67 }
68 }
69
70 TokenKind::Str => {
71 value = TokenValue::Str(s[1..s.len() - 1].to_string().replace("\\n", "\n"));
72 }
73
74 TokenKind::DocComment => value = TokenValue::Str(s),
75
76 TokenKind::Unexpected => {
77 report_error(self.path, self.source, "Unexpected token", start, end)
78 }
79 _ => {}
80 };
81
82 Token {
83 kind,
84 start,
85 end,
86 value,
87 }
88 }
89
90 fn next_kind(&mut self) -> TokenKind {
91 while let Some(c) = self.next() {
92 match c {
93 '#' => return self.read_comment(),
94
95 ';' => return TokenKind::Semicolon,
96 ',' => return TokenKind::Comma,
97 ':' => return self.read_one_more('=', TokenKind::FormulaAssign, TokenKind::Colon),
98 '.' => return self.read_dot(),
99
100 '+' => {
101 return self.read_one_more_variants(
102 TokenKind::Plus,
103 &['=', '+'],
104 &[TokenKind::PlusAssign, TokenKind::PlusPlus],
105 )
106 }
107 '-' => {
108 return self.read_one_more_variants(
109 TokenKind::Minus,
110 &['=', '-'],
111 &[TokenKind::MinusAssign, TokenKind::MinusMinus],
112 )
113 }
114 '*' => return self.read_one_more('=', TokenKind::MultiplyAssign, TokenKind::Star),
115 '/' => return self.read_one_more('=', TokenKind::DivideAssign, TokenKind::Slash),
116 '^' => return self.read_one_more('=', TokenKind::PowerAssign, TokenKind::Caret),
117 '%' => return self.read_one_more('=', TokenKind::ModuloAssign, TokenKind::Modulo),
118
119 '&' => {
120 return self.read_one_more(
121 '&',
122 TokenKind::AmpersandAmpersand,
123 TokenKind::Ampersand,
124 )
125 }
126 '|' => return self.read_one_more('|', TokenKind::PipePipe, TokenKind::Pipe),
127 '~' => return TokenKind::Tilde,
128 '?' => return TokenKind::Question,
129
130 '=' => return self.read_one_more('=', TokenKind::Equal, TokenKind::Assign),
131 '!' => {
132 return self.read_one_more('=', TokenKind::NotEqual, TokenKind::ExclamationMark)
133 }
134 '<' => {
135 return self.read_one_more_variants(
136 TokenKind::Less,
137 &['=', '<'],
138 &[TokenKind::LessEqual, TokenKind::BitLeftShift],
139 )
140 }
141 '>' => {
142 return self.read_one_more_variants(
143 TokenKind::Greater,
144 &['=', '>'],
145 &[TokenKind::GreaterEqual, TokenKind::BitRightShift],
146 )
147 }
148
149 '(' => return TokenKind::LParen,
150 ')' => return TokenKind::RParen,
151 '{' => return TokenKind::LAngle,
152 '}' => return TokenKind::RAngle,
153 '[' => return TokenKind::LSquare,
154 ']' => return TokenKind::RSquare,
155
156 'a'..='z' | 'A'..='Z' | '_' => return self.read_identifier(),
157
158 '0'..='9' => return self.read_number(),
159 '"' | '\'' | '`' => return self.read_string(c),
160
161 _ => return TokenKind::Unexpected,
162 };
163 }
164 TokenKind::Eof
165 }
166
167 fn match_keyword(&self, ident: &str) -> TokenKind {
168 if ident.len() == 1 || ident.len() > 10 {
170 return TokenKind::Identifier;
171 }
172
173 match ident {
174 "true" => TokenKind::True,
175 "false" => TokenKind::False,
176 "None" => TokenKind::None,
177
178 "if" => TokenKind::If,
179 "else" => TokenKind::Else,
180 "while" => TokenKind::While,
181 "loop" => TokenKind::Loop,
182 "for" => TokenKind::For,
183 "let" => TokenKind::Let,
184 "fn" => TokenKind::Function,
185 "scope" => TokenKind::Scope,
186 "return" => TokenKind::Return,
187 "yield" => TokenKind::Yield,
188 "break" => TokenKind::Break,
189 "continue" => TokenKind::Continue,
190 "in" => TokenKind::In,
191 "of" => TokenKind::Of,
192 "delete" => TokenKind::Delete,
193
194 "throw" => TokenKind::Throw,
195
196 "import" => TokenKind::Import,
197 "as" => TokenKind::As,
198 "context" => TokenKind::Context,
199
200 "async" => TokenKind::Async,
201 "await" => TokenKind::Await,
202
203 "block" => TokenKind::Block,
204
205 "mut" => TokenKind::Mut,
206
207 "band" => TokenKind::Ampersand,
209 "bxor" => TokenKind::BitXor,
210 "bor" => TokenKind::Pipe,
211 "bnot" => TokenKind::Tilde,
212 "bshl" => TokenKind::BitLeftShift,
213 "bshr" => TokenKind::BitRightShift,
214
215 "xor" => TokenKind::Xor,
216 "and" => TokenKind::AmpersandAmpersand,
217 "or" => TokenKind::PipePipe,
218 "not" => TokenKind::ExclamationMark,
219 _ => TokenKind::Identifier,
223 }
224 }
225
226 fn skip_trivia(&mut self) {
227 while let Some(c) = self.peek() {
228 match c {
229 ' ' | '\t' | '\n' | '\r' => {
230 self.next();
231 }
232 _ => break,
233 }
234 }
235 }
236
237 fn read_dot(&mut self) -> TokenKind {
238 if self.peek() == Some('.') {
239 self.next();
240 return TokenKind::Range;
241 } else if ("0"..="9").contains(&self.peek().unwrap_or_default().to_string().as_str()) {
242 return self.read_number();
243 }
244 return TokenKind::Dot;
245 }
246
247 fn read_number(&mut self) -> TokenKind {
248 while let Some(c) = self.peek() {
249 match c {
250 '0'..='9' => {
251 self.next();
252 }
253 '.' | 'e' | 'E' => {
254 if let Some(c) = self.peek_two() {
255 match c {
256 '0'..='9' => {
257 self.next();
258 self.next();
259 }
260 _ => {
261 break;
262 }
263 }
264 } else {
265 break;
266 }
267 }
268 _ => break,
269 };
270 }
271
272 TokenKind::Number
273 }
274
275 fn read_comment(&mut self) -> TokenKind {
276 if self.eat('/') {
277 while let Some(c) = self.peek() {
278 self.next();
279 if c == '/' {
280 if self.eat('#') {
281 if self.comment {
282 return TokenKind::DocComment;
283 } else {
284 return TokenKind::Skip;
285 }
286 }
287 }
288 }
289 }
290
291 while let Some(c) = self.peek() {
292 match c {
293 '\n' => {
294 self.next();
295 break;
296 }
297 _ => {
298 self.next();
299 }
300 };
301 }
302
303 if self.comment {
304 TokenKind::Comment
305 } else {
306 TokenKind::Skip
307 }
308 }
309
310 fn read_string(&mut self, init_char: char) -> TokenKind {
311 while let Some(c) = self.peek() {
312 match c {
313 c if c == init_char => {
314 self.next();
315 return TokenKind::Str;
316 }
317 '\\' => {
318 self.next();
319 self.next();
320 }
321 _ => {
322 self.next();
323 }
324 };
325 }
326 TokenKind::Unexpected
327 }
328
329 fn read_identifier(&mut self) -> TokenKind {
330 while let Some(c) = self.peek() {
331 match c {
332 'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => {
333 self.next();
334 }
335 _ => break,
336 };
337 }
338
339 TokenKind::Identifier
340 }
341
342 fn read_one_more(
343 &mut self,
344 ch: char,
345 kind_expected: TokenKind,
346 kind_unexpected: TokenKind,
347 ) -> TokenKind {
348 match self.peek() {
349 Some(c) if c == ch => {
350 self.next();
351 return kind_expected;
352 }
353 _ => return kind_unexpected,
354 }
355 }
356
357 fn read_one_more_variants(
358 &mut self,
359 kind_unexpected: TokenKind,
360 char_expected: &[char],
361 kind_expected: &[TokenKind],
362 ) -> TokenKind {
363 match self.peek() {
364 Some(c) if char_expected.contains(&c) => {
365 self.next();
366 return kind_expected[char_expected.iter().position(|&x| x == c).unwrap()];
367 }
368
369 _ => return kind_unexpected,
370 }
371 }
372
373 fn offset(&self) -> usize {
375 self.source.len() - self.chars.as_str().len()
376 }
377
378 fn peek(&self) -> Option<char> {
379 self.chars.as_str().chars().next()
380 }
381
382 fn peek_two(&self) -> Option<char> {
383 let mut new_chars = self.chars.as_str().chars();
384 new_chars.next();
385 new_chars.next()
386 }
387
388 fn eat(&mut self, ch: char) -> bool {
389 if self.peek() == Some(ch) {
390 self.next();
391 true
392 } else {
393 false
394 }
395 }
396
397 fn next(&mut self) -> Option<char> {
398 self.chars.next()
399 }
400}