1use crate::{kind::SoliditySyntaxKind, language::SolidityLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<S> = LexerState<S, SolidityLanguage>;
5
6#[derive(Clone)]
7pub struct SolidityLexer<'config> {
8 config: &'config SolidityLanguage,
9}
10
11impl<'config> Lexer<SolidityLanguage> for SolidityLexer<'config> {
12 fn lex_incremental(
13 &self,
14 source: impl Source,
15 changed: usize,
16 cache: IncrementalCache<SolidityLanguage>,
17 ) -> LexOutput<SolidityLanguage> {
18 let mut state = LexerState::new_with_cache(source, changed, cache);
19 let result = self.run(&mut state);
20 state.finish(result)
21 }
22}
23
24impl<'config> SolidityLexer<'config> {
25 pub fn new(config: &'config SolidityLanguage) -> Self {
26 Self { config }
27 }
28
29 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
30 while state.not_at_end() {
31 let safe_point = state.get_position();
32
33 if self.skip_whitespace(state) {
34 continue;
35 }
36
37 if self.lex_newline(state) {
38 continue;
39 }
40
41 if self.lex_line_comment(state) {
42 continue;
43 }
44
45 if self.lex_block_comment(state) {
46 continue;
47 }
48
49 if self.lex_identifier_or_keyword(state) {
50 continue;
51 }
52
53 if self.lex_number(state) {
54 continue;
55 }
56
57 if self.lex_string(state) {
58 continue;
59 }
60
61 if self.lex_operator(state) {
62 continue;
63 }
64
65 if self.lex_delimiter(state) {
66 continue;
67 }
68
69 if let Some(ch) = state.peek() {
71 state.advance(ch.len_utf8());
72 state.add_token(SoliditySyntaxKind::Error, safe_point, state.get_position());
73 }
74 }
75
76 state.add_token(SoliditySyntaxKind::Eof, state.get_position(), state.get_position());
78 Ok(())
79 }
80
81 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
83 let start_pos = state.get_position();
84
85 while let Some(ch) = state.peek() {
86 if ch == ' ' || ch == '\t' {
87 state.advance(ch.len_utf8());
88 }
89 else {
90 break;
91 }
92 }
93
94 if state.get_position() > start_pos {
95 state.add_token(SoliditySyntaxKind::Whitespace, start_pos, state.get_position());
96 true
97 }
98 else {
99 false
100 }
101 }
102
103 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
105 let start_pos = state.get_position();
106
107 if let Some('\n') = state.peek() {
108 state.advance(1);
109 state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
110 true
111 }
112 else if let Some('\r') = state.peek() {
113 state.advance(1);
114 if let Some('\n') = state.peek() {
115 state.advance(1);
116 }
117 state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
118 true
119 }
120 else {
121 false
122 }
123 }
124
125 fn lex_line_comment<S: Source>(&self, state: &mut State<S>) -> bool {
127 let start_pos = state.get_position();
128
129 if let Some('/') = state.peek() {
130 state.advance(1);
131 if let Some('/') = state.peek() {
132 state.advance(1);
133
134 while let Some(ch) = state.peek() {
135 if ch == '\n' || ch == '\r' {
136 break;
137 }
138 else {
139 state.advance(ch.len_utf8());
140 }
141 }
142
143 state.add_token(SoliditySyntaxKind::LineComment, start_pos, state.get_position());
144 true
145 }
146 else {
147 state.set_position(start_pos);
148 false
149 }
150 }
151 else {
152 false
153 }
154 }
155
156 fn lex_block_comment<S: Source>(&self, state: &mut State<S>) -> bool {
158 let start_pos = state.get_position();
159
160 if let Some('/') = state.peek() {
161 state.advance(1);
162 if let Some('*') = state.peek() {
163 state.advance(1);
164
165 while let Some(ch) = state.peek() {
166 if ch == '*' {
167 state.advance(1);
168 if let Some('/') = state.peek() {
169 state.advance(1);
170 break;
171 }
172 }
173 else {
174 state.advance(ch.len_utf8());
175 }
176 }
177
178 state.add_token(SoliditySyntaxKind::BlockComment, start_pos, state.get_position());
179 true
180 }
181 else {
182 state.set_position(start_pos);
183 false
184 }
185 }
186 else {
187 false
188 }
189 }
190
191 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
193 let start_pos = state.get_position();
194
195 if let Some(ch) = state.peek() {
196 if ch.is_ascii_alphabetic() || ch == '_' {
197 state.advance(ch.len_utf8());
198
199 while let Some(ch) = state.peek() {
200 if ch.is_ascii_alphanumeric() || ch == '_' {
201 state.advance(ch.len_utf8());
202 }
203 else {
204 break;
205 }
206 }
207
208 let text = state.get_text_from(start_pos);
209 let token_kind = self.keyword_or_identifier(text);
210 state.add_token(token_kind, start_pos, state.get_position());
211 true
212 }
213 else {
214 false
215 }
216 }
217 else {
218 false
219 }
220 }
221
222 fn keyword_or_identifier(&self, text: &str) -> SoliditySyntaxKind {
224 match text {
225 "contract" => SoliditySyntaxKind::Contract,
226 "interface" => SoliditySyntaxKind::Interface,
227 "library" => SoliditySyntaxKind::Library,
228 "function" => SoliditySyntaxKind::Function,
229 "modifier" => SoliditySyntaxKind::Modifier,
230 "event" => SoliditySyntaxKind::Event,
231 "struct" => SoliditySyntaxKind::Struct,
232 "enum" => SoliditySyntaxKind::Enum,
233 "mapping" => SoliditySyntaxKind::Mapping,
234 "public" => SoliditySyntaxKind::Public,
235 "private" => SoliditySyntaxKind::Private,
236 "internal" => SoliditySyntaxKind::Internal,
237 "external" => SoliditySyntaxKind::External,
238 "pure" => SoliditySyntaxKind::Pure,
239 "view" => SoliditySyntaxKind::View,
240 "payable" => SoliditySyntaxKind::Payable,
241 "constant" => SoliditySyntaxKind::Constant,
242 "bool" => SoliditySyntaxKind::Bool,
243 "string" => SoliditySyntaxKind::String,
244 "bytes" => SoliditySyntaxKind::Bytes,
245 "address" => SoliditySyntaxKind::Address,
246 "uint" => SoliditySyntaxKind::Uint,
247 "int" => SoliditySyntaxKind::Int,
248 "fixed" => SoliditySyntaxKind::Fixed,
249 "ufixed" => SoliditySyntaxKind::Ufixed,
250 "if" => SoliditySyntaxKind::If,
251 "else" => SoliditySyntaxKind::Else,
252 "for" => SoliditySyntaxKind::For,
253 "while" => SoliditySyntaxKind::While,
254 "do" => SoliditySyntaxKind::Do,
255 "break" => SoliditySyntaxKind::Break,
256 "continue" => SoliditySyntaxKind::Continue,
257 "return" => SoliditySyntaxKind::Return,
258 "try" => SoliditySyntaxKind::Try,
259 "catch" => SoliditySyntaxKind::Catch,
260 "import" => SoliditySyntaxKind::Import,
261 "pragma" => SoliditySyntaxKind::Pragma,
262 "using" => SoliditySyntaxKind::Using,
263 "is" => SoliditySyntaxKind::Is,
264 "override" => SoliditySyntaxKind::Override,
265 "virtual" => SoliditySyntaxKind::Virtual,
266 "abstract" => SoliditySyntaxKind::Abstract,
267 "true" | "false" => SoliditySyntaxKind::BooleanLiteral,
268 _ => SoliditySyntaxKind::Identifier,
269 }
270 }
271
272 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
274 let start_pos = state.get_position();
275
276 if let Some(ch) = state.peek() {
277 if ch.is_ascii_digit() {
278 state.advance(1);
279
280 if ch == '0' {
282 if let Some('x') | Some('X') = state.peek() {
283 state.advance(1);
284 while let Some(ch) = state.peek() {
285 if ch.is_ascii_hexdigit() {
286 state.advance(1);
287 }
288 else {
289 break;
290 }
291 }
292 state.add_token(SoliditySyntaxKind::HexLiteral, start_pos, state.get_position());
293 return true;
294 }
295 }
296
297 while let Some(ch) = state.peek() {
299 if ch.is_ascii_digit() {
300 state.advance(1);
301 }
302 else {
303 break;
304 }
305 }
306
307 if let Some('.') = state.peek() {
309 state.advance(1);
310 while let Some(ch) = state.peek() {
311 if ch.is_ascii_digit() {
312 state.advance(1);
313 }
314 else {
315 break;
316 }
317 }
318 }
319
320 if let Some('e') | Some('E') = state.peek() {
322 state.advance(1);
323 if let Some('+') | Some('-') = state.peek() {
324 state.advance(1);
325 }
326 while let Some(ch) = state.peek() {
327 if ch.is_ascii_digit() {
328 state.advance(1);
329 }
330 else {
331 break;
332 }
333 }
334 }
335
336 state.add_token(SoliditySyntaxKind::NumberLiteral, start_pos, state.get_position());
337 true
338 }
339 else {
340 false
341 }
342 }
343 else {
344 false
345 }
346 }
347
348 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
350 let start_pos = state.get_position();
351
352 if let Some(quote) = state.peek() {
353 if quote == '"' || quote == '\'' {
354 state.advance(1);
355 let mut found_end = false;
356
357 while let Some(ch) = state.peek() {
358 if ch == quote {
359 state.advance(1);
360 found_end = true;
361 break;
362 }
363 else if ch == '\\' {
364 state.advance(1);
365 if let Some(_) = state.peek() {
366 state.advance(1);
367 }
368 }
369 else if ch == '\n' || ch == '\r' {
370 break; }
372 else {
373 state.advance(ch.len_utf8());
374 }
375 }
376
377 if found_end {
378 state.add_token(SoliditySyntaxKind::StringLiteral, start_pos, state.get_position());
379 }
380 else {
381 state.add_token(SoliditySyntaxKind::Error, start_pos, state.get_position());
382 }
383 true
384 }
385 else {
386 false
387 }
388 }
389 else {
390 false
391 }
392 }
393
394 fn lex_operator<S: Source>(&self, state: &mut State<S>) -> bool {
396 let start_pos = state.get_position();
397
398 if let Some(ch) = state.peek() {
399 let token_kind = match ch {
400 '+' => {
401 state.advance(1);
402 if let Some('=') = state.peek() {
403 state.advance(1);
404 SoliditySyntaxKind::PlusAssign
405 }
406 else {
407 SoliditySyntaxKind::Plus
408 }
409 }
410 '-' => {
411 state.advance(1);
412 if let Some('=') = state.peek() {
413 state.advance(1);
414 SoliditySyntaxKind::MinusAssign
415 }
416 else if let Some('>') = state.peek() {
417 state.advance(1);
418 SoliditySyntaxKind::Arrow
419 }
420 else {
421 SoliditySyntaxKind::Minus
422 }
423 }
424 '*' => {
425 state.advance(1);
426 if let Some('=') = state.peek() {
427 state.advance(1);
428 SoliditySyntaxKind::StarAssign
429 }
430 else if let Some('*') = state.peek() {
431 state.advance(1);
432 SoliditySyntaxKind::Power
433 }
434 else {
435 SoliditySyntaxKind::Star
436 }
437 }
438 '/' => {
439 state.advance(1);
441 if let Some('=') = state.peek() {
442 state.advance(1);
443 SoliditySyntaxKind::SlashAssign
444 }
445 else {
446 SoliditySyntaxKind::Slash
447 }
448 }
449 '%' => {
450 state.advance(1);
451 if let Some('=') = state.peek() {
452 state.advance(1);
453 SoliditySyntaxKind::PercentAssign
454 }
455 else {
456 SoliditySyntaxKind::Percent
457 }
458 }
459 '=' => {
460 state.advance(1);
461 if let Some('=') = state.peek() {
462 state.advance(1);
463 SoliditySyntaxKind::Equal
464 }
465 else {
466 SoliditySyntaxKind::Assign
467 }
468 }
469 '!' => {
470 state.advance(1);
471 if let Some('=') = state.peek() {
472 state.advance(1);
473 SoliditySyntaxKind::NotEqual
474 }
475 else {
476 SoliditySyntaxKind::Not
477 }
478 }
479 '<' => {
480 state.advance(1);
481 if let Some('=') = state.peek() {
482 state.advance(1);
483 SoliditySyntaxKind::LessEqual
484 }
485 else if let Some('<') = state.peek() {
486 state.advance(1);
487 SoliditySyntaxKind::LeftShift
488 }
489 else {
490 SoliditySyntaxKind::Less
491 }
492 }
493 '>' => {
494 state.advance(1);
495 if let Some('=') = state.peek() {
496 state.advance(1);
497 SoliditySyntaxKind::GreaterEqual
498 }
499 else if let Some('>') = state.peek() {
500 state.advance(1);
501 SoliditySyntaxKind::RightShift
502 }
503 else {
504 SoliditySyntaxKind::Greater
505 }
506 }
507 '&' => {
508 state.advance(1);
509 if let Some('&') = state.peek() {
510 state.advance(1);
511 SoliditySyntaxKind::And
512 }
513 else {
514 SoliditySyntaxKind::BitAnd
515 }
516 }
517 '|' => {
518 state.advance(1);
519 if let Some('|') = state.peek() {
520 state.advance(1);
521 SoliditySyntaxKind::Or
522 }
523 else {
524 SoliditySyntaxKind::BitOr
525 }
526 }
527 '^' => {
528 state.advance(1);
529 SoliditySyntaxKind::BitXor
530 }
531 '~' => {
532 state.advance(1);
533 SoliditySyntaxKind::BitNot
534 }
535 _ => return false,
536 };
537
538 state.add_token(token_kind, start_pos, state.get_position());
539 true
540 }
541 else {
542 false
543 }
544 }
545
546 fn lex_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
548 let start_pos = state.get_position();
549
550 if let Some(ch) = state.peek() {
551 let token_kind = match ch {
552 '(' => SoliditySyntaxKind::LeftParen,
553 ')' => SoliditySyntaxKind::RightParen,
554 '{' => SoliditySyntaxKind::LeftBrace,
555 '}' => SoliditySyntaxKind::RightBrace,
556 '[' => SoliditySyntaxKind::LeftBracket,
557 ']' => SoliditySyntaxKind::RightBracket,
558 ';' => SoliditySyntaxKind::Semicolon,
559 ',' => SoliditySyntaxKind::Comma,
560 '.' => SoliditySyntaxKind::Dot,
561 _ => return false,
562 };
563
564 state.advance(ch.len_utf8());
565 state.add_token(token_kind, start_pos, state.get_position());
566 true
567 }
568 else {
569 false
570 }
571 }
572
573 fn lex_single_char_token<S: Source>(&self, state: &mut State<S>) -> bool {
575 if let Some(ch) = state.peek() {
576 let start_pos = state.get_position();
577
578 let token_kind = match ch {
579 '(' => SoliditySyntaxKind::LeftParen,
580 ')' => SoliditySyntaxKind::RightParen,
581 '{' => SoliditySyntaxKind::LeftBrace,
582 '}' => SoliditySyntaxKind::RightBrace,
583 '[' => SoliditySyntaxKind::LeftBracket,
584 ']' => SoliditySyntaxKind::RightBracket,
585 ';' => SoliditySyntaxKind::Semicolon,
586 ',' => SoliditySyntaxKind::Comma,
587 '.' => SoliditySyntaxKind::Dot,
588 _ => return false,
589 };
590
591 state.advance(ch.len_utf8());
592 state.add_token(token_kind, start_pos, state.get_position());
593 true
594 }
595 else {
596 false
597 }
598 }
599}