1use crate::{kind::SoliditySyntaxKind, language::SolidityLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, SolidityLanguage>;
5
6#[derive(Clone)]
7pub struct SolidityLexer<'config> {
8 _config: &'config SolidityLanguage,
9}
10
11impl<'config> Lexer<SolidityLanguage> for SolidityLexer<'config> {
12 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<SolidityLanguage>) -> LexOutput<SolidityLanguage> {
13 let mut state = State::new(source);
14 let result = self.run(&mut state);
15 if result.is_ok() {
16 state.add_eof();
17 }
18 state.finish_with_cache(result, cache)
19 }
20}
21
22impl<'config> SolidityLexer<'config> {
23 pub fn new(config: &'config SolidityLanguage) -> Self {
24 Self { _config: config }
25 }
26
27 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
28 while state.not_at_end() {
29 let safe_point = state.get_position();
30
31 if self.skip_whitespace(state) {
32 continue;
33 }
34
35 if self.lex_newline(state) {
36 continue;
37 }
38
39 if self.lex_line_comment(state) {
40 continue;
41 }
42
43 if self.lex_block_comment(state) {
44 continue;
45 }
46
47 if self.lex_identifier_or_keyword(state) {
48 continue;
49 }
50
51 if self.lex_number(state) {
52 continue;
53 }
54
55 if self.lex_string(state) {
56 continue;
57 }
58
59 if self.lex_operator(state) {
60 continue;
61 }
62
63 if self.lex_delimiter(state) {
64 continue;
65 }
66
67 let start_pos = state.get_position();
69 if let Some(ch) = state.peek() {
70 state.advance(ch.len_utf8());
71 state.add_token(SoliditySyntaxKind::Error, start_pos, state.get_position());
72 }
73
74 state.advance_if_dead_lock(safe_point);
75 }
76
77 Ok(())
78 }
79
80 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82 let start_pos = state.get_position();
83
84 while let Some(ch) = state.peek() {
85 if ch == ' ' || ch == '\t' {
86 state.advance(ch.len_utf8());
87 }
88 else {
89 break;
90 }
91 }
92
93 if state.get_position() > start_pos {
94 state.add_token(SoliditySyntaxKind::Whitespace, start_pos, state.get_position());
95 true
96 }
97 else {
98 false
99 }
100 }
101
102 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
104 let start_pos = state.get_position();
105
106 if let Some('\n') = state.peek() {
107 state.advance(1);
108 state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
109 true
110 }
111 else if let Some('\r') = state.peek() {
112 state.advance(1);
113 if let Some('\n') = state.peek() {
114 state.advance(1);
115 }
116 state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
117 true
118 }
119 else {
120 false
121 }
122 }
123
124 fn lex_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
126 let start_pos = state.get_position();
127
128 if let Some('/') = state.peek() {
129 state.advance(1);
130 if let Some('/') = state.peek() {
131 state.advance(1);
132
133 while let Some(ch) = state.peek() {
134 if ch == '\n' || ch == '\r' {
135 break;
136 }
137 else {
138 state.advance(ch.len_utf8());
139 }
140 }
141
142 state.add_token(SoliditySyntaxKind::LineComment, start_pos, state.get_position());
143 true
144 }
145 else {
146 state.set_position(start_pos);
147 false
148 }
149 }
150 else {
151 false
152 }
153 }
154
155 fn lex_block_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
157 let start_pos = state.get_position();
158
159 if let Some('/') = state.peek() {
160 state.advance(1);
161 if let Some('*') = state.peek() {
162 state.advance(1);
163
164 while let Some(ch) = state.peek() {
165 if ch == '*' {
166 state.advance(1);
167 if let Some('/') = state.peek() {
168 state.advance(1);
169 break;
170 }
171 }
172 else {
173 state.advance(ch.len_utf8());
174 }
175 }
176
177 state.add_token(SoliditySyntaxKind::BlockComment, start_pos, state.get_position());
178 true
179 }
180 else {
181 state.set_position(start_pos);
182 false
183 }
184 }
185 else {
186 false
187 }
188 }
189
190 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
192 let start_pos = state.get_position();
193
194 if let Some(ch) = state.peek() {
195 if ch.is_ascii_alphabetic() || ch == '_' {
196 state.advance(ch.len_utf8());
197
198 while let Some(ch) = state.peek() {
199 if ch.is_ascii_alphanumeric() || ch == '_' {
200 state.advance(ch.len_utf8());
201 }
202 else {
203 break;
204 }
205 }
206
207 let text = state.get_text_from(start_pos);
208 let token_kind = self.keyword_or_identifier(&text);
209 state.add_token(token_kind, start_pos, state.get_position());
210 true
211 }
212 else {
213 false
214 }
215 }
216 else {
217 false
218 }
219 }
220
221 fn keyword_or_identifier(&self, text: &str) -> SoliditySyntaxKind {
223 match text {
224 "contract" => SoliditySyntaxKind::Contract,
225 "interface" => SoliditySyntaxKind::Interface,
226 "library" => SoliditySyntaxKind::Library,
227 "function" => SoliditySyntaxKind::Function,
228 "modifier" => SoliditySyntaxKind::Modifier,
229 "event" => SoliditySyntaxKind::Event,
230 "struct" => SoliditySyntaxKind::Struct,
231 "enum" => SoliditySyntaxKind::Enum,
232 "mapping" => SoliditySyntaxKind::Mapping,
233 "public" => SoliditySyntaxKind::Public,
234 "private" => SoliditySyntaxKind::Private,
235 "internal" => SoliditySyntaxKind::Internal,
236 "external" => SoliditySyntaxKind::External,
237 "pure" => SoliditySyntaxKind::Pure,
238 "view" => SoliditySyntaxKind::View,
239 "payable" => SoliditySyntaxKind::Payable,
240 "constant" => SoliditySyntaxKind::Constant,
241 "bool" => SoliditySyntaxKind::Bool,
242 "string" => SoliditySyntaxKind::String,
243 "bytes" => SoliditySyntaxKind::Bytes,
244 "address" => SoliditySyntaxKind::Address,
245 "uint" => SoliditySyntaxKind::Uint,
246 "int" => SoliditySyntaxKind::Int,
247 "fixed" => SoliditySyntaxKind::Fixed,
248 "ufixed" => SoliditySyntaxKind::Ufixed,
249 "if" => SoliditySyntaxKind::If,
250 "else" => SoliditySyntaxKind::Else,
251 "for" => SoliditySyntaxKind::For,
252 "while" => SoliditySyntaxKind::While,
253 "do" => SoliditySyntaxKind::Do,
254 "break" => SoliditySyntaxKind::Break,
255 "continue" => SoliditySyntaxKind::Continue,
256 "return" => SoliditySyntaxKind::Return,
257 "try" => SoliditySyntaxKind::Try,
258 "catch" => SoliditySyntaxKind::Catch,
259 "import" => SoliditySyntaxKind::Import,
260 "pragma" => SoliditySyntaxKind::Pragma,
261 "using" => SoliditySyntaxKind::Using,
262 "is" => SoliditySyntaxKind::Is,
263 "override" => SoliditySyntaxKind::Override,
264 "virtual" => SoliditySyntaxKind::Virtual,
265 "abstract" => SoliditySyntaxKind::Abstract,
266 "true" | "false" => SoliditySyntaxKind::BooleanLiteral,
267 _ => SoliditySyntaxKind::Identifier,
268 }
269 }
270
271 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
273 let start_pos = state.get_position();
274
275 if let Some(ch) = state.peek() {
276 if ch.is_ascii_digit() {
277 state.advance(ch.len_utf8());
278
279 if ch == '0' {
281 if let Some('x') | Some('X') = state.peek() {
282 state.advance(1);
283 while let Some(ch) = state.peek() {
284 if ch.is_ascii_hexdigit() {
285 state.advance(1);
286 }
287 else {
288 break;
289 }
290 }
291 state.add_token(SoliditySyntaxKind::HexLiteral, start_pos, state.get_position());
292 return true;
293 }
294 }
295
296 while let Some(ch) = state.peek() {
298 if ch.is_ascii_digit() {
299 state.advance(1);
300 }
301 else {
302 break;
303 }
304 }
305
306 if let Some('.') = state.peek() {
308 state.advance(1);
309 while let Some(ch) = state.peek() {
310 if ch.is_ascii_digit() {
311 state.advance(1);
312 }
313 else {
314 break;
315 }
316 }
317 }
318
319 if let Some('e') | Some('E') = state.peek() {
321 state.advance(1);
322 if let Some('+') | Some('-') = state.peek() {
323 state.advance(1);
324 }
325 while let Some(ch) = state.peek() {
326 if ch.is_ascii_digit() {
327 state.advance(1);
328 }
329 else {
330 break;
331 }
332 }
333 }
334
335 state.add_token(SoliditySyntaxKind::NumberLiteral, start_pos, state.get_position());
336 true
337 }
338 else {
339 false
340 }
341 }
342 else {
343 false
344 }
345 }
346
347 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
349 let start_pos = state.get_position();
350
351 if let Some(quote) = state.peek() {
352 if quote == '"' || quote == '\'' {
353 state.advance(1);
354 let mut found_end = false;
355
356 while let Some(ch) = state.peek() {
357 if ch == quote {
358 state.advance(1);
359 found_end = true;
360 break;
361 }
362 else if ch == '\\' {
363 state.advance(1);
364 if let Some(_) = state.peek() {
365 state.advance(1);
366 }
367 }
368 else if ch == '\n' || ch == '\r' {
369 break; }
371 else {
372 state.advance(ch.len_utf8());
373 }
374 }
375
376 if found_end {
377 state.add_token(SoliditySyntaxKind::StringLiteral, start_pos, state.get_position());
378 }
379 else {
380 state.add_token(SoliditySyntaxKind::Error, start_pos, state.get_position());
381 }
382 true
383 }
384 else {
385 false
386 }
387 }
388 else {
389 false
390 }
391 }
392
393 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
395 let start_pos = state.get_position();
396
397 if let Some(ch) = state.peek() {
398 let token_kind = match ch {
399 '+' => {
400 state.advance(1);
401 if let Some('=') = state.peek() {
402 state.advance(1);
403 SoliditySyntaxKind::PlusAssign
404 }
405 else {
406 SoliditySyntaxKind::Plus
407 }
408 }
409 '-' => {
410 state.advance(1);
411 if let Some('=') = state.peek() {
412 state.advance(1);
413 SoliditySyntaxKind::MinusAssign
414 }
415 else if let Some('>') = state.peek() {
416 state.advance(1);
417 SoliditySyntaxKind::Arrow
418 }
419 else {
420 SoliditySyntaxKind::Minus
421 }
422 }
423 '*' => {
424 state.advance(1);
425 if let Some('=') = state.peek() {
426 state.advance(1);
427 SoliditySyntaxKind::StarAssign
428 }
429 else if let Some('*') = state.peek() {
430 state.advance(1);
431 SoliditySyntaxKind::Power
432 }
433 else {
434 SoliditySyntaxKind::Star
435 }
436 }
437 '/' => {
438 state.advance(1);
440 if let Some('=') = state.peek() {
441 state.advance(1);
442 SoliditySyntaxKind::SlashAssign
443 }
444 else {
445 SoliditySyntaxKind::Slash
446 }
447 }
448 '%' => {
449 state.advance(1);
450 if let Some('=') = state.peek() {
451 state.advance(1);
452 SoliditySyntaxKind::PercentAssign
453 }
454 else {
455 SoliditySyntaxKind::Percent
456 }
457 }
458 '=' => {
459 state.advance(1);
460 if let Some('=') = state.peek() {
461 state.advance(1);
462 SoliditySyntaxKind::Equal
463 }
464 else {
465 SoliditySyntaxKind::Assign
466 }
467 }
468 '!' => {
469 state.advance(1);
470 if let Some('=') = state.peek() {
471 state.advance(1);
472 SoliditySyntaxKind::NotEqual
473 }
474 else {
475 SoliditySyntaxKind::Not
476 }
477 }
478 '<' => {
479 state.advance(1);
480 if let Some('=') = state.peek() {
481 state.advance(1);
482 SoliditySyntaxKind::LessEqual
483 }
484 else if let Some('<') = state.peek() {
485 state.advance(1);
486 SoliditySyntaxKind::LeftShift
487 }
488 else {
489 SoliditySyntaxKind::Less
490 }
491 }
492 '>' => {
493 state.advance(1);
494 if let Some('=') = state.peek() {
495 state.advance(1);
496 SoliditySyntaxKind::GreaterEqual
497 }
498 else if let Some('>') = state.peek() {
499 state.advance(1);
500 SoliditySyntaxKind::RightShift
501 }
502 else {
503 SoliditySyntaxKind::Greater
504 }
505 }
506 '&' => {
507 state.advance(1);
508 if let Some('&') = state.peek() {
509 state.advance(1);
510 SoliditySyntaxKind::And
511 }
512 else {
513 SoliditySyntaxKind::BitAnd
514 }
515 }
516 '|' => {
517 state.advance(1);
518 if let Some('|') = state.peek() {
519 state.advance(1);
520 SoliditySyntaxKind::Or
521 }
522 else {
523 SoliditySyntaxKind::BitOr
524 }
525 }
526 '^' => {
527 state.advance(1);
528 SoliditySyntaxKind::BitXor
529 }
530 '~' => {
531 state.advance(1);
532 SoliditySyntaxKind::BitNot
533 }
534 _ => return false,
535 };
536
537 state.add_token(token_kind, start_pos, state.get_position());
538 true
539 }
540 else {
541 false
542 }
543 }
544
545 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
547 let start_pos = state.get_position();
548
549 if let Some(ch) = state.peek() {
550 let token_kind = match ch {
551 '(' => SoliditySyntaxKind::LeftParen,
552 ')' => SoliditySyntaxKind::RightParen,
553 '{' => SoliditySyntaxKind::LeftBrace,
554 '}' => SoliditySyntaxKind::RightBrace,
555 '[' => SoliditySyntaxKind::LeftBracket,
556 ']' => SoliditySyntaxKind::RightBracket,
557 ';' => SoliditySyntaxKind::Semicolon,
558 ',' => SoliditySyntaxKind::Comma,
559 '.' => SoliditySyntaxKind::Dot,
560 _ => return false,
561 };
562
563 state.advance(ch.len_utf8());
564 state.add_token(token_kind, start_pos, state.get_position());
565 true
566 }
567 else {
568 false
569 }
570 }
571}