1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::SolidityLanguage, lexer::token_type::SolidityTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
6
7type State<'a, S> = LexerState<'a, S, SolidityLanguage>;
8
9#[derive(Clone)]
10pub struct SolidityLexer<'config> {
11 _config: &'config SolidityLanguage,
12}
13
14impl<'config> Lexer<SolidityLanguage> for SolidityLexer<'config> {
15 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<SolidityLanguage>) -> LexOutput<SolidityLanguage> {
16 let mut state = State::new(source);
17 let result = self.run(&mut state);
18 if result.is_ok() {
19 state.add_eof();
20 }
21 state.finish_with_cache(result, cache)
22 }
23}
24
25impl<'config> SolidityLexer<'config> {
26 pub fn new(config: &'config SolidityLanguage) -> Self {
27 Self { _config: config }
28 }
29
30 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
31 while state.not_at_end() {
32 let safe_point = state.get_position();
33
34 if self.skip_whitespace(state) {
35 continue;
36 }
37
38 if self.lex_newline(state) {
39 continue;
40 }
41
42 if self.lex_line_comment(state) {
43 continue;
44 }
45
46 if self.lex_block_comment(state) {
47 continue;
48 }
49
50 if self.lex_identifier_or_keyword(state) {
51 continue;
52 }
53
54 if self.lex_number(state) {
55 continue;
56 }
57
58 if self.lex_string(state) {
59 continue;
60 }
61
62 if self.lex_operator(state) {
63 continue;
64 }
65
66 if self.lex_delimiter(state) {
67 continue;
68 }
69
70 let start_pos = state.get_position();
72 if let Some(ch) = state.peek() {
73 state.advance(ch.len_utf8());
74 state.add_token(SolidityTokenType::Error, start_pos, state.get_position());
75 }
76
77 state.advance_if_dead_lock(safe_point)
78 }
79
80 Ok(())
81 }
82
83 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85 let start_pos = state.get_position();
86
87 while let Some(ch) = state.peek() {
88 if ch == ' ' || ch == '\t' {
89 state.advance(ch.len_utf8());
90 }
91 else {
92 break;
93 }
94 }
95
96 if state.get_position() > start_pos {
97 state.add_token(SolidityTokenType::Whitespace, start_pos, state.get_position());
98 true
99 }
100 else {
101 false
102 }
103 }
104
105 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
107 let start_pos = state.get_position();
108
109 if let Some('\n') = state.peek() {
110 state.advance(1);
111 state.add_token(SolidityTokenType::Newline, start_pos, state.get_position());
112 true
113 }
114 else if let Some('\r') = state.peek() {
115 state.advance(1);
116 if let Some('\n') = state.peek() {
117 state.advance(1);
118 }
119 state.add_token(SolidityTokenType::Newline, start_pos, state.get_position());
120 true
121 }
122 else {
123 false
124 }
125 }
126
127 fn lex_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
129 let start_pos = state.get_position();
130
131 if let Some('/') = state.peek() {
132 state.advance(1);
133 if let Some('/') = state.peek() {
134 state.advance(1);
135
136 while let Some(ch) = state.peek() {
137 if ch == '\n' || ch == '\r' {
138 break;
139 }
140 else {
141 state.advance(ch.len_utf8());
142 }
143 }
144
145 state.add_token(SolidityTokenType::LineComment, start_pos, state.get_position());
146 true
147 }
148 else {
149 state.set_position(start_pos);
150 false
151 }
152 }
153 else {
154 false
155 }
156 }
157
158 fn lex_block_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
160 let start_pos = state.get_position();
161
162 if let Some('/') = state.peek() {
163 state.advance(1);
164 if let Some('*') = state.peek() {
165 state.advance(1);
166
167 while let Some(ch) = state.peek() {
168 if ch == '*' {
169 state.advance(1);
170 if let Some('/') = state.peek() {
171 state.advance(1);
172 break;
173 }
174 }
175 else {
176 state.advance(ch.len_utf8());
177 }
178 }
179
180 state.add_token(SolidityTokenType::BlockComment, start_pos, state.get_position());
181 true
182 }
183 else {
184 state.set_position(start_pos);
185 false
186 }
187 }
188 else {
189 false
190 }
191 }
192
193 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
195 let start_pos = state.get_position();
196
197 if let Some(ch) = state.peek() {
198 if ch.is_ascii_alphabetic() || ch == '_' {
199 state.advance(ch.len_utf8());
200
201 while let Some(ch) = state.peek() {
202 if ch.is_ascii_alphanumeric() || ch == '_' {
203 state.advance(ch.len_utf8());
204 }
205 else {
206 break;
207 }
208 }
209
210 let text = state.get_text_from(start_pos);
211 let token_kind = self.keyword_or_identifier(&text);
212 state.add_token(token_kind, start_pos, state.get_position());
213 true
214 }
215 else {
216 false
217 }
218 }
219 else {
220 false
221 }
222 }
223
224 fn keyword_or_identifier(&self, text: &str) -> SolidityTokenType {
226 match text {
227 "contract" => SolidityTokenType::Contract,
228 "interface" => SolidityTokenType::Interface,
229 "library" => SolidityTokenType::Library,
230 "function" => SolidityTokenType::Function,
231 "modifier" => SolidityTokenType::Modifier,
232 "event" => SolidityTokenType::Event,
233 "struct" => SolidityTokenType::Struct,
234 "enum" => SolidityTokenType::Enum,
235 "mapping" => SolidityTokenType::Mapping,
236 "public" => SolidityTokenType::Public,
237 "private" => SolidityTokenType::Private,
238 "internal" => SolidityTokenType::Internal,
239 "external" => SolidityTokenType::External,
240 "pure" => SolidityTokenType::Pure,
241 "view" => SolidityTokenType::View,
242 "payable" => SolidityTokenType::Payable,
243 "constant" => SolidityTokenType::Constant,
244 "bool" => SolidityTokenType::Bool,
245 "string" => SolidityTokenType::String,
246 "bytes" => SolidityTokenType::Bytes,
247 "address" => SolidityTokenType::Address,
248 "uint" => SolidityTokenType::Uint,
249 "int" => SolidityTokenType::Int,
250 "fixed" => SolidityTokenType::Fixed,
251 "ufixed" => SolidityTokenType::Ufixed,
252 "if" => SolidityTokenType::If,
253 "else" => SolidityTokenType::Else,
254 "for" => SolidityTokenType::For,
255 "while" => SolidityTokenType::While,
256 "do" => SolidityTokenType::Do,
257 "break" => SolidityTokenType::Break,
258 "continue" => SolidityTokenType::Continue,
259 "return" => SolidityTokenType::Return,
260 "try" => SolidityTokenType::Try,
261 "catch" => SolidityTokenType::Catch,
262 "import" => SolidityTokenType::Import,
263 "pragma" => SolidityTokenType::Pragma,
264 "using" => SolidityTokenType::Using,
265 "is" => SolidityTokenType::Is,
266 "override" => SolidityTokenType::Override,
267 "virtual" => SolidityTokenType::Virtual,
268 "abstract" => SolidityTokenType::Abstract,
269 "true" | "false" => SolidityTokenType::BooleanLiteral,
270 _ => SolidityTokenType::Identifier,
271 }
272 }
273
274 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
276 let start_pos = state.get_position();
277
278 if let Some(ch) = state.peek() {
279 if ch.is_ascii_digit() {
280 state.advance(ch.len_utf8());
281
282 if ch == '0' {
284 if let Some('x') | Some('X') = state.peek() {
285 state.advance(1);
286 while let Some(ch) = state.peek() {
287 if ch.is_ascii_hexdigit() {
288 state.advance(1);
289 }
290 else {
291 break;
292 }
293 }
294 state.add_token(SolidityTokenType::HexLiteral, start_pos, state.get_position());
295 return true;
296 }
297 }
298
299 while let Some(ch) = state.peek() {
301 if ch.is_ascii_digit() {
302 state.advance(1);
303 }
304 else {
305 break;
306 }
307 }
308
309 if let Some('.') = state.peek() {
311 state.advance(1);
312 while let Some(ch) = state.peek() {
313 if ch.is_ascii_digit() {
314 state.advance(1);
315 }
316 else {
317 break;
318 }
319 }
320 }
321
322 if let Some('e') | Some('E') = state.peek() {
324 state.advance(1);
325 if let Some('+') | Some('-') = state.peek() {
326 state.advance(1);
327 }
328 while let Some(ch) = state.peek() {
329 if ch.is_ascii_digit() {
330 state.advance(1);
331 }
332 else {
333 break;
334 }
335 }
336 }
337
338 state.add_token(SolidityTokenType::NumberLiteral, start_pos, state.get_position());
339 true
340 }
341 else {
342 false
343 }
344 }
345 else {
346 false
347 }
348 }
349
350 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
352 let start_pos = state.get_position();
353
354 if let Some(quote) = state.peek() {
355 if quote == '"' || quote == '\'' {
356 state.advance(1);
357 let mut found_end = false;
358
359 while let Some(ch) = state.peek() {
360 if ch == quote {
361 state.advance(1);
362 found_end = true;
363 break;
364 }
365 else if ch == '\\' {
366 state.advance(1);
367 if let Some(_) = state.peek() {
368 state.advance(1);
369 }
370 }
371 else if ch == '\n' || ch == '\r' {
372 break; }
374 else {
375 state.advance(ch.len_utf8());
376 }
377 }
378
379 if found_end {
380 state.add_token(SolidityTokenType::StringLiteral, start_pos, state.get_position());
381 }
382 else {
383 state.add_token(SolidityTokenType::Error, start_pos, state.get_position())
384 }
385 true
386 }
387 else {
388 false
389 }
390 }
391 else {
392 false
393 }
394 }
395
396 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
398 let start_pos = state.get_position();
399
400 if let Some(ch) = state.peek() {
401 let token_kind = match ch {
402 '+' => {
403 state.advance(1);
404 if let Some('=') = state.peek() {
405 state.advance(1);
406 SolidityTokenType::PlusAssign
407 }
408 else {
409 SolidityTokenType::Plus
410 }
411 }
412 '-' => {
413 state.advance(1);
414 if let Some('=') = state.peek() {
415 state.advance(1);
416 SolidityTokenType::MinusAssign
417 }
418 else if let Some('>') = state.peek() {
419 state.advance(1);
420 SolidityTokenType::Arrow
421 }
422 else {
423 SolidityTokenType::Minus
424 }
425 }
426 '*' => {
427 state.advance(1);
428 if let Some('=') = state.peek() {
429 state.advance(1);
430 SolidityTokenType::StarAssign
431 }
432 else if let Some('*') = state.peek() {
433 state.advance(1);
434 SolidityTokenType::Power
435 }
436 else {
437 SolidityTokenType::Star
438 }
439 }
440 '/' => {
441 state.advance(1);
443 if let Some('=') = state.peek() {
444 state.advance(1);
445 SolidityTokenType::SlashAssign
446 }
447 else {
448 SolidityTokenType::Slash
449 }
450 }
451 '%' => {
452 state.advance(1);
453 if let Some('=') = state.peek() {
454 state.advance(1);
455 SolidityTokenType::PercentAssign
456 }
457 else {
458 SolidityTokenType::Percent
459 }
460 }
461 '=' => {
462 state.advance(1);
463 if let Some('=') = state.peek() {
464 state.advance(1);
465 SolidityTokenType::Equal
466 }
467 else {
468 SolidityTokenType::Assign
469 }
470 }
471 '!' => {
472 state.advance(1);
473 if let Some('=') = state.peek() {
474 state.advance(1);
475 SolidityTokenType::NotEqual
476 }
477 else {
478 SolidityTokenType::Not
479 }
480 }
481 '<' => {
482 state.advance(1);
483 if let Some('=') = state.peek() {
484 state.advance(1);
485 SolidityTokenType::LessEqual
486 }
487 else if let Some('<') = state.peek() {
488 state.advance(1);
489 SolidityTokenType::LeftShift
490 }
491 else {
492 SolidityTokenType::Less
493 }
494 }
495 '>' => {
496 state.advance(1);
497 if let Some('=') = state.peek() {
498 state.advance(1);
499 SolidityTokenType::GreaterEqual
500 }
501 else if let Some('>') = state.peek() {
502 state.advance(1);
503 SolidityTokenType::RightShift
504 }
505 else {
506 SolidityTokenType::Greater
507 }
508 }
509 '&' => {
510 state.advance(1);
511 if let Some('&') = state.peek() {
512 state.advance(1);
513 SolidityTokenType::And
514 }
515 else {
516 SolidityTokenType::BitAnd
517 }
518 }
519 '|' => {
520 state.advance(1);
521 if let Some('|') = state.peek() {
522 state.advance(1);
523 SolidityTokenType::Or
524 }
525 else {
526 SolidityTokenType::BitOr
527 }
528 }
529 '^' => {
530 state.advance(1);
531 SolidityTokenType::BitXor
532 }
533 '~' => {
534 state.advance(1);
535 SolidityTokenType::BitNot
536 }
537 _ => return false,
538 };
539
540 state.add_token(token_kind, start_pos, state.get_position());
541 true
542 }
543 else {
544 false
545 }
546 }
547
548 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
550 let start_pos = state.get_position();
551
552 if let Some(ch) = state.peek() {
553 let token_kind = match ch {
554 '(' => SolidityTokenType::LeftParen,
555 ')' => SolidityTokenType::RightParen,
556 '{' => SolidityTokenType::LeftBrace,
557 '}' => SolidityTokenType::RightBrace,
558 '[' => SolidityTokenType::LeftBracket,
559 ']' => SolidityTokenType::RightBracket,
560 ';' => SolidityTokenType::Semicolon,
561 ',' => SolidityTokenType::Comma,
562 '.' => SolidityTokenType::Dot,
563 _ => return false,
564 };
565
566 state.advance(ch.len_utf8());
567 state.add_token(token_kind, start_pos, state.get_position());
568 true
569 }
570 else {
571 false
572 }
573 }
574}