Skip to main content

oak_solidity/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::SolidityLanguage, lexer::token_type::SolidityTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
6
/// Shorthand for the generic `LexerState` specialised to `SolidityLanguage`.
type State<'a, S> = LexerState<'a, S, SolidityLanguage>;
8
/// Lexer that turns Solidity source text into `SolidityTokenType` tokens.
#[derive(Clone)]
pub struct SolidityLexer<'config> {
    /// Borrowed language configuration; stored by `new` but not read by the
    /// lexing code visible in this file.
    _config: &'config SolidityLanguage,
}
13
14impl<'config> Lexer<SolidityLanguage> for SolidityLexer<'config> {
15    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<SolidityLanguage>) -> LexOutput<SolidityLanguage> {
16        let mut state = State::new(source);
17        let result = self.run(&mut state);
18        if result.is_ok() {
19            state.add_eof();
20        }
21        state.finish_with_cache(result, cache)
22    }
23}
24
25impl<'config> SolidityLexer<'config> {
26    pub fn new(config: &'config SolidityLanguage) -> Self {
27        Self { _config: config }
28    }
29
30    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
31        while state.not_at_end() {
32            let safe_point = state.get_position();
33
34            if self.skip_whitespace(state) {
35                continue;
36            }
37
38            if self.lex_newline(state) {
39                continue;
40            }
41
42            if self.lex_line_comment(state) {
43                continue;
44            }
45
46            if self.lex_block_comment(state) {
47                continue;
48            }
49
50            if self.lex_identifier_or_keyword(state) {
51                continue;
52            }
53
54            if self.lex_number(state) {
55                continue;
56            }
57
58            if self.lex_string(state) {
59                continue;
60            }
61
62            if self.lex_operator(state) {
63                continue;
64            }
65
66            if self.lex_delimiter(state) {
67                continue;
68            }
69
70            // 如果没有匹配任何规则,跳过当前字符并标记错误
71            let start_pos = state.get_position();
72            if let Some(ch) = state.peek() {
73                state.advance(ch.len_utf8());
74                state.add_token(SolidityTokenType::Error, start_pos, state.get_position());
75            }
76
77            state.advance_if_dead_lock(safe_point)
78        }
79
80        Ok(())
81    }
82
83    /// 跳过空白字符
84    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85        let start_pos = state.get_position();
86
87        while let Some(ch) = state.peek() {
88            if ch == ' ' || ch == '\t' {
89                state.advance(ch.len_utf8());
90            }
91            else {
92                break;
93            }
94        }
95
96        if state.get_position() > start_pos {
97            state.add_token(SolidityTokenType::Whitespace, start_pos, state.get_position());
98            true
99        }
100        else {
101            false
102        }
103    }
104
105    /// 处理换行
106    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
107        let start_pos = state.get_position();
108
109        if let Some('\n') = state.peek() {
110            state.advance(1);
111            state.add_token(SolidityTokenType::Newline, start_pos, state.get_position());
112            true
113        }
114        else if let Some('\r') = state.peek() {
115            state.advance(1);
116            if let Some('\n') = state.peek() {
117                state.advance(1);
118            }
119            state.add_token(SolidityTokenType::Newline, start_pos, state.get_position());
120            true
121        }
122        else {
123            false
124        }
125    }
126
127    /// 处理单行注释
128    fn lex_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
129        let start_pos = state.get_position();
130
131        if let Some('/') = state.peek() {
132            state.advance(1);
133            if let Some('/') = state.peek() {
134                state.advance(1);
135
136                while let Some(ch) = state.peek() {
137                    if ch == '\n' || ch == '\r' {
138                        break;
139                    }
140                    else {
141                        state.advance(ch.len_utf8());
142                    }
143                }
144
145                state.add_token(SolidityTokenType::LineComment, start_pos, state.get_position());
146                true
147            }
148            else {
149                state.set_position(start_pos);
150                false
151            }
152        }
153        else {
154            false
155        }
156    }
157
158    /// 处理块注释
159    fn lex_block_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
160        let start_pos = state.get_position();
161
162        if let Some('/') = state.peek() {
163            state.advance(1);
164            if let Some('*') = state.peek() {
165                state.advance(1);
166
167                while let Some(ch) = state.peek() {
168                    if ch == '*' {
169                        state.advance(1);
170                        if let Some('/') = state.peek() {
171                            state.advance(1);
172                            break;
173                        }
174                    }
175                    else {
176                        state.advance(ch.len_utf8());
177                    }
178                }
179
180                state.add_token(SolidityTokenType::BlockComment, start_pos, state.get_position());
181                true
182            }
183            else {
184                state.set_position(start_pos);
185                false
186            }
187        }
188        else {
189            false
190        }
191    }
192
193    /// 处理标识符或关键字
194    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
195        let start_pos = state.get_position();
196
197        if let Some(ch) = state.peek() {
198            if ch.is_ascii_alphabetic() || ch == '_' {
199                state.advance(ch.len_utf8());
200
201                while let Some(ch) = state.peek() {
202                    if ch.is_ascii_alphanumeric() || ch == '_' {
203                        state.advance(ch.len_utf8());
204                    }
205                    else {
206                        break;
207                    }
208                }
209
210                let text = state.get_text_from(start_pos);
211                let token_kind = self.keyword_or_identifier(&text);
212                state.add_token(token_kind, start_pos, state.get_position());
213                true
214            }
215            else {
216                false
217            }
218        }
219        else {
220            false
221        }
222    }
223
224    /// 判断是关键字还是标识
225    fn keyword_or_identifier(&self, text: &str) -> SolidityTokenType {
226        match text {
227            "contract" => SolidityTokenType::Contract,
228            "interface" => SolidityTokenType::Interface,
229            "library" => SolidityTokenType::Library,
230            "function" => SolidityTokenType::Function,
231            "modifier" => SolidityTokenType::Modifier,
232            "event" => SolidityTokenType::Event,
233            "struct" => SolidityTokenType::Struct,
234            "enum" => SolidityTokenType::Enum,
235            "mapping" => SolidityTokenType::Mapping,
236            "public" => SolidityTokenType::Public,
237            "private" => SolidityTokenType::Private,
238            "internal" => SolidityTokenType::Internal,
239            "external" => SolidityTokenType::External,
240            "pure" => SolidityTokenType::Pure,
241            "view" => SolidityTokenType::View,
242            "payable" => SolidityTokenType::Payable,
243            "constant" => SolidityTokenType::Constant,
244            "bool" => SolidityTokenType::Bool,
245            "string" => SolidityTokenType::String,
246            "bytes" => SolidityTokenType::Bytes,
247            "address" => SolidityTokenType::Address,
248            "uint" => SolidityTokenType::Uint,
249            "int" => SolidityTokenType::Int,
250            "fixed" => SolidityTokenType::Fixed,
251            "ufixed" => SolidityTokenType::Ufixed,
252            "if" => SolidityTokenType::If,
253            "else" => SolidityTokenType::Else,
254            "for" => SolidityTokenType::For,
255            "while" => SolidityTokenType::While,
256            "do" => SolidityTokenType::Do,
257            "break" => SolidityTokenType::Break,
258            "continue" => SolidityTokenType::Continue,
259            "return" => SolidityTokenType::Return,
260            "try" => SolidityTokenType::Try,
261            "catch" => SolidityTokenType::Catch,
262            "import" => SolidityTokenType::Import,
263            "pragma" => SolidityTokenType::Pragma,
264            "using" => SolidityTokenType::Using,
265            "is" => SolidityTokenType::Is,
266            "override" => SolidityTokenType::Override,
267            "virtual" => SolidityTokenType::Virtual,
268            "abstract" => SolidityTokenType::Abstract,
269            "true" | "false" => SolidityTokenType::BooleanLiteral,
270            _ => SolidityTokenType::Identifier,
271        }
272    }
273
274    /// 处理数字
275    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
276        let start_pos = state.get_position();
277
278        if let Some(ch) = state.peek() {
279            if ch.is_ascii_digit() {
280                state.advance(ch.len_utf8());
281
282                // 处理十六进制
283                if ch == '0' {
284                    if let Some('x') | Some('X') = state.peek() {
285                        state.advance(1);
286                        while let Some(ch) = state.peek() {
287                            if ch.is_ascii_hexdigit() {
288                                state.advance(1);
289                            }
290                            else {
291                                break;
292                            }
293                        }
294                        state.add_token(SolidityTokenType::HexLiteral, start_pos, state.get_position());
295                        return true;
296                    }
297                }
298
299                // 处理十进制数
300                while let Some(ch) = state.peek() {
301                    if ch.is_ascii_digit() {
302                        state.advance(1);
303                    }
304                    else {
305                        break;
306                    }
307                }
308
309                // 处理小数
310                if let Some('.') = state.peek() {
311                    state.advance(1);
312                    while let Some(ch) = state.peek() {
313                        if ch.is_ascii_digit() {
314                            state.advance(1);
315                        }
316                        else {
317                            break;
318                        }
319                    }
320                }
321
322                // 处理科学计数
323                if let Some('e') | Some('E') = state.peek() {
324                    state.advance(1);
325                    if let Some('+') | Some('-') = state.peek() {
326                        state.advance(1);
327                    }
328                    while let Some(ch) = state.peek() {
329                        if ch.is_ascii_digit() {
330                            state.advance(1);
331                        }
332                        else {
333                            break;
334                        }
335                    }
336                }
337
338                state.add_token(SolidityTokenType::NumberLiteral, start_pos, state.get_position());
339                true
340            }
341            else {
342                false
343            }
344        }
345        else {
346            false
347        }
348    }
349
350    /// 处理字符串
351    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
352        let start_pos = state.get_position();
353
354        if let Some(quote) = state.peek() {
355            if quote == '"' || quote == '\'' {
356                state.advance(1);
357                let mut found_end = false;
358
359                while let Some(ch) = state.peek() {
360                    if ch == quote {
361                        state.advance(1);
362                        found_end = true;
363                        break;
364                    }
365                    else if ch == '\\' {
366                        state.advance(1);
367                        if let Some(_) = state.peek() {
368                            state.advance(1);
369                        }
370                    }
371                    else if ch == '\n' || ch == '\r' {
372                        break; // 字符串不能跨行
373                    }
374                    else {
375                        state.advance(ch.len_utf8());
376                    }
377                }
378
379                if found_end {
380                    state.add_token(SolidityTokenType::StringLiteral, start_pos, state.get_position());
381                }
382                else {
383                    state.add_token(SolidityTokenType::Error, start_pos, state.get_position())
384                }
385                true
386            }
387            else {
388                false
389            }
390        }
391        else {
392            false
393        }
394    }
395
396    /// 处理操作符
397    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
398        let start_pos = state.get_position();
399
400        if let Some(ch) = state.peek() {
401            let token_kind = match ch {
402                '+' => {
403                    state.advance(1);
404                    if let Some('=') = state.peek() {
405                        state.advance(1);
406                        SolidityTokenType::PlusAssign
407                    }
408                    else {
409                        SolidityTokenType::Plus
410                    }
411                }
412                '-' => {
413                    state.advance(1);
414                    if let Some('=') = state.peek() {
415                        state.advance(1);
416                        SolidityTokenType::MinusAssign
417                    }
418                    else if let Some('>') = state.peek() {
419                        state.advance(1);
420                        SolidityTokenType::Arrow
421                    }
422                    else {
423                        SolidityTokenType::Minus
424                    }
425                }
426                '*' => {
427                    state.advance(1);
428                    if let Some('=') = state.peek() {
429                        state.advance(1);
430                        SolidityTokenType::StarAssign
431                    }
432                    else if let Some('*') = state.peek() {
433                        state.advance(1);
434                        SolidityTokenType::Power
435                    }
436                    else {
437                        SolidityTokenType::Star
438                    }
439                }
440                '/' => {
441                    // 这里不处理注释,因为已经在其他地方处理了
442                    state.advance(1);
443                    if let Some('=') = state.peek() {
444                        state.advance(1);
445                        SolidityTokenType::SlashAssign
446                    }
447                    else {
448                        SolidityTokenType::Slash
449                    }
450                }
451                '%' => {
452                    state.advance(1);
453                    if let Some('=') = state.peek() {
454                        state.advance(1);
455                        SolidityTokenType::PercentAssign
456                    }
457                    else {
458                        SolidityTokenType::Percent
459                    }
460                }
461                '=' => {
462                    state.advance(1);
463                    if let Some('=') = state.peek() {
464                        state.advance(1);
465                        SolidityTokenType::Equal
466                    }
467                    else {
468                        SolidityTokenType::Assign
469                    }
470                }
471                '!' => {
472                    state.advance(1);
473                    if let Some('=') = state.peek() {
474                        state.advance(1);
475                        SolidityTokenType::NotEqual
476                    }
477                    else {
478                        SolidityTokenType::Not
479                    }
480                }
481                '<' => {
482                    state.advance(1);
483                    if let Some('=') = state.peek() {
484                        state.advance(1);
485                        SolidityTokenType::LessEqual
486                    }
487                    else if let Some('<') = state.peek() {
488                        state.advance(1);
489                        SolidityTokenType::LeftShift
490                    }
491                    else {
492                        SolidityTokenType::Less
493                    }
494                }
495                '>' => {
496                    state.advance(1);
497                    if let Some('=') = state.peek() {
498                        state.advance(1);
499                        SolidityTokenType::GreaterEqual
500                    }
501                    else if let Some('>') = state.peek() {
502                        state.advance(1);
503                        SolidityTokenType::RightShift
504                    }
505                    else {
506                        SolidityTokenType::Greater
507                    }
508                }
509                '&' => {
510                    state.advance(1);
511                    if let Some('&') = state.peek() {
512                        state.advance(1);
513                        SolidityTokenType::And
514                    }
515                    else {
516                        SolidityTokenType::BitAnd
517                    }
518                }
519                '|' => {
520                    state.advance(1);
521                    if let Some('|') = state.peek() {
522                        state.advance(1);
523                        SolidityTokenType::Or
524                    }
525                    else {
526                        SolidityTokenType::BitOr
527                    }
528                }
529                '^' => {
530                    state.advance(1);
531                    SolidityTokenType::BitXor
532                }
533                '~' => {
534                    state.advance(1);
535                    SolidityTokenType::BitNot
536                }
537                _ => return false,
538            };
539
540            state.add_token(token_kind, start_pos, state.get_position());
541            true
542        }
543        else {
544            false
545        }
546    }
547
548    /// 处理分隔符
549    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
550        let start_pos = state.get_position();
551
552        if let Some(ch) = state.peek() {
553            let token_kind = match ch {
554                '(' => SolidityTokenType::LeftParen,
555                ')' => SolidityTokenType::RightParen,
556                '{' => SolidityTokenType::LeftBrace,
557                '}' => SolidityTokenType::RightBrace,
558                '[' => SolidityTokenType::LeftBracket,
559                ']' => SolidityTokenType::RightBracket,
560                ';' => SolidityTokenType::Semicolon,
561                ',' => SolidityTokenType::Comma,
562                '.' => SolidityTokenType::Dot,
563                _ => return false,
564            };
565
566            state.advance(ch.len_utf8());
567            state.add_token(token_kind, start_pos, state.get_position());
568            true
569        }
570        else {
571            false
572        }
573    }
574}