oak_solidity/lexer/
mod.rs

1use crate::{kind::SoliditySyntaxKind, language::SolidityLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<S> = LexerState<S, SolidityLanguage>;
5
6#[derive(Clone)]
7pub struct SolidityLexer<'config> {
8    config: &'config SolidityLanguage,
9}
10
11impl<'config> Lexer<SolidityLanguage> for SolidityLexer<'config> {
12    fn lex_incremental(
13        &self,
14        source: impl Source,
15        changed: usize,
16        cache: IncrementalCache<SolidityLanguage>,
17    ) -> LexOutput<SolidityLanguage> {
18        let mut state = LexerState::new_with_cache(source, changed, cache);
19        let result = self.run(&mut state);
20        state.finish(result)
21    }
22}
23
24impl<'config> SolidityLexer<'config> {
25    pub fn new(config: &'config SolidityLanguage) -> Self {
26        Self { config }
27    }
28
29    fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
30        while state.not_at_end() {
31            let safe_point = state.get_position();
32
33            if self.skip_whitespace(state) {
34                continue;
35            }
36
37            if self.lex_newline(state) {
38                continue;
39            }
40
41            if self.lex_line_comment(state) {
42                continue;
43            }
44
45            if self.lex_block_comment(state) {
46                continue;
47            }
48
49            if self.lex_identifier_or_keyword(state) {
50                continue;
51            }
52
53            if self.lex_number(state) {
54                continue;
55            }
56
57            if self.lex_string(state) {
58                continue;
59            }
60
61            if self.lex_operator(state) {
62                continue;
63            }
64
65            if self.lex_delimiter(state) {
66                continue;
67            }
68
69            // 如果没有匹配任何模式,跳过当前字符并添加错误 token
70            if let Some(ch) = state.peek() {
71                state.advance(ch.len_utf8());
72                state.add_token(SoliditySyntaxKind::Error, safe_point, state.get_position());
73            }
74        }
75
76        // 添加 EOF token
77        state.add_token(SoliditySyntaxKind::Eof, state.get_position(), state.get_position());
78        Ok(())
79    }
80
81    /// 跳过空白字符
82    fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
83        let start_pos = state.get_position();
84
85        while let Some(ch) = state.peek() {
86            if ch == ' ' || ch == '\t' {
87                state.advance(ch.len_utf8());
88            }
89            else {
90                break;
91            }
92        }
93
94        if state.get_position() > start_pos {
95            state.add_token(SoliditySyntaxKind::Whitespace, start_pos, state.get_position());
96            true
97        }
98        else {
99            false
100        }
101    }
102
103    /// 处理换行
104    fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
105        let start_pos = state.get_position();
106
107        if let Some('\n') = state.peek() {
108            state.advance(1);
109            state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
110            true
111        }
112        else if let Some('\r') = state.peek() {
113            state.advance(1);
114            if let Some('\n') = state.peek() {
115                state.advance(1);
116            }
117            state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
118            true
119        }
120        else {
121            false
122        }
123    }
124
125    /// 处理行注
126    fn lex_line_comment<S: Source>(&self, state: &mut State<S>) -> bool {
127        let start_pos = state.get_position();
128
129        if let Some('/') = state.peek() {
130            state.advance(1);
131            if let Some('/') = state.peek() {
132                state.advance(1);
133
134                while let Some(ch) = state.peek() {
135                    if ch == '\n' || ch == '\r' {
136                        break;
137                    }
138                    else {
139                        state.advance(ch.len_utf8());
140                    }
141                }
142
143                state.add_token(SoliditySyntaxKind::LineComment, start_pos, state.get_position());
144                true
145            }
146            else {
147                state.set_position(start_pos);
148                false
149            }
150        }
151        else {
152            false
153        }
154    }
155
156    /// 处理块注
157    fn lex_block_comment<S: Source>(&self, state: &mut State<S>) -> bool {
158        let start_pos = state.get_position();
159
160        if let Some('/') = state.peek() {
161            state.advance(1);
162            if let Some('*') = state.peek() {
163                state.advance(1);
164
165                while let Some(ch) = state.peek() {
166                    if ch == '*' {
167                        state.advance(1);
168                        if let Some('/') = state.peek() {
169                            state.advance(1);
170                            break;
171                        }
172                    }
173                    else {
174                        state.advance(ch.len_utf8());
175                    }
176                }
177
178                state.add_token(SoliditySyntaxKind::BlockComment, start_pos, state.get_position());
179                true
180            }
181            else {
182                state.set_position(start_pos);
183                false
184            }
185        }
186        else {
187            false
188        }
189    }
190
191    /// 处理标识符或关键
192    fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
193        let start_pos = state.get_position();
194
195        if let Some(ch) = state.peek() {
196            if ch.is_ascii_alphabetic() || ch == '_' {
197                state.advance(ch.len_utf8());
198
199                while let Some(ch) = state.peek() {
200                    if ch.is_ascii_alphanumeric() || ch == '_' {
201                        state.advance(ch.len_utf8());
202                    }
203                    else {
204                        break;
205                    }
206                }
207
208                let text = state.get_text_from(start_pos);
209                let token_kind = self.keyword_or_identifier(text);
210                state.add_token(token_kind, start_pos, state.get_position());
211                true
212            }
213            else {
214                false
215            }
216        }
217        else {
218            false
219        }
220    }
221
222    /// 判断是关键字还是标识
223    fn keyword_or_identifier(&self, text: &str) -> SoliditySyntaxKind {
224        match text {
225            "contract" => SoliditySyntaxKind::Contract,
226            "interface" => SoliditySyntaxKind::Interface,
227            "library" => SoliditySyntaxKind::Library,
228            "function" => SoliditySyntaxKind::Function,
229            "modifier" => SoliditySyntaxKind::Modifier,
230            "event" => SoliditySyntaxKind::Event,
231            "struct" => SoliditySyntaxKind::Struct,
232            "enum" => SoliditySyntaxKind::Enum,
233            "mapping" => SoliditySyntaxKind::Mapping,
234            "public" => SoliditySyntaxKind::Public,
235            "private" => SoliditySyntaxKind::Private,
236            "internal" => SoliditySyntaxKind::Internal,
237            "external" => SoliditySyntaxKind::External,
238            "pure" => SoliditySyntaxKind::Pure,
239            "view" => SoliditySyntaxKind::View,
240            "payable" => SoliditySyntaxKind::Payable,
241            "constant" => SoliditySyntaxKind::Constant,
242            "bool" => SoliditySyntaxKind::Bool,
243            "string" => SoliditySyntaxKind::String,
244            "bytes" => SoliditySyntaxKind::Bytes,
245            "address" => SoliditySyntaxKind::Address,
246            "uint" => SoliditySyntaxKind::Uint,
247            "int" => SoliditySyntaxKind::Int,
248            "fixed" => SoliditySyntaxKind::Fixed,
249            "ufixed" => SoliditySyntaxKind::Ufixed,
250            "if" => SoliditySyntaxKind::If,
251            "else" => SoliditySyntaxKind::Else,
252            "for" => SoliditySyntaxKind::For,
253            "while" => SoliditySyntaxKind::While,
254            "do" => SoliditySyntaxKind::Do,
255            "break" => SoliditySyntaxKind::Break,
256            "continue" => SoliditySyntaxKind::Continue,
257            "return" => SoliditySyntaxKind::Return,
258            "try" => SoliditySyntaxKind::Try,
259            "catch" => SoliditySyntaxKind::Catch,
260            "import" => SoliditySyntaxKind::Import,
261            "pragma" => SoliditySyntaxKind::Pragma,
262            "using" => SoliditySyntaxKind::Using,
263            "is" => SoliditySyntaxKind::Is,
264            "override" => SoliditySyntaxKind::Override,
265            "virtual" => SoliditySyntaxKind::Virtual,
266            "abstract" => SoliditySyntaxKind::Abstract,
267            "true" | "false" => SoliditySyntaxKind::BooleanLiteral,
268            _ => SoliditySyntaxKind::Identifier,
269        }
270    }
271
272    /// 处理数字字面
273    fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
274        let start_pos = state.get_position();
275
276        if let Some(ch) = state.peek() {
277            if ch.is_ascii_digit() {
278                state.advance(1);
279
280                // 处理十六进制
281                if ch == '0' {
282                    if let Some('x') | Some('X') = state.peek() {
283                        state.advance(1);
284                        while let Some(ch) = state.peek() {
285                            if ch.is_ascii_hexdigit() {
286                                state.advance(1);
287                            }
288                            else {
289                                break;
290                            }
291                        }
292                        state.add_token(SoliditySyntaxKind::HexLiteral, start_pos, state.get_position());
293                        return true;
294                    }
295                }
296
297                // 处理十进制数
298                while let Some(ch) = state.peek() {
299                    if ch.is_ascii_digit() {
300                        state.advance(1);
301                    }
302                    else {
303                        break;
304                    }
305                }
306
307                // 处理小数
308                if let Some('.') = state.peek() {
309                    state.advance(1);
310                    while let Some(ch) = state.peek() {
311                        if ch.is_ascii_digit() {
312                            state.advance(1);
313                        }
314                        else {
315                            break;
316                        }
317                    }
318                }
319
320                // 处理科学计数
321                if let Some('e') | Some('E') = state.peek() {
322                    state.advance(1);
323                    if let Some('+') | Some('-') = state.peek() {
324                        state.advance(1);
325                    }
326                    while let Some(ch) = state.peek() {
327                        if ch.is_ascii_digit() {
328                            state.advance(1);
329                        }
330                        else {
331                            break;
332                        }
333                    }
334                }
335
336                state.add_token(SoliditySyntaxKind::NumberLiteral, start_pos, state.get_position());
337                true
338            }
339            else {
340                false
341            }
342        }
343        else {
344            false
345        }
346    }
347
348    /// 处理字符串字面量
349    fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
350        let start_pos = state.get_position();
351
352        if let Some(quote) = state.peek() {
353            if quote == '"' || quote == '\'' {
354                state.advance(1);
355                let mut found_end = false;
356
357                while let Some(ch) = state.peek() {
358                    if ch == quote {
359                        state.advance(1);
360                        found_end = true;
361                        break;
362                    }
363                    else if ch == '\\' {
364                        state.advance(1);
365                        if let Some(_) = state.peek() {
366                            state.advance(1);
367                        }
368                    }
369                    else if ch == '\n' || ch == '\r' {
370                        break; // 字符串不能跨行
371                    }
372                    else {
373                        state.advance(ch.len_utf8());
374                    }
375                }
376
377                if found_end {
378                    state.add_token(SoliditySyntaxKind::StringLiteral, start_pos, state.get_position());
379                }
380                else {
381                    state.add_token(SoliditySyntaxKind::Error, start_pos, state.get_position());
382                }
383                true
384            }
385            else {
386                false
387            }
388        }
389        else {
390            false
391        }
392    }
393
394    /// 处理操作
395    fn lex_operator<S: Source>(&self, state: &mut State<S>) -> bool {
396        let start_pos = state.get_position();
397
398        if let Some(ch) = state.peek() {
399            let token_kind = match ch {
400                '+' => {
401                    state.advance(1);
402                    if let Some('=') = state.peek() {
403                        state.advance(1);
404                        SoliditySyntaxKind::PlusAssign
405                    }
406                    else {
407                        SoliditySyntaxKind::Plus
408                    }
409                }
410                '-' => {
411                    state.advance(1);
412                    if let Some('=') = state.peek() {
413                        state.advance(1);
414                        SoliditySyntaxKind::MinusAssign
415                    }
416                    else if let Some('>') = state.peek() {
417                        state.advance(1);
418                        SoliditySyntaxKind::Arrow
419                    }
420                    else {
421                        SoliditySyntaxKind::Minus
422                    }
423                }
424                '*' => {
425                    state.advance(1);
426                    if let Some('=') = state.peek() {
427                        state.advance(1);
428                        SoliditySyntaxKind::StarAssign
429                    }
430                    else if let Some('*') = state.peek() {
431                        state.advance(1);
432                        SoliditySyntaxKind::Power
433                    }
434                    else {
435                        SoliditySyntaxKind::Star
436                    }
437                }
438                '/' => {
439                    // 这里不处理注释,因为已经在其他地方处理了
440                    state.advance(1);
441                    if let Some('=') = state.peek() {
442                        state.advance(1);
443                        SoliditySyntaxKind::SlashAssign
444                    }
445                    else {
446                        SoliditySyntaxKind::Slash
447                    }
448                }
449                '%' => {
450                    state.advance(1);
451                    if let Some('=') = state.peek() {
452                        state.advance(1);
453                        SoliditySyntaxKind::PercentAssign
454                    }
455                    else {
456                        SoliditySyntaxKind::Percent
457                    }
458                }
459                '=' => {
460                    state.advance(1);
461                    if let Some('=') = state.peek() {
462                        state.advance(1);
463                        SoliditySyntaxKind::Equal
464                    }
465                    else {
466                        SoliditySyntaxKind::Assign
467                    }
468                }
469                '!' => {
470                    state.advance(1);
471                    if let Some('=') = state.peek() {
472                        state.advance(1);
473                        SoliditySyntaxKind::NotEqual
474                    }
475                    else {
476                        SoliditySyntaxKind::Not
477                    }
478                }
479                '<' => {
480                    state.advance(1);
481                    if let Some('=') = state.peek() {
482                        state.advance(1);
483                        SoliditySyntaxKind::LessEqual
484                    }
485                    else if let Some('<') = state.peek() {
486                        state.advance(1);
487                        SoliditySyntaxKind::LeftShift
488                    }
489                    else {
490                        SoliditySyntaxKind::Less
491                    }
492                }
493                '>' => {
494                    state.advance(1);
495                    if let Some('=') = state.peek() {
496                        state.advance(1);
497                        SoliditySyntaxKind::GreaterEqual
498                    }
499                    else if let Some('>') = state.peek() {
500                        state.advance(1);
501                        SoliditySyntaxKind::RightShift
502                    }
503                    else {
504                        SoliditySyntaxKind::Greater
505                    }
506                }
507                '&' => {
508                    state.advance(1);
509                    if let Some('&') = state.peek() {
510                        state.advance(1);
511                        SoliditySyntaxKind::And
512                    }
513                    else {
514                        SoliditySyntaxKind::BitAnd
515                    }
516                }
517                '|' => {
518                    state.advance(1);
519                    if let Some('|') = state.peek() {
520                        state.advance(1);
521                        SoliditySyntaxKind::Or
522                    }
523                    else {
524                        SoliditySyntaxKind::BitOr
525                    }
526                }
527                '^' => {
528                    state.advance(1);
529                    SoliditySyntaxKind::BitXor
530                }
531                '~' => {
532                    state.advance(1);
533                    SoliditySyntaxKind::BitNot
534                }
535                _ => return false,
536            };
537
538            state.add_token(token_kind, start_pos, state.get_position());
539            true
540        }
541        else {
542            false
543        }
544    }
545
546    /// 处理分隔
547    fn lex_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
548        let start_pos = state.get_position();
549
550        if let Some(ch) = state.peek() {
551            let token_kind = match ch {
552                '(' => SoliditySyntaxKind::LeftParen,
553                ')' => SoliditySyntaxKind::RightParen,
554                '{' => SoliditySyntaxKind::LeftBrace,
555                '}' => SoliditySyntaxKind::RightBrace,
556                '[' => SoliditySyntaxKind::LeftBracket,
557                ']' => SoliditySyntaxKind::RightBracket,
558                ';' => SoliditySyntaxKind::Semicolon,
559                ',' => SoliditySyntaxKind::Comma,
560                '.' => SoliditySyntaxKind::Dot,
561                _ => return false,
562            };
563
564            state.advance(ch.len_utf8());
565            state.add_token(token_kind, start_pos, state.get_position());
566            true
567        }
568        else {
569            false
570        }
571    }
572
573    /// 处理单字符 token
574    fn lex_single_char_token<S: Source>(&self, state: &mut State<S>) -> bool {
575        if let Some(ch) = state.peek() {
576            let start_pos = state.get_position();
577
578            let token_kind = match ch {
579                '(' => SoliditySyntaxKind::LeftParen,
580                ')' => SoliditySyntaxKind::RightParen,
581                '{' => SoliditySyntaxKind::LeftBrace,
582                '}' => SoliditySyntaxKind::RightBrace,
583                '[' => SoliditySyntaxKind::LeftBracket,
584                ']' => SoliditySyntaxKind::RightBracket,
585                ';' => SoliditySyntaxKind::Semicolon,
586                ',' => SoliditySyntaxKind::Comma,
587                '.' => SoliditySyntaxKind::Dot,
588                _ => return false,
589            };
590
591            state.advance(ch.len_utf8());
592            state.add_token(token_kind, start_pos, state.get_position());
593            true
594        }
595        else {
596            false
597        }
598    }
599}