oak_solidity/lexer/
mod.rs

1use crate::{kind::SoliditySyntaxKind, language::SolidityLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
/// Shorthand for the generic lexer state specialised to [`SolidityLanguage`].
type State<'a, S> = LexerState<'a, S, SolidityLanguage>;
5
/// Lexer that splits Solidity source text into tokens.
///
/// The lexer borrows a [`SolidityLanguage`] configuration for its whole
/// lifetime.
#[derive(Clone)]
pub struct SolidityLexer<'config> {
    /// Borrowed language configuration. It is stored by [`SolidityLexer::new`]
    /// but not read by any of the lexing rules in this file (hence the
    /// leading underscore).
    _config: &'config SolidityLanguage,
}
10
11impl<'config> Lexer<SolidityLanguage> for SolidityLexer<'config> {
12    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<SolidityLanguage>) -> LexOutput<SolidityLanguage> {
13        let mut state = State::new(source);
14        let result = self.run(&mut state);
15        if result.is_ok() {
16            state.add_eof();
17        }
18        state.finish_with_cache(result, cache)
19    }
20}
21
22impl<'config> SolidityLexer<'config> {
    /// Creates a lexer that borrows the given language configuration.
    pub fn new(config: &'config SolidityLanguage) -> Self {
        Self { _config: config }
    }
26
27    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
28        while state.not_at_end() {
29            let safe_point = state.get_position();
30
31            if self.skip_whitespace(state) {
32                continue;
33            }
34
35            if self.lex_newline(state) {
36                continue;
37            }
38
39            if self.lex_line_comment(state) {
40                continue;
41            }
42
43            if self.lex_block_comment(state) {
44                continue;
45            }
46
47            if self.lex_identifier_or_keyword(state) {
48                continue;
49            }
50
51            if self.lex_number(state) {
52                continue;
53            }
54
55            if self.lex_string(state) {
56                continue;
57            }
58
59            if self.lex_operator(state) {
60                continue;
61            }
62
63            if self.lex_delimiter(state) {
64                continue;
65            }
66
67            // 如果没有匹配任何规则,跳过当前字符并标记错误
68            let start_pos = state.get_position();
69            if let Some(ch) = state.peek() {
70                state.advance(ch.len_utf8());
71                state.add_token(SoliditySyntaxKind::Error, start_pos, state.get_position());
72            }
73
74            state.advance_if_dead_lock(safe_point);
75        }
76
77        Ok(())
78    }
79
80    /// 跳过空白字符
81    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82        let start_pos = state.get_position();
83
84        while let Some(ch) = state.peek() {
85            if ch == ' ' || ch == '\t' {
86                state.advance(ch.len_utf8());
87            }
88            else {
89                break;
90            }
91        }
92
93        if state.get_position() > start_pos {
94            state.add_token(SoliditySyntaxKind::Whitespace, start_pos, state.get_position());
95            true
96        }
97        else {
98            false
99        }
100    }
101
102    /// 处理换行
103    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
104        let start_pos = state.get_position();
105
106        if let Some('\n') = state.peek() {
107            state.advance(1);
108            state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
109            true
110        }
111        else if let Some('\r') = state.peek() {
112            state.advance(1);
113            if let Some('\n') = state.peek() {
114                state.advance(1);
115            }
116            state.add_token(SoliditySyntaxKind::Newline, start_pos, state.get_position());
117            true
118        }
119        else {
120            false
121        }
122    }
123
124    /// 处理单行注释
125    fn lex_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
126        let start_pos = state.get_position();
127
128        if let Some('/') = state.peek() {
129            state.advance(1);
130            if let Some('/') = state.peek() {
131                state.advance(1);
132
133                while let Some(ch) = state.peek() {
134                    if ch == '\n' || ch == '\r' {
135                        break;
136                    }
137                    else {
138                        state.advance(ch.len_utf8());
139                    }
140                }
141
142                state.add_token(SoliditySyntaxKind::LineComment, start_pos, state.get_position());
143                true
144            }
145            else {
146                state.set_position(start_pos);
147                false
148            }
149        }
150        else {
151            false
152        }
153    }
154
155    /// 处理块注释
156    fn lex_block_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
157        let start_pos = state.get_position();
158
159        if let Some('/') = state.peek() {
160            state.advance(1);
161            if let Some('*') = state.peek() {
162                state.advance(1);
163
164                while let Some(ch) = state.peek() {
165                    if ch == '*' {
166                        state.advance(1);
167                        if let Some('/') = state.peek() {
168                            state.advance(1);
169                            break;
170                        }
171                    }
172                    else {
173                        state.advance(ch.len_utf8());
174                    }
175                }
176
177                state.add_token(SoliditySyntaxKind::BlockComment, start_pos, state.get_position());
178                true
179            }
180            else {
181                state.set_position(start_pos);
182                false
183            }
184        }
185        else {
186            false
187        }
188    }
189
190    /// 处理标识符或关键字
191    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
192        let start_pos = state.get_position();
193
194        if let Some(ch) = state.peek() {
195            if ch.is_ascii_alphabetic() || ch == '_' {
196                state.advance(ch.len_utf8());
197
198                while let Some(ch) = state.peek() {
199                    if ch.is_ascii_alphanumeric() || ch == '_' {
200                        state.advance(ch.len_utf8());
201                    }
202                    else {
203                        break;
204                    }
205                }
206
207                let text = state.get_text_from(start_pos);
208                let token_kind = self.keyword_or_identifier(&text);
209                state.add_token(token_kind, start_pos, state.get_position());
210                true
211            }
212            else {
213                false
214            }
215        }
216        else {
217            false
218        }
219    }
220
221    /// 判断是关键字还是标识
222    fn keyword_or_identifier(&self, text: &str) -> SoliditySyntaxKind {
223        match text {
224            "contract" => SoliditySyntaxKind::Contract,
225            "interface" => SoliditySyntaxKind::Interface,
226            "library" => SoliditySyntaxKind::Library,
227            "function" => SoliditySyntaxKind::Function,
228            "modifier" => SoliditySyntaxKind::Modifier,
229            "event" => SoliditySyntaxKind::Event,
230            "struct" => SoliditySyntaxKind::Struct,
231            "enum" => SoliditySyntaxKind::Enum,
232            "mapping" => SoliditySyntaxKind::Mapping,
233            "public" => SoliditySyntaxKind::Public,
234            "private" => SoliditySyntaxKind::Private,
235            "internal" => SoliditySyntaxKind::Internal,
236            "external" => SoliditySyntaxKind::External,
237            "pure" => SoliditySyntaxKind::Pure,
238            "view" => SoliditySyntaxKind::View,
239            "payable" => SoliditySyntaxKind::Payable,
240            "constant" => SoliditySyntaxKind::Constant,
241            "bool" => SoliditySyntaxKind::Bool,
242            "string" => SoliditySyntaxKind::String,
243            "bytes" => SoliditySyntaxKind::Bytes,
244            "address" => SoliditySyntaxKind::Address,
245            "uint" => SoliditySyntaxKind::Uint,
246            "int" => SoliditySyntaxKind::Int,
247            "fixed" => SoliditySyntaxKind::Fixed,
248            "ufixed" => SoliditySyntaxKind::Ufixed,
249            "if" => SoliditySyntaxKind::If,
250            "else" => SoliditySyntaxKind::Else,
251            "for" => SoliditySyntaxKind::For,
252            "while" => SoliditySyntaxKind::While,
253            "do" => SoliditySyntaxKind::Do,
254            "break" => SoliditySyntaxKind::Break,
255            "continue" => SoliditySyntaxKind::Continue,
256            "return" => SoliditySyntaxKind::Return,
257            "try" => SoliditySyntaxKind::Try,
258            "catch" => SoliditySyntaxKind::Catch,
259            "import" => SoliditySyntaxKind::Import,
260            "pragma" => SoliditySyntaxKind::Pragma,
261            "using" => SoliditySyntaxKind::Using,
262            "is" => SoliditySyntaxKind::Is,
263            "override" => SoliditySyntaxKind::Override,
264            "virtual" => SoliditySyntaxKind::Virtual,
265            "abstract" => SoliditySyntaxKind::Abstract,
266            "true" | "false" => SoliditySyntaxKind::BooleanLiteral,
267            _ => SoliditySyntaxKind::Identifier,
268        }
269    }
270
271    /// 处理数字
272    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
273        let start_pos = state.get_position();
274
275        if let Some(ch) = state.peek() {
276            if ch.is_ascii_digit() {
277                state.advance(ch.len_utf8());
278
279                // 处理十六进制
280                if ch == '0' {
281                    if let Some('x') | Some('X') = state.peek() {
282                        state.advance(1);
283                        while let Some(ch) = state.peek() {
284                            if ch.is_ascii_hexdigit() {
285                                state.advance(1);
286                            }
287                            else {
288                                break;
289                            }
290                        }
291                        state.add_token(SoliditySyntaxKind::HexLiteral, start_pos, state.get_position());
292                        return true;
293                    }
294                }
295
296                // 处理十进制数
297                while let Some(ch) = state.peek() {
298                    if ch.is_ascii_digit() {
299                        state.advance(1);
300                    }
301                    else {
302                        break;
303                    }
304                }
305
306                // 处理小数
307                if let Some('.') = state.peek() {
308                    state.advance(1);
309                    while let Some(ch) = state.peek() {
310                        if ch.is_ascii_digit() {
311                            state.advance(1);
312                        }
313                        else {
314                            break;
315                        }
316                    }
317                }
318
319                // 处理科学计数
320                if let Some('e') | Some('E') = state.peek() {
321                    state.advance(1);
322                    if let Some('+') | Some('-') = state.peek() {
323                        state.advance(1);
324                    }
325                    while let Some(ch) = state.peek() {
326                        if ch.is_ascii_digit() {
327                            state.advance(1);
328                        }
329                        else {
330                            break;
331                        }
332                    }
333                }
334
335                state.add_token(SoliditySyntaxKind::NumberLiteral, start_pos, state.get_position());
336                true
337            }
338            else {
339                false
340            }
341        }
342        else {
343            false
344        }
345    }
346
347    /// 处理字符串
348    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
349        let start_pos = state.get_position();
350
351        if let Some(quote) = state.peek() {
352            if quote == '"' || quote == '\'' {
353                state.advance(1);
354                let mut found_end = false;
355
356                while let Some(ch) = state.peek() {
357                    if ch == quote {
358                        state.advance(1);
359                        found_end = true;
360                        break;
361                    }
362                    else if ch == '\\' {
363                        state.advance(1);
364                        if let Some(_) = state.peek() {
365                            state.advance(1);
366                        }
367                    }
368                    else if ch == '\n' || ch == '\r' {
369                        break; // 字符串不能跨行
370                    }
371                    else {
372                        state.advance(ch.len_utf8());
373                    }
374                }
375
376                if found_end {
377                    state.add_token(SoliditySyntaxKind::StringLiteral, start_pos, state.get_position());
378                }
379                else {
380                    state.add_token(SoliditySyntaxKind::Error, start_pos, state.get_position());
381                }
382                true
383            }
384            else {
385                false
386            }
387        }
388        else {
389            false
390        }
391    }
392
393    /// 处理操作符
394    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
395        let start_pos = state.get_position();
396
397        if let Some(ch) = state.peek() {
398            let token_kind = match ch {
399                '+' => {
400                    state.advance(1);
401                    if let Some('=') = state.peek() {
402                        state.advance(1);
403                        SoliditySyntaxKind::PlusAssign
404                    }
405                    else {
406                        SoliditySyntaxKind::Plus
407                    }
408                }
409                '-' => {
410                    state.advance(1);
411                    if let Some('=') = state.peek() {
412                        state.advance(1);
413                        SoliditySyntaxKind::MinusAssign
414                    }
415                    else if let Some('>') = state.peek() {
416                        state.advance(1);
417                        SoliditySyntaxKind::Arrow
418                    }
419                    else {
420                        SoliditySyntaxKind::Minus
421                    }
422                }
423                '*' => {
424                    state.advance(1);
425                    if let Some('=') = state.peek() {
426                        state.advance(1);
427                        SoliditySyntaxKind::StarAssign
428                    }
429                    else if let Some('*') = state.peek() {
430                        state.advance(1);
431                        SoliditySyntaxKind::Power
432                    }
433                    else {
434                        SoliditySyntaxKind::Star
435                    }
436                }
437                '/' => {
438                    // 这里不处理注释,因为已经在其他地方处理了
439                    state.advance(1);
440                    if let Some('=') = state.peek() {
441                        state.advance(1);
442                        SoliditySyntaxKind::SlashAssign
443                    }
444                    else {
445                        SoliditySyntaxKind::Slash
446                    }
447                }
448                '%' => {
449                    state.advance(1);
450                    if let Some('=') = state.peek() {
451                        state.advance(1);
452                        SoliditySyntaxKind::PercentAssign
453                    }
454                    else {
455                        SoliditySyntaxKind::Percent
456                    }
457                }
458                '=' => {
459                    state.advance(1);
460                    if let Some('=') = state.peek() {
461                        state.advance(1);
462                        SoliditySyntaxKind::Equal
463                    }
464                    else {
465                        SoliditySyntaxKind::Assign
466                    }
467                }
468                '!' => {
469                    state.advance(1);
470                    if let Some('=') = state.peek() {
471                        state.advance(1);
472                        SoliditySyntaxKind::NotEqual
473                    }
474                    else {
475                        SoliditySyntaxKind::Not
476                    }
477                }
478                '<' => {
479                    state.advance(1);
480                    if let Some('=') = state.peek() {
481                        state.advance(1);
482                        SoliditySyntaxKind::LessEqual
483                    }
484                    else if let Some('<') = state.peek() {
485                        state.advance(1);
486                        SoliditySyntaxKind::LeftShift
487                    }
488                    else {
489                        SoliditySyntaxKind::Less
490                    }
491                }
492                '>' => {
493                    state.advance(1);
494                    if let Some('=') = state.peek() {
495                        state.advance(1);
496                        SoliditySyntaxKind::GreaterEqual
497                    }
498                    else if let Some('>') = state.peek() {
499                        state.advance(1);
500                        SoliditySyntaxKind::RightShift
501                    }
502                    else {
503                        SoliditySyntaxKind::Greater
504                    }
505                }
506                '&' => {
507                    state.advance(1);
508                    if let Some('&') = state.peek() {
509                        state.advance(1);
510                        SoliditySyntaxKind::And
511                    }
512                    else {
513                        SoliditySyntaxKind::BitAnd
514                    }
515                }
516                '|' => {
517                    state.advance(1);
518                    if let Some('|') = state.peek() {
519                        state.advance(1);
520                        SoliditySyntaxKind::Or
521                    }
522                    else {
523                        SoliditySyntaxKind::BitOr
524                    }
525                }
526                '^' => {
527                    state.advance(1);
528                    SoliditySyntaxKind::BitXor
529                }
530                '~' => {
531                    state.advance(1);
532                    SoliditySyntaxKind::BitNot
533                }
534                _ => return false,
535            };
536
537            state.add_token(token_kind, start_pos, state.get_position());
538            true
539        }
540        else {
541            false
542        }
543    }
544
545    /// 处理分隔符
546    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
547        let start_pos = state.get_position();
548
549        if let Some(ch) = state.peek() {
550            let token_kind = match ch {
551                '(' => SoliditySyntaxKind::LeftParen,
552                ')' => SoliditySyntaxKind::RightParen,
553                '{' => SoliditySyntaxKind::LeftBrace,
554                '}' => SoliditySyntaxKind::RightBrace,
555                '[' => SoliditySyntaxKind::LeftBracket,
556                ']' => SoliditySyntaxKind::RightBracket,
557                ';' => SoliditySyntaxKind::Semicolon,
558                ',' => SoliditySyntaxKind::Comma,
559                '.' => SoliditySyntaxKind::Dot,
560                _ => return false,
561            };
562
563            state.advance(ch.len_utf8());
564            state.add_token(token_kind, start_pos, state.get_position());
565            true
566        }
567        else {
568            false
569        }
570    }
571}