oak_swift/
lexer.rs

1use crate::{kind::SwiftSyntaxKind, language::SwiftLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<S> = LexerState<S, SwiftLanguage>;
5
6pub struct SwiftLexer<'config> {
7    config: &'config SwiftLanguage,
8}
9
10impl<'config> SwiftLexer<'config> {
11    pub fn new(config: &'config SwiftLanguage) -> Self {
12        Self { config }
13    }
14
15    /// 跳过空白字符
16    fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
17        let start_pos = state.get_position();
18
19        while let Some(ch) = state.peek() {
20            if ch == ' ' || ch == '\t' {
21                state.advance(ch.len_utf8());
22            }
23            else {
24                break;
25            }
26        }
27
28        if state.get_position() > start_pos {
29            state.add_token(SwiftSyntaxKind::Whitespace, start_pos, state.get_position());
30            true
31        }
32        else {
33            false
34        }
35    }
36
37    /// 处理换行
38    fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
39        let start_pos = state.get_position();
40
41        if let Some('\n') = state.peek() {
42            state.advance(1);
43            state.add_token(SwiftSyntaxKind::Newline, start_pos, state.get_position());
44            true
45        }
46        else if let Some('\r') = state.peek() {
47            state.advance(1);
48            if let Some('\n') = state.peek() {
49                state.advance(1);
50            }
51            state.add_token(SwiftSyntaxKind::Newline, start_pos, state.get_position());
52            true
53        }
54        else {
55            false
56        }
57    }
58
59    /// 处理注释
60    fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
61        let start_pos = state.get_position();
62
63        if let Some('/') = state.peek() {
64            if let Some('/') = state.peek_next_n(1) {
65                // 单行注释
66                state.advance(2);
67                while let Some(ch) = state.peek() {
68                    if ch == '\n' || ch == '\r' {
69                        break;
70                    }
71                    state.advance(ch.len_utf8());
72                }
73                state.add_token(SwiftSyntaxKind::Comment, start_pos, state.get_position());
74                true
75            }
76            else if let Some('*') = state.peek_next_n(1) {
77                // 多行注释
78                state.advance(2);
79                let mut depth = 1;
80                while let Some(ch) = state.peek() {
81                    if ch == '/'
82                        && let Some('*') = state.peek_next_n(1)
83                    {
84                        state.advance(2);
85                        depth += 1;
86                    }
87                    else if ch == '*'
88                        && let Some('/') = state.peek_next_n(1)
89                    {
90                        state.advance(2);
91                        depth -= 1;
92                        if depth == 0 {
93                            break;
94                        }
95                    }
96                    else {
97                        state.advance(ch.len_utf8());
98                    }
99                }
100                state.add_token(SwiftSyntaxKind::Comment, start_pos, state.get_position());
101                true
102            }
103            else {
104                false
105            }
106        }
107        else {
108            false
109        }
110    }
111
112    /// 处理标识符或关键
113    fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
114        let start_pos = state.get_position();
115
116        // 处理反引号标识符 `identifier`
117        let is_escaped = if let Some('`') = state.peek() {
118            state.advance(1);
119            true
120        }
121        else {
122            false
123        };
124
125        if let Some(ch) = state.peek() {
126            if ch.is_ascii_alphabetic() || ch == '_' {
127                state.advance(ch.len_utf8());
128
129                while let Some(ch) = state.peek() {
130                    if ch.is_ascii_alphanumeric() || ch == '_' {
131                        state.advance(ch.len_utf8());
132                    }
133                    else {
134                        break;
135                    }
136                }
137
138                // 如果是转义标识符,需要匹配结束的反引
139                if is_escaped {
140                    if let Some('`') = state.peek() {
141                        state.advance(1);
142                    }
143                    state.add_token(SwiftSyntaxKind::Identifier, start_pos, state.get_position());
144                    return true;
145                }
146
147                // 检查是否为关键
148                let text = state.get_text_in(std::range::Range { start: start_pos, end: state.get_position() });
149
150                let token_kind = match text {
151                    "class" => SwiftSyntaxKind::Class,
152                    "struct" => SwiftSyntaxKind::Struct,
153                    "enum" => SwiftSyntaxKind::Enum,
154                    "protocol" => SwiftSyntaxKind::Protocol,
155                    "extension" => SwiftSyntaxKind::Extension,
156                    "func" => SwiftSyntaxKind::Func,
157                    "var" => SwiftSyntaxKind::Var,
158                    "let" => SwiftSyntaxKind::Let,
159                    "init" => SwiftSyntaxKind::Init,
160                    "deinit" => SwiftSyntaxKind::Deinit,
161                    "subscript" => SwiftSyntaxKind::Subscript,
162                    "typealias" => SwiftSyntaxKind::Typealias,
163                    "import" => SwiftSyntaxKind::Import,
164                    "if" => SwiftSyntaxKind::If,
165                    "else" => SwiftSyntaxKind::Else,
166                    "switch" => SwiftSyntaxKind::Switch,
167                    "case" => SwiftSyntaxKind::Case,
168                    "default" => SwiftSyntaxKind::Default,
169                    "for" => SwiftSyntaxKind::For,
170                    "while" => SwiftSyntaxKind::While,
171                    "repeat" => SwiftSyntaxKind::Repeat,
172                    "do" => SwiftSyntaxKind::Do,
173                    "break" => SwiftSyntaxKind::Break,
174                    "continue" => SwiftSyntaxKind::Continue,
175                    "fallthrough" => SwiftSyntaxKind::Fallthrough,
176                    "return" => SwiftSyntaxKind::Return,
177                    "throw" => SwiftSyntaxKind::Throw,
178                    "try" => SwiftSyntaxKind::Try,
179                    "catch" => SwiftSyntaxKind::Catch,
180                    "finally" => SwiftSyntaxKind::Finally,
181                    "guard" => SwiftSyntaxKind::Guard,
182                    "defer" => SwiftSyntaxKind::Defer,
183                    "public" => SwiftSyntaxKind::Public,
184                    "private" => SwiftSyntaxKind::Private,
185                    "internal" => SwiftSyntaxKind::Internal,
186                    "fileprivate" => SwiftSyntaxKind::Fileprivate,
187                    "open" => SwiftSyntaxKind::Open,
188                    "static" => SwiftSyntaxKind::Static,
189                    "final" => SwiftSyntaxKind::Final,
190                    "override" => SwiftSyntaxKind::Override,
191                    "mutating" => SwiftSyntaxKind::Mutating,
192                    "nonmutating" => SwiftSyntaxKind::Nonmutating,
193                    "lazy" => SwiftSyntaxKind::Lazy,
194                    "weak" => SwiftSyntaxKind::Weak,
195                    "unowned" => SwiftSyntaxKind::Unowned,
196                    "optional" => SwiftSyntaxKind::Optional,
197                    "required" => SwiftSyntaxKind::Required,
198                    "convenience" => SwiftSyntaxKind::Convenience,
199                    "dynamic" => SwiftSyntaxKind::Dynamic,
200                    "infix" => SwiftSyntaxKind::Infix,
201                    "prefix" => SwiftSyntaxKind::Prefix,
202                    "postfix" => SwiftSyntaxKind::Postfix,
203                    "Any" => SwiftSyntaxKind::Any,
204                    "AnyObject" => SwiftSyntaxKind::AnyObject,
205                    "Self" => SwiftSyntaxKind::Self_,
206                    "Type" => SwiftSyntaxKind::Type,
207                    "Protocol" => SwiftSyntaxKind::Protocol_,
208                    "true" => SwiftSyntaxKind::True,
209                    "false" => SwiftSyntaxKind::False,
210                    "nil" => SwiftSyntaxKind::Nil,
211                    "as" => SwiftSyntaxKind::As,
212                    "is" => SwiftSyntaxKind::Is,
213                    "in" => SwiftSyntaxKind::In,
214                    "where" => SwiftSyntaxKind::Where,
215                    "associatedtype" => SwiftSyntaxKind::Associatedtype,
216                    "operator" => SwiftSyntaxKind::Operator,
217                    "precedencegroup" => SwiftSyntaxKind::Precedencegroup,
218                    "indirect" => SwiftSyntaxKind::Indirect,
219                    "rethrows" => SwiftSyntaxKind::Rethrows,
220                    "throws" => SwiftSyntaxKind::Throws,
221                    "inout" => SwiftSyntaxKind::Inout,
222                    _ => SwiftSyntaxKind::Identifier,
223                };
224                state.add_token(token_kind, start_pos, state.get_position());
225                true
226            }
227            else {
228                if is_escaped {
229                    // 回退反引
230                    state.set_position(start_pos);
231                }
232                false
233            }
234        }
235        else {
236            if is_escaped {
237                // 回退反引
238                state.set_position(start_pos);
239            }
240            false
241        }
242    }
243
244    /// 处理数字字面
245    fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
246        let start_pos = state.get_position();
247
248        if let Some(ch) = state.peek() {
249            if ch.is_ascii_digit() {
250                state.advance(1);
251
252                // 处理二进制、八进制、十六进制
253                if ch == '0' {
254                    if let Some('b') | Some('B') = state.peek() {
255                        state.advance(1);
256                        while let Some(ch) = state.peek() {
257                            if ch == '0' || ch == '1' || ch == '_' {
258                                state.advance(1);
259                            }
260                            else {
261                                break;
262                            }
263                        }
264                    }
265                    else if let Some('o') | Some('O') = state.peek() {
266                        state.advance(1);
267                        while let Some(ch) = state.peek() {
268                            if ch.is_ascii_digit() && ch < '8' || ch == '_' {
269                                state.advance(1);
270                            }
271                            else {
272                                break;
273                            }
274                        }
275                    }
276                    else if let Some('x') | Some('X') = state.peek() {
277                        state.advance(1);
278                        while let Some(ch) = state.peek() {
279                            if ch.is_ascii_hexdigit() || ch == '_' {
280                                state.advance(1);
281                            }
282                            else {
283                                break;
284                            }
285                        }
286                    }
287                    else {
288                        // 普通十进制数字
289                        while let Some(ch) = state.peek() {
290                            if ch.is_ascii_digit() || ch == '_' {
291                                state.advance(1);
292                            }
293                            else {
294                                break;
295                            }
296                        }
297                    }
298                }
299                else {
300                    // 十进制数
301                    while let Some(ch) = state.peek() {
302                        if ch.is_ascii_digit() || ch == '_' {
303                            state.advance(1);
304                        }
305                        else {
306                            break;
307                        }
308                    }
309                }
310
311                // 处理小数
312                if let Some('.') = state.peek() {
313                    state.advance(1);
314                    while let Some(ch) = state.peek() {
315                        if ch.is_ascii_digit() || ch == '_' {
316                            state.advance(1);
317                        }
318                        else {
319                            break;
320                        }
321                    }
322                }
323
324                // 处理指数
325                if let Some('e') | Some('E') = state.peek() {
326                    state.advance(1);
327                    if let Some('+') | Some('-') = state.peek() {
328                        state.advance(1);
329                    }
330                    while let Some(ch) = state.peek() {
331                        if ch.is_ascii_digit() || ch == '_' {
332                            state.advance(1);
333                        }
334                        else {
335                            break;
336                        }
337                    }
338                }
339
340                state.add_token(SwiftSyntaxKind::NumberLiteral, start_pos, state.get_position());
341                true
342            }
343            else {
344                false
345            }
346        }
347        else {
348            false
349        }
350    }
351
352    /// 处理字符串字面量
353    fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
354        let start_pos = state.get_position();
355
356        // 处理多行字符"""..."""
357        if let Some('"') = state.peek() {
358            if let Some('"') = state.peek_next_n(1) {
359                if let Some('"') = state.peek_next_n(2) {
360                    // 多行字符
361                    state.advance(3);
362                    while let Some(ch) = state.peek() {
363                        if ch == '"' {
364                            if let Some('"') = state.peek_next_n(1) {
365                                if let Some('"') = state.peek_next_n(2) {
366                                    state.advance(3);
367                                    break;
368                                }
369                            }
370                        }
371                        state.advance(ch.len_utf8());
372                    }
373                    state.add_token(SwiftSyntaxKind::StringLiteral, start_pos, state.get_position());
374                    return true;
375                }
376            }
377
378            // 普通字符串
379            state.advance(1);
380            while let Some(ch) = state.peek() {
381                if ch == '"' {
382                    state.advance(1);
383                    break;
384                }
385                else if ch == '\\' {
386                    state.advance(1);
387                    if let Some(_) = state.peek() {
388                        state.advance(1);
389                    }
390                }
391                else if ch == '\n' || ch == '\r' {
392                    break; // 普通字符串不能跨行
393                }
394                else {
395                    state.advance(ch.len_utf8());
396                }
397            }
398            state.add_token(SwiftSyntaxKind::StringLiteral, start_pos, state.get_position());
399            true
400        }
401        else {
402            false
403        }
404    }
405
406    /// 处理操作
407    fn lex_operator<S: Source>(&self, state: &mut State<S>) -> bool {
408        let start_pos = state.get_position();
409
410        if let Some(ch) = state.peek() {
411            let token_kind = match ch {
412                '+' => {
413                    state.advance(1);
414                    if let Some('=') = state.peek() {
415                        state.advance(1);
416                        SwiftSyntaxKind::PlusAssign
417                    }
418                    else {
419                        SwiftSyntaxKind::Plus
420                    }
421                }
422                '-' => {
423                    state.advance(1);
424                    match state.peek() {
425                        Some('=') => {
426                            state.advance(1);
427                            SwiftSyntaxKind::MinusAssign
428                        }
429                        Some('>') => {
430                            state.advance(1);
431                            SwiftSyntaxKind::Arrow
432                        }
433                        _ => SwiftSyntaxKind::Minus,
434                    }
435                }
436                '*' => {
437                    state.advance(1);
438                    if let Some('=') = state.peek() {
439                        state.advance(1);
440                        SwiftSyntaxKind::StarAssign
441                    }
442                    else {
443                        SwiftSyntaxKind::Star
444                    }
445                }
446                '/' => {
447                    state.advance(1);
448                    if let Some('=') = state.peek() {
449                        state.advance(1);
450                        SwiftSyntaxKind::SlashAssign
451                    }
452                    else {
453                        SwiftSyntaxKind::Slash
454                    }
455                }
456                '%' => {
457                    state.advance(1);
458                    if let Some('=') = state.peek() {
459                        state.advance(1);
460                        SwiftSyntaxKind::PercentAssign
461                    }
462                    else {
463                        SwiftSyntaxKind::Percent
464                    }
465                }
466                '=' => {
467                    state.advance(1);
468                    if let Some('=') = state.peek() {
469                        state.advance(1);
470                        SwiftSyntaxKind::Equal
471                    }
472                    else {
473                        SwiftSyntaxKind::Assign
474                    }
475                }
476                '!' => {
477                    state.advance(1);
478                    if let Some('=') = state.peek() {
479                        state.advance(1);
480                        SwiftSyntaxKind::NotEqual
481                    }
482                    else {
483                        SwiftSyntaxKind::LogicalNot
484                    }
485                }
486                '<' => {
487                    state.advance(1);
488                    match state.peek() {
489                        Some('=') => {
490                            state.advance(1);
491                            SwiftSyntaxKind::LessEqual
492                        }
493                        Some('<') => {
494                            state.advance(1);
495                            if let Some('=') = state.peek() {
496                                state.advance(1);
497                                SwiftSyntaxKind::LeftShiftAssign
498                            }
499                            else {
500                                SwiftSyntaxKind::LeftShift
501                            }
502                        }
503                        _ => SwiftSyntaxKind::Less,
504                    }
505                }
506                '>' => {
507                    state.advance(1);
508                    match state.peek() {
509                        Some('=') => {
510                            state.advance(1);
511                            SwiftSyntaxKind::GreaterEqual
512                        }
513                        Some('>') => {
514                            state.advance(1);
515                            if let Some('=') = state.peek() {
516                                state.advance(1);
517                                SwiftSyntaxKind::RightShiftAssign
518                            }
519                            else {
520                                SwiftSyntaxKind::RightShift
521                            }
522                        }
523                        _ => SwiftSyntaxKind::Greater,
524                    }
525                }
526                '&' => {
527                    state.advance(1);
528                    match state.peek() {
529                        Some('&') => {
530                            state.advance(1);
531                            SwiftSyntaxKind::LogicalAnd
532                        }
533                        Some('=') => {
534                            state.advance(1);
535                            SwiftSyntaxKind::AndAssign
536                        }
537                        _ => SwiftSyntaxKind::BitAnd,
538                    }
539                }
540                '|' => {
541                    state.advance(1);
542                    match state.peek() {
543                        Some('|') => {
544                            state.advance(1);
545                            SwiftSyntaxKind::LogicalOr
546                        }
547                        Some('=') => {
548                            state.advance(1);
549                            SwiftSyntaxKind::OrAssign
550                        }
551                        _ => SwiftSyntaxKind::BitOr,
552                    }
553                }
554                '^' => {
555                    state.advance(1);
556                    if let Some('=') = state.peek() {
557                        state.advance(1);
558                        SwiftSyntaxKind::XorAssign
559                    }
560                    else {
561                        SwiftSyntaxKind::BitXor
562                    }
563                }
564                '~' => {
565                    state.advance(1);
566                    SwiftSyntaxKind::BitNot
567                }
568                '?' => {
569                    state.advance(1);
570                    if let Some('?') = state.peek() {
571                        state.advance(1);
572                        SwiftSyntaxKind::QuestionQuestion
573                    }
574                    else {
575                        SwiftSyntaxKind::Question
576                    }
577                }
578                '.' => {
579                    state.advance(1);
580                    match state.peek() {
581                        Some('.') => {
582                            state.advance(1);
583                            if let Some('<') = state.peek() {
584                                state.advance(1);
585                                SwiftSyntaxKind::Range
586                            }
587                            else {
588                                SwiftSyntaxKind::ClosedRange
589                            }
590                        }
591                        _ => SwiftSyntaxKind::Dot,
592                    }
593                }
594                _ => return false,
595            };
596
597            state.add_token(token_kind, start_pos, state.get_position());
598            true
599        }
600        else {
601            false
602        }
603    }
604
605    /// 处理分隔
606    fn lex_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
607        let start_pos = state.get_position();
608
609        if let Some(ch) = state.peek() {
610            let token_kind = match ch {
611                '(' => SwiftSyntaxKind::LeftParen,
612                ')' => SwiftSyntaxKind::RightParen,
613                '[' => SwiftSyntaxKind::LeftBracket,
614                ']' => SwiftSyntaxKind::RightBracket,
615                '{' => SwiftSyntaxKind::LeftBrace,
616                '}' => SwiftSyntaxKind::RightBrace,
617                ',' => SwiftSyntaxKind::Comma,
618                ';' => SwiftSyntaxKind::Semicolon,
619                ':' => SwiftSyntaxKind::Colon,
620                '@' => SwiftSyntaxKind::At,
621                '#' => SwiftSyntaxKind::Hash,
622                '$' => SwiftSyntaxKind::Dollar,
623                '_' => SwiftSyntaxKind::Underscore,
624                '\\' => SwiftSyntaxKind::Backslash,
625                _ => return false,
626            };
627
628            state.advance(ch.len_utf8());
629            state.add_token(token_kind, start_pos, state.get_position());
630            true
631        }
632        else {
633            false
634        }
635    }
636}
637
638impl<'config> Lexer<SwiftLanguage> for SwiftLexer<'config> {
639    fn lex_incremental(
640        &self,
641        source: impl Source,
642        _changed: usize,
643        _cache: IncrementalCache<SwiftLanguage>,
644    ) -> LexOutput<SwiftLanguage> {
645        let mut state = LexerState::new(source);
646        let result = self.run(&mut state);
647        state.finish(result)
648    }
649}
650
651impl<'config> SwiftLexer<'config> {
652    fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
653        while state.not_at_end() {
654            let _safe_point = state.get_position();
655
656            // 尝试各种词法规则
657            if self.skip_whitespace(state) {
658                continue;
659            }
660
661            if self.lex_newline(state) {
662                continue;
663            }
664
665            if self.lex_comment(state) {
666                continue;
667            }
668
669            if self.lex_string_literal(state) {
670                continue;
671            }
672
673            if self.lex_number_literal(state) {
674                continue;
675            }
676
677            if self.lex_identifier_or_keyword(state) {
678                continue;
679            }
680
681            if self.lex_operator(state) {
682                continue;
683            }
684
685            if self.lex_delimiter(state) {
686                continue;
687            }
688
689            // 如果所有规则都不匹配,跳过当前字符并标记为错误
690            let start_pos = state.get_position();
691            if let Some(ch) = state.peek() {
692                state.advance(ch.len_utf8());
693                state.add_token(SwiftSyntaxKind::Error, start_pos, state.get_position());
694            }
695        }
696
697        // 添加 Eof token
698        let eof_pos = state.get_position();
699        state.add_token(SwiftSyntaxKind::Eof, eof_pos, eof_pos);
700
701        Ok(())
702    }
703}