Skip to main content

oak_swift/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Token type definitions for Swift lexer.
3pub mod token_type;
4
5use crate::language::SwiftLanguage;
6pub use crate::lexer::token_type::SwiftTokenType;
7use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
8
9pub(crate) type State<'a, S> = LexerState<'a, S, SwiftLanguage>;
10
11/// Lexer for Swift source code.
12#[derive(Clone, Debug)]
13pub struct SwiftLexer<'config> {
14    config: &'config SwiftLanguage,
15}
16
17impl<'config> Lexer<SwiftLanguage> for SwiftLexer<'config> {
18    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<SwiftLanguage>) -> LexOutput<SwiftLanguage> {
19        let mut state = State::new(source);
20        let result = self.run(&mut state);
21        if result.is_ok() {
22            state.add_eof();
23        }
24        state.finish_with_cache(result, cache)
25    }
26}
27
28impl<'config> SwiftLexer<'config> {
29    /// Creates a new SwiftLexer with the given language configuration.
30    pub fn new(config: &'config SwiftLanguage) -> Self {
31        Self { config }
32    }
33
34    /// Skip whitespace
35    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
36        let start_pos = state.get_position();
37
38        while let Some(ch) = state.peek() {
39            if ch == ' ' || ch == '\t' {
40                state.advance(ch.len_utf8());
41            }
42            else {
43                break;
44            }
45        }
46
47        if state.get_position() > start_pos {
48            state.add_token(SwiftTokenType::Whitespace, start_pos, state.get_position());
49            true
50        }
51        else {
52            false
53        }
54    }
55
56    /// Handles newlines
57    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
58        let start_pos = state.get_position();
59
60        if let Some('\n') = state.peek() {
61            state.advance(1);
62            state.add_token(SwiftTokenType::Newline, start_pos, state.get_position());
63            true
64        }
65        else if let Some('\r') = state.peek() {
66            state.advance(1);
67            if let Some('\n') = state.peek() {
68                state.advance(1);
69            }
70            state.add_token(SwiftTokenType::Newline, start_pos, state.get_position());
71            true
72        }
73        else {
74            false
75        }
76    }
77
78    /// Handles comments
79    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
80        let start_pos = state.get_position();
81
82        if let Some('/') = state.peek() {
83            if let Some('/') = state.peek_next_n(1) {
84                // Single-line comment
85                state.advance(2);
86                while let Some(ch) = state.peek() {
87                    if ch == '\n' || ch == '\r' {
88                        break;
89                    }
90                    state.advance(ch.len_utf8());
91                }
92                state.add_token(SwiftTokenType::Comment, start_pos, state.get_position());
93                true
94            }
95            else if let Some('*') = state.peek_next_n(1) {
96                // Multi-line comment
97                state.advance(2);
98                let mut depth = 1;
99                while let Some(ch) = state.peek() {
100                    if ch == '/'
101                        && let Some('*') = state.peek_next_n(1)
102                    {
103                        state.advance(2);
104                        depth += 1;
105                    }
106                    else if ch == '*'
107                        && let Some('/') = state.peek_next_n(1)
108                    {
109                        state.advance(2);
110                        depth -= 1;
111                        if depth == 0 {
112                            break;
113                        }
114                    }
115                    else {
116                        state.advance(ch.len_utf8());
117                    }
118                }
119                state.add_token(SwiftTokenType::Comment, start_pos, state.get_position());
120                true
121            }
122            else {
123                false
124            }
125        }
126        else {
127            false
128        }
129    }
130
131    /// Handles identifiers or keywords
132    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
133        let start_pos = state.get_position();
134
135        // Handle backtick identifier `identifier`
136        let is_escaped = if let Some('`') = state.peek() {
137            state.advance(1);
138            true
139        }
140        else {
141            false
142        };
143
144        if let Some(ch) = state.peek() {
145            if ch.is_ascii_alphabetic() || ch == '_' {
146                state.advance(ch.len_utf8());
147
148                while let Some(ch) = state.peek() {
149                    if ch.is_ascii_alphanumeric() || ch == '_' {
150                        state.advance(ch.len_utf8());
151                    }
152                    else {
153                        break;
154                    }
155                }
156
157                // If it's an escaped identifier, need to match the closing backtick
158                if is_escaped {
159                    if let Some('`') = state.peek() {
160                        state.advance(1);
161                    }
162                    state.add_token(SwiftTokenType::Identifier, start_pos, state.get_position());
163                    return true;
164                }
165
166                // Check if it's a keyword
167                let text = state.get_text_in(core::range::Range { start: start_pos, end: state.get_position() });
168
169                let token_kind = match text.as_ref() {
170                    "class" => SwiftTokenType::Class,
171                    "struct" => SwiftTokenType::Struct,
172                    "enum" => SwiftTokenType::Enum,
173                    "protocol" => SwiftTokenType::Protocol,
174                    "extension" => SwiftTokenType::Extension,
175                    "func" => SwiftTokenType::Func,
176                    "var" => SwiftTokenType::Var,
177                    "let" => SwiftTokenType::Let,
178                    "init" => SwiftTokenType::Init,
179                    "deinit" => SwiftTokenType::Deinit,
180                    "subscript" => SwiftTokenType::Subscript,
181                    "typealias" => SwiftTokenType::Typealias,
182                    "import" => SwiftTokenType::Import,
183                    "if" => SwiftTokenType::If,
184                    "else" => SwiftTokenType::Else,
185                    "switch" => SwiftTokenType::Switch,
186                    "case" => SwiftTokenType::Case,
187                    "default" => SwiftTokenType::Default,
188                    "for" => SwiftTokenType::For,
189                    "while" => SwiftTokenType::While,
190                    "repeat" => SwiftTokenType::Repeat,
191                    "do" => SwiftTokenType::Do,
192                    "break" => SwiftTokenType::Break,
193                    "continue" => SwiftTokenType::Continue,
194                    "fallthrough" => SwiftTokenType::Fallthrough,
195                    "return" => SwiftTokenType::Return,
196                    "throw" => SwiftTokenType::Throw,
197                    "try" => SwiftTokenType::Try,
198                    "catch" => SwiftTokenType::Catch,
199                    "finally" => SwiftTokenType::Finally,
200                    "guard" => SwiftTokenType::Guard,
201                    "defer" => SwiftTokenType::Defer,
202                    "public" => SwiftTokenType::Public,
203                    "private" => SwiftTokenType::Private,
204                    "internal" => SwiftTokenType::Internal,
205                    "fileprivate" => SwiftTokenType::Fileprivate,
206                    "open" => SwiftTokenType::Open,
207                    "static" => SwiftTokenType::Static,
208                    "final" => SwiftTokenType::Final,
209                    "override" => SwiftTokenType::Override,
210                    "mutating" => SwiftTokenType::Mutating,
211                    "nonmutating" => SwiftTokenType::Nonmutating,
212                    "lazy" => SwiftTokenType::Lazy,
213                    "weak" => SwiftTokenType::Weak,
214                    "unowned" => SwiftTokenType::Unowned,
215                    "optional" => SwiftTokenType::Optional,
216                    "required" => SwiftTokenType::Required,
217                    "convenience" => SwiftTokenType::Convenience,
218                    "dynamic" => SwiftTokenType::Dynamic,
219                    "infix" => SwiftTokenType::Infix,
220                    "prefix" => SwiftTokenType::Prefix,
221                    "postfix" => SwiftTokenType::Postfix,
222                    "Any" => SwiftTokenType::Any,
223                    "AnyObject" => SwiftTokenType::AnyObject,
224                    "Self" => SwiftTokenType::Self_,
225                    "Type" => SwiftTokenType::Type,
226                    "Protocol" => SwiftTokenType::Protocol_,
227                    "true" => SwiftTokenType::True,
228                    "false" => SwiftTokenType::False,
229                    "nil" => SwiftTokenType::Nil,
230                    "as" => SwiftTokenType::As,
231                    "is" => SwiftTokenType::Is,
232                    "in" => SwiftTokenType::In,
233                    "where" => SwiftTokenType::Where,
234                    "associatedtype" => SwiftTokenType::Associatedtype,
235                    "operator" => SwiftTokenType::Operator,
236                    "precedencegroup" => SwiftTokenType::Precedencegroup,
237                    "indirect" => SwiftTokenType::Indirect,
238                    "rethrows" => SwiftTokenType::Rethrows,
239                    "throws" => SwiftTokenType::Throws,
240                    "inout" => SwiftTokenType::Inout,
241                    _ => SwiftTokenType::Identifier,
242                };
243                state.add_token(token_kind, start_pos, state.get_position());
244                true
245            }
246            else {
247                if is_escaped {
248                    // Backtrack backtick
249                    state.set_position(start_pos);
250                }
251                false
252            }
253        }
254        else {
255            if is_escaped {
256                // Backtrack backtick
257                state.set_position(start_pos);
258            }
259            false
260        }
261    }
262
263    /// Handles number literals
264    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
265        let start_pos = state.get_position();
266
267        if let Some(ch) = state.peek() {
268            if ch.is_ascii_digit() {
269                state.advance(1);
270
271                // Handle binary, octal, hexadecimal
272                if ch == '0' {
273                    if let Some('b') | Some('B') = state.peek() {
274                        state.advance(1);
275                        while let Some(ch) = state.peek() {
276                            if ch == '0' || ch == '1' || ch == '_' {
277                                state.advance(1);
278                            }
279                            else {
280                                break;
281                            }
282                        }
283                    }
284                    else if let Some('o') | Some('O') = state.peek() {
285                        state.advance(1);
286                        while let Some(ch) = state.peek() {
287                            if ch.is_ascii_digit() && ch < '8' || ch == '_' {
288                                state.advance(1);
289                            }
290                            else {
291                                break;
292                            }
293                        }
294                    }
295                    else if let Some('x') | Some('X') = state.peek() {
296                        state.advance(1);
297                        while let Some(ch) = state.peek() {
298                            if ch.is_ascii_hexdigit() || ch == '_' {
299                                state.advance(1);
300                            }
301                            else {
302                                break;
303                            }
304                        }
305                    }
306                    else {
307                        // Regular decimal numbers
308                        while let Some(ch) = state.peek() {
309                            if ch.is_ascii_digit() || ch == '_' {
310                                state.advance(1);
311                            }
312                            else {
313                                break;
314                            }
315                        }
316                    }
317                }
318                else {
319                    // Decimal number
320                    while let Some(ch) = state.peek() {
321                        if ch.is_ascii_digit() || ch == '_' {
322                            state.advance(1);
323                        }
324                        else {
325                            break;
326                        }
327                    }
328                }
329
330                // Handle decimals
331                if let Some('.') = state.peek() {
332                    // If followed immediately by another dot, it's part of a range operator, not a decimal point
333                    if let Some(next) = state.peek_next_n(1) {
334                        if next != '.' {
335                            state.advance(1);
336                            while let Some(ch) = state.peek() {
337                                if ch.is_ascii_digit() || ch == '_' {
338                                    state.advance(1);
339                                }
340                                else {
341                                    break;
342                                }
343                            }
344                        }
345                    }
346                    else {
347                        // No characters left, 1. can also be a floating point
348                        state.advance(1);
349                    }
350                }
351
352                // Handle exponents
353                if let Some('e') | Some('E') = state.peek() {
354                    state.advance(1);
355                    if let Some('+') | Some('-') = state.peek() {
356                        state.advance(1);
357                    }
358                    while let Some(ch) = state.peek() {
359                        if ch.is_ascii_digit() || ch == '_' {
360                            state.advance(1);
361                        }
362                        else {
363                            break;
364                        }
365                    }
366                }
367
368                state.add_token(SwiftTokenType::NumberLiteral, start_pos, state.get_position());
369                true
370            }
371            else {
372                false
373            }
374        }
375        else {
376            false
377        }
378    }
379
380    /// Handles string literals
381    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
382        let start_pos = state.get_position();
383
384        // Handles multi-line strings """..."""
385        if let Some('"') = state.peek() {
386            if let Some('"') = state.peek_next_n(1) {
387                if let Some('"') = state.peek_next_n(2) {
388                    // Multi-line string
389                    state.advance(3);
390                    while let Some(ch) = state.peek() {
391                        if ch == '"' {
392                            if let Some('"') = state.peek_next_n(1) {
393                                if let Some('"') = state.peek_next_n(2) {
394                                    state.advance(3);
395                                    break;
396                                }
397                            }
398                        }
399                        state.advance(ch.len_utf8());
400                    }
401                    state.add_token(SwiftTokenType::StringLiteral, start_pos, state.get_position());
402                    return true;
403                }
404            }
405
406            // Normal string
407            state.advance(1);
408            while let Some(ch) = state.peek() {
409                if ch == '"' {
410                    state.advance(1);
411                    break;
412                }
413                else if ch == '\\' {
414                    state.advance(1);
415                    if let Some(_) = state.peek() {
416                        state.advance(1);
417                    }
418                }
419                else if ch == '\n' || ch == '\r' {
420                    break; // Normal strings cannot span multiple lines
421                }
422                else {
423                    state.advance(ch.len_utf8());
424                }
425            }
426            state.add_token(SwiftTokenType::StringLiteral, start_pos, state.get_position());
427            true
428        }
429        else {
430            false
431        }
432    }
433
434    /// Handles operators
435    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
436        let start_pos = state.get_position();
437
438        if let Some(ch) = state.peek() {
439            let token_kind = match ch {
440                '+' => {
441                    state.advance(1);
442                    if let Some('=') = state.peek() {
443                        state.advance(1);
444                        SwiftTokenType::PlusAssign
445                    }
446                    else {
447                        SwiftTokenType::Plus
448                    }
449                }
450                '-' => {
451                    state.advance(1);
452                    match state.peek() {
453                        Some('=') => {
454                            state.advance(1);
455                            SwiftTokenType::MinusAssign
456                        }
457                        Some('>') => {
458                            state.advance(1);
459                            SwiftTokenType::Arrow
460                        }
461                        _ => SwiftTokenType::Minus,
462                    }
463                }
464                '*' => {
465                    state.advance(1);
466                    if let Some('=') = state.peek() {
467                        state.advance(1);
468                        SwiftTokenType::StarAssign
469                    }
470                    else {
471                        SwiftTokenType::Star
472                    }
473                }
474                '/' => {
475                    state.advance(1);
476                    if let Some('=') = state.peek() {
477                        state.advance(1);
478                        SwiftTokenType::SlashAssign
479                    }
480                    else {
481                        SwiftTokenType::Slash
482                    }
483                }
484                '%' => {
485                    state.advance(1);
486                    if let Some('=') = state.peek() {
487                        state.advance(1);
488                        SwiftTokenType::PercentAssign
489                    }
490                    else {
491                        SwiftTokenType::Percent
492                    }
493                }
494                '=' => {
495                    state.advance(1);
496                    if let Some('=') = state.peek() {
497                        state.advance(1);
498                        SwiftTokenType::Equal
499                    }
500                    else {
501                        SwiftTokenType::Assign
502                    }
503                }
504                '!' => {
505                    state.advance(1);
506                    if let Some('=') = state.peek() {
507                        state.advance(1);
508                        SwiftTokenType::NotEqual
509                    }
510                    else {
511                        SwiftTokenType::LogicalNot
512                    }
513                }
514                '<' => {
515                    state.advance(1);
516                    match state.peek() {
517                        Some('=') => {
518                            state.advance(1);
519                            SwiftTokenType::LessEqual
520                        }
521                        Some('<') => {
522                            state.advance(1);
523                            if let Some('=') = state.peek() {
524                                state.advance(1);
525                                SwiftTokenType::LeftShiftAssign
526                            }
527                            else {
528                                SwiftTokenType::LeftShift
529                            }
530                        }
531                        _ => SwiftTokenType::Less,
532                    }
533                }
534                '>' => {
535                    state.advance(1);
536                    match state.peek() {
537                        Some('=') => {
538                            state.advance(1);
539                            SwiftTokenType::GreaterEqual
540                        }
541                        Some('>') => {
542                            state.advance(1);
543                            if let Some('=') = state.peek() {
544                                state.advance(1);
545                                SwiftTokenType::RightShiftAssign
546                            }
547                            else {
548                                SwiftTokenType::RightShift
549                            }
550                        }
551                        _ => SwiftTokenType::Greater,
552                    }
553                }
554                '&' => {
555                    state.advance(1);
556                    match state.peek() {
557                        Some('&') => {
558                            state.advance(1);
559                            SwiftTokenType::LogicalAnd
560                        }
561                        Some('=') => {
562                            state.advance(1);
563                            SwiftTokenType::AndAssign
564                        }
565                        _ => SwiftTokenType::BitAnd,
566                    }
567                }
568                '|' => {
569                    state.advance(1);
570                    match state.peek() {
571                        Some('|') => {
572                            state.advance(1);
573                            SwiftTokenType::LogicalOr
574                        }
575                        Some('=') => {
576                            state.advance(1);
577                            SwiftTokenType::OrAssign
578                        }
579                        _ => SwiftTokenType::BitOr,
580                    }
581                }
582                '^' => {
583                    state.advance(1);
584                    if let Some('=') = state.peek() {
585                        state.advance(1);
586                        SwiftTokenType::XorAssign
587                    }
588                    else {
589                        SwiftTokenType::BitXor
590                    }
591                }
592                '~' => {
593                    state.advance(1);
594                    SwiftTokenType::BitNot
595                }
596                '?' => {
597                    state.advance(1);
598                    if let Some('?') = state.peek() {
599                        state.advance(1);
600                        SwiftTokenType::QuestionQuestion
601                    }
602                    else {
603                        SwiftTokenType::Question
604                    }
605                }
606                '.' => {
607                    state.advance(1);
608                    match state.peek() {
609                        Some('.') => {
610                            state.advance(1);
611                            match state.peek() {
612                                Some('.') => {
613                                    state.advance(1);
614                                    SwiftTokenType::ClosedRange
615                                }
616                                Some('<') => {
617                                    state.advance(1);
618                                    SwiftTokenType::Range
619                                }
620                                _ => SwiftTokenType::Dot, // Or error
621                            }
622                        }
623                        _ => SwiftTokenType::Dot,
624                    }
625                }
626                _ => return false,
627            };
628
629            state.add_token(token_kind, start_pos, state.get_position());
630            true
631        }
632        else {
633            false
634        }
635    }
636
637    /// Handles delimiters
638    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
639        let start_pos = state.get_position();
640
641        if let Some(ch) = state.peek() {
642            let token_kind = match ch {
643                '(' => SwiftTokenType::LeftParen,
644                ')' => SwiftTokenType::RightParen,
645                '[' => SwiftTokenType::LeftBracket,
646                ']' => SwiftTokenType::RightBracket,
647                '{' => SwiftTokenType::LeftBrace,
648                '}' => SwiftTokenType::RightBrace,
649                ',' => SwiftTokenType::Comma,
650                ';' => SwiftTokenType::Semicolon,
651                ':' => SwiftTokenType::Colon,
652                '@' => SwiftTokenType::At,
653                '#' => SwiftTokenType::Hash,
654                '$' => SwiftTokenType::Dollar,
655                '_' => SwiftTokenType::Underscore,
656                '\\' => SwiftTokenType::Backslash,
657                _ => return false,
658            };
659
660            state.advance(ch.len_utf8());
661            state.add_token(token_kind, start_pos, state.get_position());
662            true
663        }
664        else {
665            false
666        }
667    }
668}
669
670impl<'config> SwiftLexer<'config> {
671    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
672        while state.not_at_end() {
673            let safe_point = state.get_position();
674
675            // Try various lexical rules
676            if self.skip_whitespace(state) {
677                continue;
678            }
679
680            if self.lex_newline(state) {
681                continue;
682            }
683
684            if self.lex_comment(state) {
685                continue;
686            }
687
688            if self.lex_string_literal(state) {
689                continue;
690            }
691
692            if self.lex_number_literal(state) {
693                continue;
694            }
695
696            if self.lex_identifier_or_keyword(state) {
697                continue;
698            }
699
700            if self.lex_operator(state) {
701                continue;
702            }
703
704            if self.lex_delimiter(state) {
705                continue;
706            }
707
708            // If no rules match, skip current character and mark as error
709            let start_pos = state.get_position();
710            if let Some(ch) = state.peek() {
711                state.advance(ch.len_utf8());
712                state.add_token(SwiftTokenType::Error, start_pos, state.get_position());
713            }
714
715            state.advance_if_dead_lock(safe_point)
716        }
717
718        Ok(())
719    }
720}