1use crate::{kind::SwiftSyntaxKind, language::SwiftLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<S> = LexerState<S, SwiftLanguage>;
5
6pub struct SwiftLexer<'config> {
7 config: &'config SwiftLanguage,
8}
9
10impl<'config> SwiftLexer<'config> {
11 pub fn new(config: &'config SwiftLanguage) -> Self {
12 Self { config }
13 }
14
15 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
17 let start_pos = state.get_position();
18
19 while let Some(ch) = state.peek() {
20 if ch == ' ' || ch == '\t' {
21 state.advance(ch.len_utf8());
22 }
23 else {
24 break;
25 }
26 }
27
28 if state.get_position() > start_pos {
29 state.add_token(SwiftSyntaxKind::Whitespace, start_pos, state.get_position());
30 true
31 }
32 else {
33 false
34 }
35 }
36
37 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
39 let start_pos = state.get_position();
40
41 if let Some('\n') = state.peek() {
42 state.advance(1);
43 state.add_token(SwiftSyntaxKind::Newline, start_pos, state.get_position());
44 true
45 }
46 else if let Some('\r') = state.peek() {
47 state.advance(1);
48 if let Some('\n') = state.peek() {
49 state.advance(1);
50 }
51 state.add_token(SwiftSyntaxKind::Newline, start_pos, state.get_position());
52 true
53 }
54 else {
55 false
56 }
57 }
58
59 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
61 let start_pos = state.get_position();
62
63 if let Some('/') = state.peek() {
64 if let Some('/') = state.peek_next_n(1) {
65 state.advance(2);
67 while let Some(ch) = state.peek() {
68 if ch == '\n' || ch == '\r' {
69 break;
70 }
71 state.advance(ch.len_utf8());
72 }
73 state.add_token(SwiftSyntaxKind::Comment, start_pos, state.get_position());
74 true
75 }
76 else if let Some('*') = state.peek_next_n(1) {
77 state.advance(2);
79 let mut depth = 1;
80 while let Some(ch) = state.peek() {
81 if ch == '/'
82 && let Some('*') = state.peek_next_n(1)
83 {
84 state.advance(2);
85 depth += 1;
86 }
87 else if ch == '*'
88 && let Some('/') = state.peek_next_n(1)
89 {
90 state.advance(2);
91 depth -= 1;
92 if depth == 0 {
93 break;
94 }
95 }
96 else {
97 state.advance(ch.len_utf8());
98 }
99 }
100 state.add_token(SwiftSyntaxKind::Comment, start_pos, state.get_position());
101 true
102 }
103 else {
104 false
105 }
106 }
107 else {
108 false
109 }
110 }
111
112 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
114 let start_pos = state.get_position();
115
116 let is_escaped = if let Some('`') = state.peek() {
118 state.advance(1);
119 true
120 }
121 else {
122 false
123 };
124
125 if let Some(ch) = state.peek() {
126 if ch.is_ascii_alphabetic() || ch == '_' {
127 state.advance(ch.len_utf8());
128
129 while let Some(ch) = state.peek() {
130 if ch.is_ascii_alphanumeric() || ch == '_' {
131 state.advance(ch.len_utf8());
132 }
133 else {
134 break;
135 }
136 }
137
138 if is_escaped {
140 if let Some('`') = state.peek() {
141 state.advance(1);
142 }
143 state.add_token(SwiftSyntaxKind::Identifier, start_pos, state.get_position());
144 return true;
145 }
146
147 let text = state.get_text_in(std::range::Range { start: start_pos, end: state.get_position() });
149
150 let token_kind = match text {
151 "class" => SwiftSyntaxKind::Class,
152 "struct" => SwiftSyntaxKind::Struct,
153 "enum" => SwiftSyntaxKind::Enum,
154 "protocol" => SwiftSyntaxKind::Protocol,
155 "extension" => SwiftSyntaxKind::Extension,
156 "func" => SwiftSyntaxKind::Func,
157 "var" => SwiftSyntaxKind::Var,
158 "let" => SwiftSyntaxKind::Let,
159 "init" => SwiftSyntaxKind::Init,
160 "deinit" => SwiftSyntaxKind::Deinit,
161 "subscript" => SwiftSyntaxKind::Subscript,
162 "typealias" => SwiftSyntaxKind::Typealias,
163 "import" => SwiftSyntaxKind::Import,
164 "if" => SwiftSyntaxKind::If,
165 "else" => SwiftSyntaxKind::Else,
166 "switch" => SwiftSyntaxKind::Switch,
167 "case" => SwiftSyntaxKind::Case,
168 "default" => SwiftSyntaxKind::Default,
169 "for" => SwiftSyntaxKind::For,
170 "while" => SwiftSyntaxKind::While,
171 "repeat" => SwiftSyntaxKind::Repeat,
172 "do" => SwiftSyntaxKind::Do,
173 "break" => SwiftSyntaxKind::Break,
174 "continue" => SwiftSyntaxKind::Continue,
175 "fallthrough" => SwiftSyntaxKind::Fallthrough,
176 "return" => SwiftSyntaxKind::Return,
177 "throw" => SwiftSyntaxKind::Throw,
178 "try" => SwiftSyntaxKind::Try,
179 "catch" => SwiftSyntaxKind::Catch,
180 "finally" => SwiftSyntaxKind::Finally,
181 "guard" => SwiftSyntaxKind::Guard,
182 "defer" => SwiftSyntaxKind::Defer,
183 "public" => SwiftSyntaxKind::Public,
184 "private" => SwiftSyntaxKind::Private,
185 "internal" => SwiftSyntaxKind::Internal,
186 "fileprivate" => SwiftSyntaxKind::Fileprivate,
187 "open" => SwiftSyntaxKind::Open,
188 "static" => SwiftSyntaxKind::Static,
189 "final" => SwiftSyntaxKind::Final,
190 "override" => SwiftSyntaxKind::Override,
191 "mutating" => SwiftSyntaxKind::Mutating,
192 "nonmutating" => SwiftSyntaxKind::Nonmutating,
193 "lazy" => SwiftSyntaxKind::Lazy,
194 "weak" => SwiftSyntaxKind::Weak,
195 "unowned" => SwiftSyntaxKind::Unowned,
196 "optional" => SwiftSyntaxKind::Optional,
197 "required" => SwiftSyntaxKind::Required,
198 "convenience" => SwiftSyntaxKind::Convenience,
199 "dynamic" => SwiftSyntaxKind::Dynamic,
200 "infix" => SwiftSyntaxKind::Infix,
201 "prefix" => SwiftSyntaxKind::Prefix,
202 "postfix" => SwiftSyntaxKind::Postfix,
203 "Any" => SwiftSyntaxKind::Any,
204 "AnyObject" => SwiftSyntaxKind::AnyObject,
205 "Self" => SwiftSyntaxKind::Self_,
206 "Type" => SwiftSyntaxKind::Type,
207 "Protocol" => SwiftSyntaxKind::Protocol_,
208 "true" => SwiftSyntaxKind::True,
209 "false" => SwiftSyntaxKind::False,
210 "nil" => SwiftSyntaxKind::Nil,
211 "as" => SwiftSyntaxKind::As,
212 "is" => SwiftSyntaxKind::Is,
213 "in" => SwiftSyntaxKind::In,
214 "where" => SwiftSyntaxKind::Where,
215 "associatedtype" => SwiftSyntaxKind::Associatedtype,
216 "operator" => SwiftSyntaxKind::Operator,
217 "precedencegroup" => SwiftSyntaxKind::Precedencegroup,
218 "indirect" => SwiftSyntaxKind::Indirect,
219 "rethrows" => SwiftSyntaxKind::Rethrows,
220 "throws" => SwiftSyntaxKind::Throws,
221 "inout" => SwiftSyntaxKind::Inout,
222 _ => SwiftSyntaxKind::Identifier,
223 };
224 state.add_token(token_kind, start_pos, state.get_position());
225 true
226 }
227 else {
228 if is_escaped {
229 state.set_position(start_pos);
231 }
232 false
233 }
234 }
235 else {
236 if is_escaped {
237 state.set_position(start_pos);
239 }
240 false
241 }
242 }
243
244 fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
246 let start_pos = state.get_position();
247
248 if let Some(ch) = state.peek() {
249 if ch.is_ascii_digit() {
250 state.advance(1);
251
252 if ch == '0' {
254 if let Some('b') | Some('B') = state.peek() {
255 state.advance(1);
256 while let Some(ch) = state.peek() {
257 if ch == '0' || ch == '1' || ch == '_' {
258 state.advance(1);
259 }
260 else {
261 break;
262 }
263 }
264 }
265 else if let Some('o') | Some('O') = state.peek() {
266 state.advance(1);
267 while let Some(ch) = state.peek() {
268 if ch.is_ascii_digit() && ch < '8' || ch == '_' {
269 state.advance(1);
270 }
271 else {
272 break;
273 }
274 }
275 }
276 else if let Some('x') | Some('X') = state.peek() {
277 state.advance(1);
278 while let Some(ch) = state.peek() {
279 if ch.is_ascii_hexdigit() || ch == '_' {
280 state.advance(1);
281 }
282 else {
283 break;
284 }
285 }
286 }
287 else {
288 while let Some(ch) = state.peek() {
290 if ch.is_ascii_digit() || ch == '_' {
291 state.advance(1);
292 }
293 else {
294 break;
295 }
296 }
297 }
298 }
299 else {
300 while let Some(ch) = state.peek() {
302 if ch.is_ascii_digit() || ch == '_' {
303 state.advance(1);
304 }
305 else {
306 break;
307 }
308 }
309 }
310
311 if let Some('.') = state.peek() {
313 state.advance(1);
314 while let Some(ch) = state.peek() {
315 if ch.is_ascii_digit() || ch == '_' {
316 state.advance(1);
317 }
318 else {
319 break;
320 }
321 }
322 }
323
324 if let Some('e') | Some('E') = state.peek() {
326 state.advance(1);
327 if let Some('+') | Some('-') = state.peek() {
328 state.advance(1);
329 }
330 while let Some(ch) = state.peek() {
331 if ch.is_ascii_digit() || ch == '_' {
332 state.advance(1);
333 }
334 else {
335 break;
336 }
337 }
338 }
339
340 state.add_token(SwiftSyntaxKind::NumberLiteral, start_pos, state.get_position());
341 true
342 }
343 else {
344 false
345 }
346 }
347 else {
348 false
349 }
350 }
351
352 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
354 let start_pos = state.get_position();
355
356 if let Some('"') = state.peek() {
358 if let Some('"') = state.peek_next_n(1) {
359 if let Some('"') = state.peek_next_n(2) {
360 state.advance(3);
362 while let Some(ch) = state.peek() {
363 if ch == '"' {
364 if let Some('"') = state.peek_next_n(1) {
365 if let Some('"') = state.peek_next_n(2) {
366 state.advance(3);
367 break;
368 }
369 }
370 }
371 state.advance(ch.len_utf8());
372 }
373 state.add_token(SwiftSyntaxKind::StringLiteral, start_pos, state.get_position());
374 return true;
375 }
376 }
377
378 state.advance(1);
380 while let Some(ch) = state.peek() {
381 if ch == '"' {
382 state.advance(1);
383 break;
384 }
385 else if ch == '\\' {
386 state.advance(1);
387 if let Some(_) = state.peek() {
388 state.advance(1);
389 }
390 }
391 else if ch == '\n' || ch == '\r' {
392 break; }
394 else {
395 state.advance(ch.len_utf8());
396 }
397 }
398 state.add_token(SwiftSyntaxKind::StringLiteral, start_pos, state.get_position());
399 true
400 }
401 else {
402 false
403 }
404 }
405
406 fn lex_operator<S: Source>(&self, state: &mut State<S>) -> bool {
408 let start_pos = state.get_position();
409
410 if let Some(ch) = state.peek() {
411 let token_kind = match ch {
412 '+' => {
413 state.advance(1);
414 if let Some('=') = state.peek() {
415 state.advance(1);
416 SwiftSyntaxKind::PlusAssign
417 }
418 else {
419 SwiftSyntaxKind::Plus
420 }
421 }
422 '-' => {
423 state.advance(1);
424 match state.peek() {
425 Some('=') => {
426 state.advance(1);
427 SwiftSyntaxKind::MinusAssign
428 }
429 Some('>') => {
430 state.advance(1);
431 SwiftSyntaxKind::Arrow
432 }
433 _ => SwiftSyntaxKind::Minus,
434 }
435 }
436 '*' => {
437 state.advance(1);
438 if let Some('=') = state.peek() {
439 state.advance(1);
440 SwiftSyntaxKind::StarAssign
441 }
442 else {
443 SwiftSyntaxKind::Star
444 }
445 }
446 '/' => {
447 state.advance(1);
448 if let Some('=') = state.peek() {
449 state.advance(1);
450 SwiftSyntaxKind::SlashAssign
451 }
452 else {
453 SwiftSyntaxKind::Slash
454 }
455 }
456 '%' => {
457 state.advance(1);
458 if let Some('=') = state.peek() {
459 state.advance(1);
460 SwiftSyntaxKind::PercentAssign
461 }
462 else {
463 SwiftSyntaxKind::Percent
464 }
465 }
466 '=' => {
467 state.advance(1);
468 if let Some('=') = state.peek() {
469 state.advance(1);
470 SwiftSyntaxKind::Equal
471 }
472 else {
473 SwiftSyntaxKind::Assign
474 }
475 }
476 '!' => {
477 state.advance(1);
478 if let Some('=') = state.peek() {
479 state.advance(1);
480 SwiftSyntaxKind::NotEqual
481 }
482 else {
483 SwiftSyntaxKind::LogicalNot
484 }
485 }
486 '<' => {
487 state.advance(1);
488 match state.peek() {
489 Some('=') => {
490 state.advance(1);
491 SwiftSyntaxKind::LessEqual
492 }
493 Some('<') => {
494 state.advance(1);
495 if let Some('=') = state.peek() {
496 state.advance(1);
497 SwiftSyntaxKind::LeftShiftAssign
498 }
499 else {
500 SwiftSyntaxKind::LeftShift
501 }
502 }
503 _ => SwiftSyntaxKind::Less,
504 }
505 }
506 '>' => {
507 state.advance(1);
508 match state.peek() {
509 Some('=') => {
510 state.advance(1);
511 SwiftSyntaxKind::GreaterEqual
512 }
513 Some('>') => {
514 state.advance(1);
515 if let Some('=') = state.peek() {
516 state.advance(1);
517 SwiftSyntaxKind::RightShiftAssign
518 }
519 else {
520 SwiftSyntaxKind::RightShift
521 }
522 }
523 _ => SwiftSyntaxKind::Greater,
524 }
525 }
526 '&' => {
527 state.advance(1);
528 match state.peek() {
529 Some('&') => {
530 state.advance(1);
531 SwiftSyntaxKind::LogicalAnd
532 }
533 Some('=') => {
534 state.advance(1);
535 SwiftSyntaxKind::AndAssign
536 }
537 _ => SwiftSyntaxKind::BitAnd,
538 }
539 }
540 '|' => {
541 state.advance(1);
542 match state.peek() {
543 Some('|') => {
544 state.advance(1);
545 SwiftSyntaxKind::LogicalOr
546 }
547 Some('=') => {
548 state.advance(1);
549 SwiftSyntaxKind::OrAssign
550 }
551 _ => SwiftSyntaxKind::BitOr,
552 }
553 }
554 '^' => {
555 state.advance(1);
556 if let Some('=') = state.peek() {
557 state.advance(1);
558 SwiftSyntaxKind::XorAssign
559 }
560 else {
561 SwiftSyntaxKind::BitXor
562 }
563 }
564 '~' => {
565 state.advance(1);
566 SwiftSyntaxKind::BitNot
567 }
568 '?' => {
569 state.advance(1);
570 if let Some('?') = state.peek() {
571 state.advance(1);
572 SwiftSyntaxKind::QuestionQuestion
573 }
574 else {
575 SwiftSyntaxKind::Question
576 }
577 }
578 '.' => {
579 state.advance(1);
580 match state.peek() {
581 Some('.') => {
582 state.advance(1);
583 if let Some('<') = state.peek() {
584 state.advance(1);
585 SwiftSyntaxKind::Range
586 }
587 else {
588 SwiftSyntaxKind::ClosedRange
589 }
590 }
591 _ => SwiftSyntaxKind::Dot,
592 }
593 }
594 _ => return false,
595 };
596
597 state.add_token(token_kind, start_pos, state.get_position());
598 true
599 }
600 else {
601 false
602 }
603 }
604
605 fn lex_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
607 let start_pos = state.get_position();
608
609 if let Some(ch) = state.peek() {
610 let token_kind = match ch {
611 '(' => SwiftSyntaxKind::LeftParen,
612 ')' => SwiftSyntaxKind::RightParen,
613 '[' => SwiftSyntaxKind::LeftBracket,
614 ']' => SwiftSyntaxKind::RightBracket,
615 '{' => SwiftSyntaxKind::LeftBrace,
616 '}' => SwiftSyntaxKind::RightBrace,
617 ',' => SwiftSyntaxKind::Comma,
618 ';' => SwiftSyntaxKind::Semicolon,
619 ':' => SwiftSyntaxKind::Colon,
620 '@' => SwiftSyntaxKind::At,
621 '#' => SwiftSyntaxKind::Hash,
622 '$' => SwiftSyntaxKind::Dollar,
623 '_' => SwiftSyntaxKind::Underscore,
624 '\\' => SwiftSyntaxKind::Backslash,
625 _ => return false,
626 };
627
628 state.advance(ch.len_utf8());
629 state.add_token(token_kind, start_pos, state.get_position());
630 true
631 }
632 else {
633 false
634 }
635 }
636}
637
638impl<'config> Lexer<SwiftLanguage> for SwiftLexer<'config> {
639 fn lex_incremental(
640 &self,
641 source: impl Source,
642 _changed: usize,
643 _cache: IncrementalCache<SwiftLanguage>,
644 ) -> LexOutput<SwiftLanguage> {
645 let mut state = LexerState::new(source);
646 let result = self.run(&mut state);
647 state.finish(result)
648 }
649}
650
651impl<'config> SwiftLexer<'config> {
652 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
653 while state.not_at_end() {
654 let _safe_point = state.get_position();
655
656 if self.skip_whitespace(state) {
658 continue;
659 }
660
661 if self.lex_newline(state) {
662 continue;
663 }
664
665 if self.lex_comment(state) {
666 continue;
667 }
668
669 if self.lex_string_literal(state) {
670 continue;
671 }
672
673 if self.lex_number_literal(state) {
674 continue;
675 }
676
677 if self.lex_identifier_or_keyword(state) {
678 continue;
679 }
680
681 if self.lex_operator(state) {
682 continue;
683 }
684
685 if self.lex_delimiter(state) {
686 continue;
687 }
688
689 let start_pos = state.get_position();
691 if let Some(ch) = state.peek() {
692 state.advance(ch.len_utf8());
693 state.add_token(SwiftSyntaxKind::Error, start_pos, state.get_position());
694 }
695 }
696
697 let eof_pos = state.get_position();
699 state.add_token(SwiftSyntaxKind::Eof, eof_pos, eof_pos);
700
701 Ok(())
702 }
703}