1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::language::SwiftLanguage;
6pub use crate::lexer::token_type::SwiftTokenType;
7use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
8
9pub(crate) type State<'a, S> = LexerState<'a, S, SwiftLanguage>;
10
11#[derive(Clone, Debug)]
13pub struct SwiftLexer<'config> {
14 config: &'config SwiftLanguage,
15}
16
17impl<'config> Lexer<SwiftLanguage> for SwiftLexer<'config> {
18 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<SwiftLanguage>) -> LexOutput<SwiftLanguage> {
19 let mut state = State::new(source);
20 let result = self.run(&mut state);
21 if result.is_ok() {
22 state.add_eof();
23 }
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl<'config> SwiftLexer<'config> {
29 pub fn new(config: &'config SwiftLanguage) -> Self {
31 Self { config }
32 }
33
34 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
36 let start_pos = state.get_position();
37
38 while let Some(ch) = state.peek() {
39 if ch == ' ' || ch == '\t' {
40 state.advance(ch.len_utf8());
41 }
42 else {
43 break;
44 }
45 }
46
47 if state.get_position() > start_pos {
48 state.add_token(SwiftTokenType::Whitespace, start_pos, state.get_position());
49 true
50 }
51 else {
52 false
53 }
54 }
55
56 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
58 let start_pos = state.get_position();
59
60 if let Some('\n') = state.peek() {
61 state.advance(1);
62 state.add_token(SwiftTokenType::Newline, start_pos, state.get_position());
63 true
64 }
65 else if let Some('\r') = state.peek() {
66 state.advance(1);
67 if let Some('\n') = state.peek() {
68 state.advance(1);
69 }
70 state.add_token(SwiftTokenType::Newline, start_pos, state.get_position());
71 true
72 }
73 else {
74 false
75 }
76 }
77
78 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
80 let start_pos = state.get_position();
81
82 if let Some('/') = state.peek() {
83 if let Some('/') = state.peek_next_n(1) {
84 state.advance(2);
86 while let Some(ch) = state.peek() {
87 if ch == '\n' || ch == '\r' {
88 break;
89 }
90 state.advance(ch.len_utf8());
91 }
92 state.add_token(SwiftTokenType::Comment, start_pos, state.get_position());
93 true
94 }
95 else if let Some('*') = state.peek_next_n(1) {
96 state.advance(2);
98 let mut depth = 1;
99 while let Some(ch) = state.peek() {
100 if ch == '/'
101 && let Some('*') = state.peek_next_n(1)
102 {
103 state.advance(2);
104 depth += 1;
105 }
106 else if ch == '*'
107 && let Some('/') = state.peek_next_n(1)
108 {
109 state.advance(2);
110 depth -= 1;
111 if depth == 0 {
112 break;
113 }
114 }
115 else {
116 state.advance(ch.len_utf8());
117 }
118 }
119 state.add_token(SwiftTokenType::Comment, start_pos, state.get_position());
120 true
121 }
122 else {
123 false
124 }
125 }
126 else {
127 false
128 }
129 }
130
131 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
133 let start_pos = state.get_position();
134
135 let is_escaped = if let Some('`') = state.peek() {
137 state.advance(1);
138 true
139 }
140 else {
141 false
142 };
143
144 if let Some(ch) = state.peek() {
145 if ch.is_ascii_alphabetic() || ch == '_' {
146 state.advance(ch.len_utf8());
147
148 while let Some(ch) = state.peek() {
149 if ch.is_ascii_alphanumeric() || ch == '_' {
150 state.advance(ch.len_utf8());
151 }
152 else {
153 break;
154 }
155 }
156
157 if is_escaped {
159 if let Some('`') = state.peek() {
160 state.advance(1);
161 }
162 state.add_token(SwiftTokenType::Identifier, start_pos, state.get_position());
163 return true;
164 }
165
166 let text = state.get_text_in(core::range::Range { start: start_pos, end: state.get_position() });
168
169 let token_kind = match text.as_ref() {
170 "class" => SwiftTokenType::Class,
171 "struct" => SwiftTokenType::Struct,
172 "enum" => SwiftTokenType::Enum,
173 "protocol" => SwiftTokenType::Protocol,
174 "extension" => SwiftTokenType::Extension,
175 "func" => SwiftTokenType::Func,
176 "var" => SwiftTokenType::Var,
177 "let" => SwiftTokenType::Let,
178 "init" => SwiftTokenType::Init,
179 "deinit" => SwiftTokenType::Deinit,
180 "subscript" => SwiftTokenType::Subscript,
181 "typealias" => SwiftTokenType::Typealias,
182 "import" => SwiftTokenType::Import,
183 "if" => SwiftTokenType::If,
184 "else" => SwiftTokenType::Else,
185 "switch" => SwiftTokenType::Switch,
186 "case" => SwiftTokenType::Case,
187 "default" => SwiftTokenType::Default,
188 "for" => SwiftTokenType::For,
189 "while" => SwiftTokenType::While,
190 "repeat" => SwiftTokenType::Repeat,
191 "do" => SwiftTokenType::Do,
192 "break" => SwiftTokenType::Break,
193 "continue" => SwiftTokenType::Continue,
194 "fallthrough" => SwiftTokenType::Fallthrough,
195 "return" => SwiftTokenType::Return,
196 "throw" => SwiftTokenType::Throw,
197 "try" => SwiftTokenType::Try,
198 "catch" => SwiftTokenType::Catch,
199 "finally" => SwiftTokenType::Finally,
200 "guard" => SwiftTokenType::Guard,
201 "defer" => SwiftTokenType::Defer,
202 "public" => SwiftTokenType::Public,
203 "private" => SwiftTokenType::Private,
204 "internal" => SwiftTokenType::Internal,
205 "fileprivate" => SwiftTokenType::Fileprivate,
206 "open" => SwiftTokenType::Open,
207 "static" => SwiftTokenType::Static,
208 "final" => SwiftTokenType::Final,
209 "override" => SwiftTokenType::Override,
210 "mutating" => SwiftTokenType::Mutating,
211 "nonmutating" => SwiftTokenType::Nonmutating,
212 "lazy" => SwiftTokenType::Lazy,
213 "weak" => SwiftTokenType::Weak,
214 "unowned" => SwiftTokenType::Unowned,
215 "optional" => SwiftTokenType::Optional,
216 "required" => SwiftTokenType::Required,
217 "convenience" => SwiftTokenType::Convenience,
218 "dynamic" => SwiftTokenType::Dynamic,
219 "infix" => SwiftTokenType::Infix,
220 "prefix" => SwiftTokenType::Prefix,
221 "postfix" => SwiftTokenType::Postfix,
222 "Any" => SwiftTokenType::Any,
223 "AnyObject" => SwiftTokenType::AnyObject,
224 "Self" => SwiftTokenType::Self_,
225 "Type" => SwiftTokenType::Type,
226 "Protocol" => SwiftTokenType::Protocol_,
227 "true" => SwiftTokenType::True,
228 "false" => SwiftTokenType::False,
229 "nil" => SwiftTokenType::Nil,
230 "as" => SwiftTokenType::As,
231 "is" => SwiftTokenType::Is,
232 "in" => SwiftTokenType::In,
233 "where" => SwiftTokenType::Where,
234 "associatedtype" => SwiftTokenType::Associatedtype,
235 "operator" => SwiftTokenType::Operator,
236 "precedencegroup" => SwiftTokenType::Precedencegroup,
237 "indirect" => SwiftTokenType::Indirect,
238 "rethrows" => SwiftTokenType::Rethrows,
239 "throws" => SwiftTokenType::Throws,
240 "inout" => SwiftTokenType::Inout,
241 _ => SwiftTokenType::Identifier,
242 };
243 state.add_token(token_kind, start_pos, state.get_position());
244 true
245 }
246 else {
247 if is_escaped {
248 state.set_position(start_pos);
250 }
251 false
252 }
253 }
254 else {
255 if is_escaped {
256 state.set_position(start_pos);
258 }
259 false
260 }
261 }
262
263 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
265 let start_pos = state.get_position();
266
267 if let Some(ch) = state.peek() {
268 if ch.is_ascii_digit() {
269 state.advance(1);
270
271 if ch == '0' {
273 if let Some('b') | Some('B') = state.peek() {
274 state.advance(1);
275 while let Some(ch) = state.peek() {
276 if ch == '0' || ch == '1' || ch == '_' {
277 state.advance(1);
278 }
279 else {
280 break;
281 }
282 }
283 }
284 else if let Some('o') | Some('O') = state.peek() {
285 state.advance(1);
286 while let Some(ch) = state.peek() {
287 if ch.is_ascii_digit() && ch < '8' || ch == '_' {
288 state.advance(1);
289 }
290 else {
291 break;
292 }
293 }
294 }
295 else if let Some('x') | Some('X') = state.peek() {
296 state.advance(1);
297 while let Some(ch) = state.peek() {
298 if ch.is_ascii_hexdigit() || ch == '_' {
299 state.advance(1);
300 }
301 else {
302 break;
303 }
304 }
305 }
306 else {
307 while let Some(ch) = state.peek() {
309 if ch.is_ascii_digit() || ch == '_' {
310 state.advance(1);
311 }
312 else {
313 break;
314 }
315 }
316 }
317 }
318 else {
319 while let Some(ch) = state.peek() {
321 if ch.is_ascii_digit() || ch == '_' {
322 state.advance(1);
323 }
324 else {
325 break;
326 }
327 }
328 }
329
330 if let Some('.') = state.peek() {
332 if let Some(next) = state.peek_next_n(1) {
334 if next != '.' {
335 state.advance(1);
336 while let Some(ch) = state.peek() {
337 if ch.is_ascii_digit() || ch == '_' {
338 state.advance(1);
339 }
340 else {
341 break;
342 }
343 }
344 }
345 }
346 else {
347 state.advance(1);
349 }
350 }
351
352 if let Some('e') | Some('E') = state.peek() {
354 state.advance(1);
355 if let Some('+') | Some('-') = state.peek() {
356 state.advance(1);
357 }
358 while let Some(ch) = state.peek() {
359 if ch.is_ascii_digit() || ch == '_' {
360 state.advance(1);
361 }
362 else {
363 break;
364 }
365 }
366 }
367
368 state.add_token(SwiftTokenType::NumberLiteral, start_pos, state.get_position());
369 true
370 }
371 else {
372 false
373 }
374 }
375 else {
376 false
377 }
378 }
379
380 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
382 let start_pos = state.get_position();
383
384 if let Some('"') = state.peek() {
386 if let Some('"') = state.peek_next_n(1) {
387 if let Some('"') = state.peek_next_n(2) {
388 state.advance(3);
390 while let Some(ch) = state.peek() {
391 if ch == '"' {
392 if let Some('"') = state.peek_next_n(1) {
393 if let Some('"') = state.peek_next_n(2) {
394 state.advance(3);
395 break;
396 }
397 }
398 }
399 state.advance(ch.len_utf8());
400 }
401 state.add_token(SwiftTokenType::StringLiteral, start_pos, state.get_position());
402 return true;
403 }
404 }
405
406 state.advance(1);
408 while let Some(ch) = state.peek() {
409 if ch == '"' {
410 state.advance(1);
411 break;
412 }
413 else if ch == '\\' {
414 state.advance(1);
415 if let Some(_) = state.peek() {
416 state.advance(1);
417 }
418 }
419 else if ch == '\n' || ch == '\r' {
420 break; }
422 else {
423 state.advance(ch.len_utf8());
424 }
425 }
426 state.add_token(SwiftTokenType::StringLiteral, start_pos, state.get_position());
427 true
428 }
429 else {
430 false
431 }
432 }
433
434 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
436 let start_pos = state.get_position();
437
438 if let Some(ch) = state.peek() {
439 let token_kind = match ch {
440 '+' => {
441 state.advance(1);
442 if let Some('=') = state.peek() {
443 state.advance(1);
444 SwiftTokenType::PlusAssign
445 }
446 else {
447 SwiftTokenType::Plus
448 }
449 }
450 '-' => {
451 state.advance(1);
452 match state.peek() {
453 Some('=') => {
454 state.advance(1);
455 SwiftTokenType::MinusAssign
456 }
457 Some('>') => {
458 state.advance(1);
459 SwiftTokenType::Arrow
460 }
461 _ => SwiftTokenType::Minus,
462 }
463 }
464 '*' => {
465 state.advance(1);
466 if let Some('=') = state.peek() {
467 state.advance(1);
468 SwiftTokenType::StarAssign
469 }
470 else {
471 SwiftTokenType::Star
472 }
473 }
474 '/' => {
475 state.advance(1);
476 if let Some('=') = state.peek() {
477 state.advance(1);
478 SwiftTokenType::SlashAssign
479 }
480 else {
481 SwiftTokenType::Slash
482 }
483 }
484 '%' => {
485 state.advance(1);
486 if let Some('=') = state.peek() {
487 state.advance(1);
488 SwiftTokenType::PercentAssign
489 }
490 else {
491 SwiftTokenType::Percent
492 }
493 }
494 '=' => {
495 state.advance(1);
496 if let Some('=') = state.peek() {
497 state.advance(1);
498 SwiftTokenType::Equal
499 }
500 else {
501 SwiftTokenType::Assign
502 }
503 }
504 '!' => {
505 state.advance(1);
506 if let Some('=') = state.peek() {
507 state.advance(1);
508 SwiftTokenType::NotEqual
509 }
510 else {
511 SwiftTokenType::LogicalNot
512 }
513 }
514 '<' => {
515 state.advance(1);
516 match state.peek() {
517 Some('=') => {
518 state.advance(1);
519 SwiftTokenType::LessEqual
520 }
521 Some('<') => {
522 state.advance(1);
523 if let Some('=') = state.peek() {
524 state.advance(1);
525 SwiftTokenType::LeftShiftAssign
526 }
527 else {
528 SwiftTokenType::LeftShift
529 }
530 }
531 _ => SwiftTokenType::Less,
532 }
533 }
534 '>' => {
535 state.advance(1);
536 match state.peek() {
537 Some('=') => {
538 state.advance(1);
539 SwiftTokenType::GreaterEqual
540 }
541 Some('>') => {
542 state.advance(1);
543 if let Some('=') = state.peek() {
544 state.advance(1);
545 SwiftTokenType::RightShiftAssign
546 }
547 else {
548 SwiftTokenType::RightShift
549 }
550 }
551 _ => SwiftTokenType::Greater,
552 }
553 }
554 '&' => {
555 state.advance(1);
556 match state.peek() {
557 Some('&') => {
558 state.advance(1);
559 SwiftTokenType::LogicalAnd
560 }
561 Some('=') => {
562 state.advance(1);
563 SwiftTokenType::AndAssign
564 }
565 _ => SwiftTokenType::BitAnd,
566 }
567 }
568 '|' => {
569 state.advance(1);
570 match state.peek() {
571 Some('|') => {
572 state.advance(1);
573 SwiftTokenType::LogicalOr
574 }
575 Some('=') => {
576 state.advance(1);
577 SwiftTokenType::OrAssign
578 }
579 _ => SwiftTokenType::BitOr,
580 }
581 }
582 '^' => {
583 state.advance(1);
584 if let Some('=') = state.peek() {
585 state.advance(1);
586 SwiftTokenType::XorAssign
587 }
588 else {
589 SwiftTokenType::BitXor
590 }
591 }
592 '~' => {
593 state.advance(1);
594 SwiftTokenType::BitNot
595 }
596 '?' => {
597 state.advance(1);
598 if let Some('?') = state.peek() {
599 state.advance(1);
600 SwiftTokenType::QuestionQuestion
601 }
602 else {
603 SwiftTokenType::Question
604 }
605 }
606 '.' => {
607 state.advance(1);
608 match state.peek() {
609 Some('.') => {
610 state.advance(1);
611 match state.peek() {
612 Some('.') => {
613 state.advance(1);
614 SwiftTokenType::ClosedRange
615 }
616 Some('<') => {
617 state.advance(1);
618 SwiftTokenType::Range
619 }
620 _ => SwiftTokenType::Dot, }
622 }
623 _ => SwiftTokenType::Dot,
624 }
625 }
626 _ => return false,
627 };
628
629 state.add_token(token_kind, start_pos, state.get_position());
630 true
631 }
632 else {
633 false
634 }
635 }
636
637 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
639 let start_pos = state.get_position();
640
641 if let Some(ch) = state.peek() {
642 let token_kind = match ch {
643 '(' => SwiftTokenType::LeftParen,
644 ')' => SwiftTokenType::RightParen,
645 '[' => SwiftTokenType::LeftBracket,
646 ']' => SwiftTokenType::RightBracket,
647 '{' => SwiftTokenType::LeftBrace,
648 '}' => SwiftTokenType::RightBrace,
649 ',' => SwiftTokenType::Comma,
650 ';' => SwiftTokenType::Semicolon,
651 ':' => SwiftTokenType::Colon,
652 '@' => SwiftTokenType::At,
653 '#' => SwiftTokenType::Hash,
654 '$' => SwiftTokenType::Dollar,
655 '_' => SwiftTokenType::Underscore,
656 '\\' => SwiftTokenType::Backslash,
657 _ => return false,
658 };
659
660 state.advance(ch.len_utf8());
661 state.add_token(token_kind, start_pos, state.get_position());
662 true
663 }
664 else {
665 false
666 }
667 }
668}
669
670impl<'config> SwiftLexer<'config> {
671 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
672 while state.not_at_end() {
673 let safe_point = state.get_position();
674
675 if self.skip_whitespace(state) {
677 continue;
678 }
679
680 if self.lex_newline(state) {
681 continue;
682 }
683
684 if self.lex_comment(state) {
685 continue;
686 }
687
688 if self.lex_string_literal(state) {
689 continue;
690 }
691
692 if self.lex_number_literal(state) {
693 continue;
694 }
695
696 if self.lex_identifier_or_keyword(state) {
697 continue;
698 }
699
700 if self.lex_operator(state) {
701 continue;
702 }
703
704 if self.lex_delimiter(state) {
705 continue;
706 }
707
708 let start_pos = state.get_position();
710 if let Some(ch) = state.peek() {
711 state.advance(ch.len_utf8());
712 state.add_token(SwiftTokenType::Error, start_pos, state.get_position());
713 }
714
715 state.advance_if_dead_lock(safe_point)
716 }
717
718 Ok(())
719 }
720}