Skip to main content

oak_crystal/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2
3pub mod token_type;
4use crate::language::CrystalLanguage;
5use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
6pub use token_type::CrystalTokenType;
7
/// Lexer state specialised to [`CrystalLanguage`]; every lexing routine in this module takes it by `&mut`.
pub(crate) type State<'a, S> = LexerState<'a, S, CrystalLanguage>;
9
/// Lexer for the Crystal language.
///
/// Borrows a [`CrystalLanguage`] configuration for the lifetime `'config`.
#[derive(Clone)]
pub struct CrystalLexer<'config> {
    // Set by `new`. `dead_code` is allowed because none of the lexing routines visible in this
    // module read the configuration.
    #[allow(dead_code)]
    config: &'config CrystalLanguage,
}
16
17impl<'config> Lexer<CrystalLanguage> for CrystalLexer<'config> {
18    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], mut cache: &'a mut impl LexerCache<CrystalLanguage>) -> LexOutput<CrystalLanguage> {
19        let mut state = LexerState::new(source);
20        let result = self.run(&mut state);
21        if result.is_ok() {
22            state.add_eof()
23        }
24        state.finish_with_cache(result, &mut cache)
25    }
26}
27
28impl<'config> CrystalLexer<'config> {
29    /// Creates a new `CrystalLexer` with the given configuration.
30    pub fn new(config: &'config CrystalLanguage) -> Self {
31        Self { config }
32    }
33
34    /// Main lexing loop.
35    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36        while state.not_at_end() {
37            let safe_point = state.get_position();
38
39            if self.skip_whitespace(state) {
40                continue;
41            }
42
43            if self.lex_newline(state) {
44                continue;
45            }
46
47            if self.lex_comment(state) {
48                continue;
49            }
50
51            if self.lex_string(state) {
52                continue;
53            }
54
55            if self.lex_number(state) {
56                continue;
57            }
58
59            if self.lex_keyword_or_identifier(state) {
60                continue;
61            }
62
63            if self.lex_operator(state) {
64                continue;
65            }
66
67            if self.lex_delimiter(state) {
68                continue;
69            }
70
71            // If no rule matches, skip current character and mark error
72            let start_pos = state.get_position();
73            if let Some(ch) = state.peek() {
74                state.advance(ch.len_utf8());
75                state.add_token(CrystalTokenType::Error, start_pos, state.get_position())
76            }
77
78            state.advance_if_dead_lock(safe_point)
79        }
80
81        Ok(())
82    }
83
84    /// Skip whitespace
85    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
86        let start_pos = state.get_position();
87
88        while let Some(ch) = state.peek() {
89            if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
90        }
91
92        if state.get_position() > start_pos {
93            state.add_token(CrystalTokenType::Whitespace, start_pos, state.get_position());
94            true
95        }
96        else {
97            false
98        }
99    }
100
101    /// Handle newlines
102    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
103        let start_pos = state.get_position();
104
105        if let Some('\n') = state.peek() {
106            state.advance(1);
107            state.add_token(CrystalTokenType::Newline, start_pos, state.get_position());
108            true
109        }
110        else if let Some('\r') = state.peek() {
111            state.advance(1);
112            if let Some('\n') = state.peek() {
113                state.advance(1)
114            }
115            state.add_token(CrystalTokenType::Newline, start_pos, state.get_position());
116            true
117        }
118        else {
119            false
120        }
121    }
122
123    /// Handle comments
124    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
125        let start_pos = state.get_position();
126
127        if let Some('#') = state.peek() {
128            state.advance(1);
129
130            // Single-line comment, read until end of line
131            while let Some(ch) = state.peek() {
132                if ch == '\n' || ch == '\r' {
133                    break;
134                }
135                state.advance(ch.len_utf8())
136            }
137
138            state.add_token(CrystalTokenType::Comment, start_pos, state.get_position());
139            true
140        }
141        else {
142            false
143        }
144    }
145
146    /// Handle strings
147    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
148        let start_pos = state.get_position();
149
150        if let Some(quote) = state.peek() {
151            if quote == '"' || quote == '\'' {
152                state.advance(1);
153
154                while let Some(ch) = state.peek() {
155                    if ch == quote {
156                        state.advance(1);
157                        break;
158                    }
159                    else if ch == '\\' {
160                        state.advance(1);
161                        if let Some(_) = state.peek() {
162                            state.advance(1)
163                        }
164                    }
165                    else {
166                        state.advance(ch.len_utf8())
167                    }
168                }
169
170                state.add_token(CrystalTokenType::String, start_pos, state.get_position());
171                true
172            }
173            else {
174                false
175            }
176        }
177        else {
178            false
179        }
180    }
181
182    /// Handle numbers
183    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
184        let start_pos = state.get_position();
185
186        if let Some(ch) = state.peek() {
187            if ch.is_ascii_digit() {
188                state.advance(1);
189
190                while let Some(ch) = state.peek() {
191                    if ch.is_ascii_digit() || ch == '.' || ch == '_' { state.advance(1) } else { break }
192                }
193
194                state.add_token(CrystalTokenType::Number, start_pos, state.get_position());
195                true
196            }
197            else {
198                false
199            }
200        }
201        else {
202            false
203        }
204    }
205
206    /// Handle keywords or identifiers
207    fn lex_keyword_or_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
208        let start_pos = state.get_position();
209
210        if let Some(ch) = state.peek() {
211            if ch.is_ascii_alphabetic() || ch == '_' {
212                state.advance(ch.len_utf8());
213
214                while let Some(ch) = state.peek() {
215                    if ch.is_ascii_alphanumeric() || ch == '_' || ch == '?' || ch == '!' { state.advance(ch.len_utf8()) } else { break }
216                }
217
218                let end_pos = state.get_position();
219                let text = state.get_text_in(oak_core::Range { start: start_pos, end: end_pos });
220                let token_kind = match text.as_ref() {
221                    // Crystal keywords
222                    "class" => CrystalTokenType::ClassKeyword,
223                    "module" => CrystalTokenType::ModuleKeyword,
224                    "def" => CrystalTokenType::DefKeyword,
225                    "end" => CrystalTokenType::EndKeyword,
226                    "if" => CrystalTokenType::IfKeyword,
227                    "else" => CrystalTokenType::ElseKeyword,
228                    "elsif" => CrystalTokenType::ElsifKeyword,
229                    "unless" => CrystalTokenType::UnlessKeyword,
230                    "case" => CrystalTokenType::CaseKeyword,
231                    "when" => CrystalTokenType::WhenKeyword,
232                    "then" => CrystalTokenType::ThenKeyword,
233                    "while" => CrystalTokenType::WhileKeyword,
234                    "until" => CrystalTokenType::UntilKeyword,
235                    "for" => CrystalTokenType::ForKeyword,
236                    "in" => CrystalTokenType::InKeyword,
237                    "do" => CrystalTokenType::DoKeyword,
238                    "begin" => CrystalTokenType::BeginKeyword,
239                    "rescue" => CrystalTokenType::RescueKeyword,
240                    "ensure" => CrystalTokenType::EnsureKeyword,
241                    "break" => CrystalTokenType::BreakKeyword,
242                    "next" => CrystalTokenType::NextKeyword,
243                    "return" => CrystalTokenType::ReturnKeyword,
244                    "yield" => CrystalTokenType::YieldKeyword,
245                    "super" => CrystalTokenType::SuperKeyword,
246                    "self" => CrystalTokenType::SelfKeyword,
247                    "true" => CrystalTokenType::TrueKeyword,
248                    "false" => CrystalTokenType::FalseKeyword,
249                    "nil" => CrystalTokenType::NilKeyword,
250                    "and" => CrystalTokenType::AndKeyword,
251                    "or" => CrystalTokenType::OrKeyword,
252                    "not" => CrystalTokenType::NotKeyword,
253                    _ => CrystalTokenType::Identifier,
254                };
255
256                state.add_token(token_kind, start_pos, state.get_position());
257                true
258            }
259            else {
260                false
261            }
262        }
263        else {
264            false
265        }
266    }
267
268    /// Handle operators
269    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
270        let start_pos = state.get_position();
271
272        if let Some(ch) = state.peek() {
273            let token_kind = match ch {
274                '+' => {
275                    state.advance(1);
276                    if let Some('=') = state.peek() {
277                        state.advance(1);
278                        CrystalTokenType::PlusEqual
279                    }
280                    else {
281                        CrystalTokenType::Plus
282                    }
283                }
284                '-' => {
285                    state.advance(1);
286                    if let Some('=') = state.peek() {
287                        state.advance(1);
288                        CrystalTokenType::MinusEqual
289                    }
290                    else {
291                        CrystalTokenType::Minus
292                    }
293                }
294                '*' => {
295                    state.advance(1);
296                    if let Some('*') = state.peek() {
297                        state.advance(1);
298                        if let Some('=') = state.peek() {
299                            state.advance(1);
300                            CrystalTokenType::StarStarEqual
301                        }
302                        else {
303                            CrystalTokenType::StarStar
304                        }
305                    }
306                    else if let Some('=') = state.peek() {
307                        state.advance(1);
308                        CrystalTokenType::StarEqual
309                    }
310                    else {
311                        CrystalTokenType::Star
312                    }
313                }
314                '/' => {
315                    state.advance(1);
316                    if let Some('=') = state.peek() {
317                        state.advance(1);
318                        CrystalTokenType::SlashEqual
319                    }
320                    else {
321                        CrystalTokenType::Slash
322                    }
323                }
324                '%' => {
325                    state.advance(1);
326                    if let Some('=') = state.peek() {
327                        state.advance(1);
328                        CrystalTokenType::PercentEqual
329                    }
330                    else {
331                        CrystalTokenType::Percent
332                    }
333                }
334                '=' => {
335                    state.advance(1);
336                    if let Some('=') = state.peek() {
337                        state.advance(1);
338                        CrystalTokenType::EqualEqual
339                    }
340                    else if let Some('~') = state.peek() {
341                        state.advance(1);
342                        CrystalTokenType::Match
343                    }
344                    else {
345                        CrystalTokenType::Equal
346                    }
347                }
348                '!' => {
349                    state.advance(1);
350                    if let Some('=') = state.peek() {
351                        state.advance(1);
352                        CrystalTokenType::NotEqual
353                    }
354                    else if let Some('~') = state.peek() {
355                        state.advance(1);
356                        CrystalTokenType::NotMatch
357                    }
358                    else {
359                        CrystalTokenType::Not
360                    }
361                }
362                '<' => {
363                    state.advance(1);
364                    if let Some('=') = state.peek() {
365                        state.advance(1);
366                        if let Some('>') = state.peek() {
367                            state.advance(1);
368                            CrystalTokenType::Spaceship
369                        }
370                        else {
371                            CrystalTokenType::LessEqual
372                        }
373                    }
374                    else if let Some('<') = state.peek() {
375                        state.advance(1);
376                        if let Some('=') = state.peek() {
377                            state.advance(1);
378                            CrystalTokenType::LeftShiftEqual
379                        }
380                        else {
381                            CrystalTokenType::LeftShift
382                        }
383                    }
384                    else {
385                        CrystalTokenType::Less
386                    }
387                }
388                '>' => {
389                    state.advance(1);
390                    if let Some('=') = state.peek() {
391                        state.advance(1);
392                        CrystalTokenType::GreaterEqual
393                    }
394                    else if let Some('>') = state.peek() {
395                        state.advance(1);
396                        if let Some('=') = state.peek() {
397                            state.advance(1);
398                            CrystalTokenType::RightShiftEqual
399                        }
400                        else {
401                            CrystalTokenType::RightShift
402                        }
403                    }
404                    else {
405                        CrystalTokenType::Greater
406                    }
407                }
408                '&' => {
409                    state.advance(1);
410                    if let Some('&') = state.peek() {
411                        state.advance(1);
412                        if let Some('=') = state.peek() {
413                            state.advance(1);
414                            CrystalTokenType::LogicalAndEqual
415                        }
416                        else {
417                            CrystalTokenType::LogicalAnd
418                        }
419                    }
420                    else if let Some('=') = state.peek() {
421                        state.advance(1);
422                        CrystalTokenType::AndEqual
423                    }
424                    else {
425                        CrystalTokenType::BitwiseAnd
426                    }
427                }
428                '|' => {
429                    state.advance(1);
430                    if let Some('|') = state.peek() {
431                        state.advance(1);
432                        if let Some('=') = state.peek() {
433                            state.advance(1);
434                            CrystalTokenType::LogicalOrEqual
435                        }
436                        else {
437                            CrystalTokenType::LogicalOr
438                        }
439                    }
440                    else if let Some('=') = state.peek() {
441                        state.advance(1);
442                        CrystalTokenType::OrEqual
443                    }
444                    else {
445                        CrystalTokenType::BitwiseOr
446                    }
447                }
448                '^' => {
449                    state.advance(1);
450                    if let Some('=') = state.peek() {
451                        state.advance(1);
452                        CrystalTokenType::XorEqual
453                    }
454                    else {
455                        CrystalTokenType::BitwiseXor
456                    }
457                }
458                '~' => {
459                    state.advance(1);
460                    CrystalTokenType::BitwiseNot
461                }
462                _ => return false,
463            };
464
465            state.add_token(token_kind, start_pos, state.get_position());
466            true
467        }
468        else {
469            false
470        }
471    }
472
473    /// Handle delimiters
474    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
475        let start_pos = state.get_position();
476
477        if let Some(ch) = state.peek() {
478            let token_kind = match ch {
479                '(' => {
480                    state.advance(1);
481                    CrystalTokenType::LeftParen
482                }
483                ')' => {
484                    state.advance(1);
485                    CrystalTokenType::RightParen
486                }
487                '{' => {
488                    state.advance(1);
489                    CrystalTokenType::LeftBrace
490                }
491                '}' => {
492                    state.advance(1);
493                    CrystalTokenType::RightBrace
494                }
495                '[' => {
496                    state.advance(1);
497                    CrystalTokenType::LeftBracket
498                }
499                ']' => {
500                    state.advance(1);
501                    CrystalTokenType::RightBracket
502                }
503                ',' => {
504                    state.advance(1);
505                    CrystalTokenType::Comma
506                }
507                ';' => {
508                    state.advance(1);
509                    CrystalTokenType::Semicolon
510                }
511                '.' => {
512                    state.advance(1);
513                    if let Some('.') = state.peek() {
514                        state.advance(1);
515                        if let Some('.') = state.peek() {
516                            state.advance(1);
517                            CrystalTokenType::DotDotDot
518                        }
519                        else {
520                            CrystalTokenType::DotDot
521                        }
522                    }
523                    else {
524                        CrystalTokenType::Dot
525                    }
526                }
527                ':' => {
528                    state.advance(1);
529                    if let Some(':') = state.peek() {
530                        state.advance(1);
531                        CrystalTokenType::DoubleColon
532                    }
533                    else {
534                        CrystalTokenType::At // In Crystal, colon can be at the end of a symbol or for named arguments
535                    }
536                }
537                '?' => {
538                    state.advance(1);
539                    CrystalTokenType::Question
540                }
541                '@' => {
542                    state.advance(1);
543                    if let Some('@') = state.peek() {
544                        state.advance(1);
545                        CrystalTokenType::DoubleAt
546                    }
547                    else {
548                        CrystalTokenType::At
549                    }
550                }
551                '$' => {
552                    state.advance(1);
553                    CrystalTokenType::Dollar
554                }
555                _ => return false,
556            };
557
558            state.add_token(token_kind, start_pos, state.get_position());
559            true
560        }
561        else {
562            false
563        }
564    }
565}