1#![doc = include_str!("readme.md")]
2
3pub mod token_type;
4use crate::language::CrystalLanguage;
5use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
6pub use token_type::CrystalTokenType;
7
8pub(crate) type State<'a, S> = LexerState<'a, S, CrystalLanguage>;
9
/// Lexer for Crystal source text.
///
/// Borrows a [`CrystalLanguage`] configuration for the lifetime `'config`;
/// the lexer itself holds no other state, so cloning it only copies that reference.
#[derive(Clone)]
pub struct CrystalLexer<'config> {
    // Stored by `new`, but not read by any lexing method visible in this file,
    // hence the lint suppression.
    #[allow(dead_code)]
    config: &'config CrystalLanguage,
}
16
17impl<'config> Lexer<CrystalLanguage> for CrystalLexer<'config> {
18 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], mut cache: &'a mut impl LexerCache<CrystalLanguage>) -> LexOutput<CrystalLanguage> {
19 let mut state = LexerState::new(source);
20 let result = self.run(&mut state);
21 if result.is_ok() {
22 state.add_eof()
23 }
24 state.finish_with_cache(result, &mut cache)
25 }
26}
27
28impl<'config> CrystalLexer<'config> {
29 pub fn new(config: &'config CrystalLanguage) -> Self {
31 Self { config }
32 }
33
34 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36 while state.not_at_end() {
37 let safe_point = state.get_position();
38
39 if self.skip_whitespace(state) {
40 continue;
41 }
42
43 if self.lex_newline(state) {
44 continue;
45 }
46
47 if self.lex_comment(state) {
48 continue;
49 }
50
51 if self.lex_string(state) {
52 continue;
53 }
54
55 if self.lex_number(state) {
56 continue;
57 }
58
59 if self.lex_keyword_or_identifier(state) {
60 continue;
61 }
62
63 if self.lex_operator(state) {
64 continue;
65 }
66
67 if self.lex_delimiter(state) {
68 continue;
69 }
70
71 let start_pos = state.get_position();
73 if let Some(ch) = state.peek() {
74 state.advance(ch.len_utf8());
75 state.add_token(CrystalTokenType::Error, start_pos, state.get_position())
76 }
77
78 state.advance_if_dead_lock(safe_point)
79 }
80
81 Ok(())
82 }
83
84 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
86 let start_pos = state.get_position();
87
88 while let Some(ch) = state.peek() {
89 if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
90 }
91
92 if state.get_position() > start_pos {
93 state.add_token(CrystalTokenType::Whitespace, start_pos, state.get_position());
94 true
95 }
96 else {
97 false
98 }
99 }
100
101 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
103 let start_pos = state.get_position();
104
105 if let Some('\n') = state.peek() {
106 state.advance(1);
107 state.add_token(CrystalTokenType::Newline, start_pos, state.get_position());
108 true
109 }
110 else if let Some('\r') = state.peek() {
111 state.advance(1);
112 if let Some('\n') = state.peek() {
113 state.advance(1)
114 }
115 state.add_token(CrystalTokenType::Newline, start_pos, state.get_position());
116 true
117 }
118 else {
119 false
120 }
121 }
122
123 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
125 let start_pos = state.get_position();
126
127 if let Some('#') = state.peek() {
128 state.advance(1);
129
130 while let Some(ch) = state.peek() {
132 if ch == '\n' || ch == '\r' {
133 break;
134 }
135 state.advance(ch.len_utf8())
136 }
137
138 state.add_token(CrystalTokenType::Comment, start_pos, state.get_position());
139 true
140 }
141 else {
142 false
143 }
144 }
145
146 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
148 let start_pos = state.get_position();
149
150 if let Some(quote) = state.peek() {
151 if quote == '"' || quote == '\'' {
152 state.advance(1);
153
154 while let Some(ch) = state.peek() {
155 if ch == quote {
156 state.advance(1);
157 break;
158 }
159 else if ch == '\\' {
160 state.advance(1);
161 if let Some(_) = state.peek() {
162 state.advance(1)
163 }
164 }
165 else {
166 state.advance(ch.len_utf8())
167 }
168 }
169
170 state.add_token(CrystalTokenType::String, start_pos, state.get_position());
171 true
172 }
173 else {
174 false
175 }
176 }
177 else {
178 false
179 }
180 }
181
182 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
184 let start_pos = state.get_position();
185
186 if let Some(ch) = state.peek() {
187 if ch.is_ascii_digit() {
188 state.advance(1);
189
190 while let Some(ch) = state.peek() {
191 if ch.is_ascii_digit() || ch == '.' || ch == '_' { state.advance(1) } else { break }
192 }
193
194 state.add_token(CrystalTokenType::Number, start_pos, state.get_position());
195 true
196 }
197 else {
198 false
199 }
200 }
201 else {
202 false
203 }
204 }
205
206 fn lex_keyword_or_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
208 let start_pos = state.get_position();
209
210 if let Some(ch) = state.peek() {
211 if ch.is_ascii_alphabetic() || ch == '_' {
212 state.advance(ch.len_utf8());
213
214 while let Some(ch) = state.peek() {
215 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '?' || ch == '!' { state.advance(ch.len_utf8()) } else { break }
216 }
217
218 let end_pos = state.get_position();
219 let text = state.get_text_in(oak_core::Range { start: start_pos, end: end_pos });
220 let token_kind = match text.as_ref() {
221 "class" => CrystalTokenType::ClassKeyword,
223 "module" => CrystalTokenType::ModuleKeyword,
224 "def" => CrystalTokenType::DefKeyword,
225 "end" => CrystalTokenType::EndKeyword,
226 "if" => CrystalTokenType::IfKeyword,
227 "else" => CrystalTokenType::ElseKeyword,
228 "elsif" => CrystalTokenType::ElsifKeyword,
229 "unless" => CrystalTokenType::UnlessKeyword,
230 "case" => CrystalTokenType::CaseKeyword,
231 "when" => CrystalTokenType::WhenKeyword,
232 "then" => CrystalTokenType::ThenKeyword,
233 "while" => CrystalTokenType::WhileKeyword,
234 "until" => CrystalTokenType::UntilKeyword,
235 "for" => CrystalTokenType::ForKeyword,
236 "in" => CrystalTokenType::InKeyword,
237 "do" => CrystalTokenType::DoKeyword,
238 "begin" => CrystalTokenType::BeginKeyword,
239 "rescue" => CrystalTokenType::RescueKeyword,
240 "ensure" => CrystalTokenType::EnsureKeyword,
241 "break" => CrystalTokenType::BreakKeyword,
242 "next" => CrystalTokenType::NextKeyword,
243 "return" => CrystalTokenType::ReturnKeyword,
244 "yield" => CrystalTokenType::YieldKeyword,
245 "super" => CrystalTokenType::SuperKeyword,
246 "self" => CrystalTokenType::SelfKeyword,
247 "true" => CrystalTokenType::TrueKeyword,
248 "false" => CrystalTokenType::FalseKeyword,
249 "nil" => CrystalTokenType::NilKeyword,
250 "and" => CrystalTokenType::AndKeyword,
251 "or" => CrystalTokenType::OrKeyword,
252 "not" => CrystalTokenType::NotKeyword,
253 _ => CrystalTokenType::Identifier,
254 };
255
256 state.add_token(token_kind, start_pos, state.get_position());
257 true
258 }
259 else {
260 false
261 }
262 }
263 else {
264 false
265 }
266 }
267
268 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
270 let start_pos = state.get_position();
271
272 if let Some(ch) = state.peek() {
273 let token_kind = match ch {
274 '+' => {
275 state.advance(1);
276 if let Some('=') = state.peek() {
277 state.advance(1);
278 CrystalTokenType::PlusEqual
279 }
280 else {
281 CrystalTokenType::Plus
282 }
283 }
284 '-' => {
285 state.advance(1);
286 if let Some('=') = state.peek() {
287 state.advance(1);
288 CrystalTokenType::MinusEqual
289 }
290 else {
291 CrystalTokenType::Minus
292 }
293 }
294 '*' => {
295 state.advance(1);
296 if let Some('*') = state.peek() {
297 state.advance(1);
298 if let Some('=') = state.peek() {
299 state.advance(1);
300 CrystalTokenType::StarStarEqual
301 }
302 else {
303 CrystalTokenType::StarStar
304 }
305 }
306 else if let Some('=') = state.peek() {
307 state.advance(1);
308 CrystalTokenType::StarEqual
309 }
310 else {
311 CrystalTokenType::Star
312 }
313 }
314 '/' => {
315 state.advance(1);
316 if let Some('=') = state.peek() {
317 state.advance(1);
318 CrystalTokenType::SlashEqual
319 }
320 else {
321 CrystalTokenType::Slash
322 }
323 }
324 '%' => {
325 state.advance(1);
326 if let Some('=') = state.peek() {
327 state.advance(1);
328 CrystalTokenType::PercentEqual
329 }
330 else {
331 CrystalTokenType::Percent
332 }
333 }
334 '=' => {
335 state.advance(1);
336 if let Some('=') = state.peek() {
337 state.advance(1);
338 CrystalTokenType::EqualEqual
339 }
340 else if let Some('~') = state.peek() {
341 state.advance(1);
342 CrystalTokenType::Match
343 }
344 else {
345 CrystalTokenType::Equal
346 }
347 }
348 '!' => {
349 state.advance(1);
350 if let Some('=') = state.peek() {
351 state.advance(1);
352 CrystalTokenType::NotEqual
353 }
354 else if let Some('~') = state.peek() {
355 state.advance(1);
356 CrystalTokenType::NotMatch
357 }
358 else {
359 CrystalTokenType::Not
360 }
361 }
362 '<' => {
363 state.advance(1);
364 if let Some('=') = state.peek() {
365 state.advance(1);
366 if let Some('>') = state.peek() {
367 state.advance(1);
368 CrystalTokenType::Spaceship
369 }
370 else {
371 CrystalTokenType::LessEqual
372 }
373 }
374 else if let Some('<') = state.peek() {
375 state.advance(1);
376 if let Some('=') = state.peek() {
377 state.advance(1);
378 CrystalTokenType::LeftShiftEqual
379 }
380 else {
381 CrystalTokenType::LeftShift
382 }
383 }
384 else {
385 CrystalTokenType::Less
386 }
387 }
388 '>' => {
389 state.advance(1);
390 if let Some('=') = state.peek() {
391 state.advance(1);
392 CrystalTokenType::GreaterEqual
393 }
394 else if let Some('>') = state.peek() {
395 state.advance(1);
396 if let Some('=') = state.peek() {
397 state.advance(1);
398 CrystalTokenType::RightShiftEqual
399 }
400 else {
401 CrystalTokenType::RightShift
402 }
403 }
404 else {
405 CrystalTokenType::Greater
406 }
407 }
408 '&' => {
409 state.advance(1);
410 if let Some('&') = state.peek() {
411 state.advance(1);
412 if let Some('=') = state.peek() {
413 state.advance(1);
414 CrystalTokenType::LogicalAndEqual
415 }
416 else {
417 CrystalTokenType::LogicalAnd
418 }
419 }
420 else if let Some('=') = state.peek() {
421 state.advance(1);
422 CrystalTokenType::AndEqual
423 }
424 else {
425 CrystalTokenType::BitwiseAnd
426 }
427 }
428 '|' => {
429 state.advance(1);
430 if let Some('|') = state.peek() {
431 state.advance(1);
432 if let Some('=') = state.peek() {
433 state.advance(1);
434 CrystalTokenType::LogicalOrEqual
435 }
436 else {
437 CrystalTokenType::LogicalOr
438 }
439 }
440 else if let Some('=') = state.peek() {
441 state.advance(1);
442 CrystalTokenType::OrEqual
443 }
444 else {
445 CrystalTokenType::BitwiseOr
446 }
447 }
448 '^' => {
449 state.advance(1);
450 if let Some('=') = state.peek() {
451 state.advance(1);
452 CrystalTokenType::XorEqual
453 }
454 else {
455 CrystalTokenType::BitwiseXor
456 }
457 }
458 '~' => {
459 state.advance(1);
460 CrystalTokenType::BitwiseNot
461 }
462 _ => return false,
463 };
464
465 state.add_token(token_kind, start_pos, state.get_position());
466 true
467 }
468 else {
469 false
470 }
471 }
472
473 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
475 let start_pos = state.get_position();
476
477 if let Some(ch) = state.peek() {
478 let token_kind = match ch {
479 '(' => {
480 state.advance(1);
481 CrystalTokenType::LeftParen
482 }
483 ')' => {
484 state.advance(1);
485 CrystalTokenType::RightParen
486 }
487 '{' => {
488 state.advance(1);
489 CrystalTokenType::LeftBrace
490 }
491 '}' => {
492 state.advance(1);
493 CrystalTokenType::RightBrace
494 }
495 '[' => {
496 state.advance(1);
497 CrystalTokenType::LeftBracket
498 }
499 ']' => {
500 state.advance(1);
501 CrystalTokenType::RightBracket
502 }
503 ',' => {
504 state.advance(1);
505 CrystalTokenType::Comma
506 }
507 ';' => {
508 state.advance(1);
509 CrystalTokenType::Semicolon
510 }
511 '.' => {
512 state.advance(1);
513 if let Some('.') = state.peek() {
514 state.advance(1);
515 if let Some('.') = state.peek() {
516 state.advance(1);
517 CrystalTokenType::DotDotDot
518 }
519 else {
520 CrystalTokenType::DotDot
521 }
522 }
523 else {
524 CrystalTokenType::Dot
525 }
526 }
527 ':' => {
528 state.advance(1);
529 if let Some(':') = state.peek() {
530 state.advance(1);
531 CrystalTokenType::DoubleColon
532 }
533 else {
534 CrystalTokenType::At }
536 }
537 '?' => {
538 state.advance(1);
539 CrystalTokenType::Question
540 }
541 '@' => {
542 state.advance(1);
543 if let Some('@') = state.peek() {
544 state.advance(1);
545 CrystalTokenType::DoubleAt
546 }
547 else {
548 CrystalTokenType::At
549 }
550 }
551 '$' => {
552 state.advance(1);
553 CrystalTokenType::Dollar
554 }
555 _ => return false,
556 };
557
558 state.add_token(token_kind, start_pos, state.get_position());
559 true
560 }
561 else {
562 false
563 }
564 }
565}