1use crate::{kind::JuliaSyntaxKind, language::JuliaLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, JuliaLanguage>;
5
6#[derive(Clone, Debug)]
7pub struct JuliaLexer<'config> {
8 _config: &'config JuliaLanguage,
9}
10
11impl<'config> JuliaLexer<'config> {
12 pub fn new(config: &'config JuliaLanguage) -> Self {
13 Self { _config: config }
14 }
15}
16
17impl<'config> Lexer<JuliaLanguage> for JuliaLexer<'config> {
18 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<JuliaLanguage>) -> LexOutput<JuliaLanguage> {
19 let mut state = LexerState::new(source);
20 let result = self.run(&mut state);
21 if result.is_ok() {
22 state.add_eof();
23 }
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl<'config> JuliaLexer<'config> {
29 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
31 let start_pos = state.get_position();
32
33 while let Some(ch) = state.peek() {
34 if ch == ' ' || ch == '\t' {
35 state.advance(ch.len_utf8());
36 }
37 else {
38 break;
39 }
40 }
41
42 if state.get_position() > start_pos {
43 state.add_token(JuliaSyntaxKind::Whitespace, start_pos, state.get_position());
44 true
45 }
46 else {
47 false
48 }
49 }
50
51 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
53 let start_pos = state.get_position();
54
55 if let Some('\n') = state.peek() {
56 state.advance(1);
57 state.add_token(JuliaSyntaxKind::Newline, start_pos, state.get_position());
58 true
59 }
60 else if let Some('\r') = state.peek() {
61 state.advance(1);
62 if let Some('\n') = state.peek() {
63 state.advance(1);
64 }
65 state.add_token(JuliaSyntaxKind::Newline, start_pos, state.get_position());
66 true
67 }
68 else {
69 false
70 }
71 }
72
73 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
75 let start_pos = state.get_position();
76
77 if let Some(ch) = state.peek() {
78 if ch.is_ascii_alphabetic() || ch == '_' {
79 state.advance(ch.len_utf8());
80
81 while let Some(ch) = state.peek() {
82 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '!' || ch == '?' {
83 state.advance(ch.len_utf8());
84 }
85 else {
86 break;
87 }
88 }
89
90 let end_pos = state.get_position();
91 let identifier_str = state.get_text_in((start_pos..end_pos).into());
92
93 if let Some(keyword_kind) = JuliaSyntaxKind::from_str(identifier_str.as_ref()) {
95 state.add_token(keyword_kind, start_pos, end_pos);
96 }
97 else {
98 state.add_token(JuliaSyntaxKind::Identifier, start_pos, end_pos);
99 }
100 true
101 }
102 else {
103 false
104 }
105 }
106 else {
107 false
108 }
109 }
110
111 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
113 let start_pos = state.get_position();
114
115 if let Some(ch) = state.peek() {
116 if ch.is_ascii_digit() {
117 state.advance(1);
118
119 while let Some(ch) = state.peek() {
121 if ch.is_ascii_digit() || ch == '_' {
122 state.advance(1);
123 }
124 else {
125 break;
126 }
127 }
128
129 let mut is_float = false;
130
131 if let Some('.') = state.peek() {
133 if let Some(next_ch) = state.peek_next_n(1) {
135 if next_ch.is_ascii_digit() {
136 is_float = true;
137 state.advance(1); while let Some(ch) = state.peek() {
140 if ch.is_ascii_digit() || ch == '_' {
141 state.advance(1);
142 }
143 else {
144 break;
145 }
146 }
147 }
148 }
149 }
150
151 if let Some(ch) = state.peek() {
153 if ch == 'e' || ch == 'E' {
154 is_float = true;
155 state.advance(1);
156
157 if let Some(sign) = state.peek() {
159 if sign == '+' || sign == '-' {
160 state.advance(1);
161 }
162 }
163
164 while let Some(ch) = state.peek() {
166 if ch.is_ascii_digit() {
167 state.advance(1);
168 }
169 else {
170 break;
171 }
172 }
173 }
174 }
175
176 if let Some(ch) = state.peek() {
178 if ch.is_ascii_alphabetic() {
179 while let Some(ch) = state.peek() {
180 if ch.is_ascii_alphanumeric() {
181 state.advance(1);
182 }
183 else {
184 break;
185 }
186 }
187 }
188 }
189
190 let token_kind = if is_float { JuliaSyntaxKind::FloatLiteral } else { JuliaSyntaxKind::IntegerLiteral };
191
192 state.add_token(token_kind, start_pos, state.get_position());
193 true
194 }
195 else {
196 false
197 }
198 }
199 else {
200 false
201 }
202 }
203
204 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
206 let start_pos = state.get_position();
207
208 if let Some(quote) = state.peek() {
209 if quote == '"' || quote == '\'' {
210 state.advance(1);
211 let mut found_end = false;
212
213 while let Some(ch) = state.peek() {
214 if ch == quote {
215 state.advance(1);
216 found_end = true;
217 break;
218 }
219 else if ch == '\\' {
220 state.advance(1);
222 if let Some(_) = state.peek() {
223 state.advance(1);
224 }
225 }
226 else {
227 state.advance(ch.len_utf8());
228 }
229 }
230
231 if found_end {
232 let token_kind = if quote == '\'' { JuliaSyntaxKind::CharLiteral } else { JuliaSyntaxKind::StringLiteral };
233 state.add_token(token_kind, start_pos, state.get_position());
234 true
235 }
236 else {
237 state.set_position(start_pos);
239 false
240 }
241 }
242 else {
243 false
244 }
245 }
246 else {
247 false
248 }
249 }
250
251 fn lex_triple_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
253 let start_pos = state.get_position();
254
255 if let Some('"') = state.peek() {
257 if let Some('"') = state.peek_next_n(1) {
258 if let Some('"') = state.peek_next_n(2) {
259 state.advance(3);
260
261 while let Some(ch) = state.peek() {
263 if ch == '"' {
264 if let Some('"') = state.peek_next_n(1) {
265 if let Some('"') = state.peek_next_n(2) {
266 state.advance(3);
267 state.add_token(JuliaSyntaxKind::StringLiteral, start_pos, state.get_position());
268 return true;
269 }
270 }
271 }
272 state.advance(ch.len_utf8());
273 }
274
275 state.set_position(start_pos);
277 }
278 }
279 }
280 false
281 }
282
283 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
285 let start_pos = state.get_position();
286
287 if let Some('#') = state.peek() {
288 if let Some('=') = state.peek_next_n(1) {
290 state.advance(2);
291 let mut depth = 1;
292
293 while let Some(ch) = state.peek() {
294 if depth == 0 {
295 break;
296 }
297 if ch == '#' && state.peek_next_n(1) == Some('=') {
298 depth += 1;
299 state.advance(2);
300 }
301 else if ch == '=' && state.peek_next_n(1) == Some('#') {
302 depth -= 1;
303 state.advance(2);
304 }
305 else {
306 state.advance(ch.len_utf8());
307 }
308 }
309
310 state.add_token(JuliaSyntaxKind::Comment, start_pos, state.get_position());
311 true
312 }
313 else {
314 state.advance(1);
316
317 while let Some(ch) = state.peek() {
318 if ch == '\n' || ch == '\r' {
319 break;
320 }
321 state.advance(ch.len_utf8());
322 }
323
324 state.add_token(JuliaSyntaxKind::Comment, start_pos, state.get_position());
325 true
326 }
327 }
328 else {
329 false
330 }
331 }
332
333 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
335 let start_pos = state.get_position();
336
337 if let Some(ch) = state.peek() {
338 let token_kind = match ch {
339 '+' => {
340 state.advance(1);
341 if let Some('=') = state.peek() {
342 state.advance(1);
343 JuliaSyntaxKind::PlusAssign
344 }
345 else {
346 JuliaSyntaxKind::Plus
347 }
348 }
349 '-' => {
350 state.advance(1);
351 if let Some('=') = state.peek() {
352 state.advance(1);
353 JuliaSyntaxKind::MinusAssign
354 }
355 else if let Some('>') = state.peek() {
356 state.advance(1);
357 JuliaSyntaxKind::Arrow
358 }
359 else {
360 JuliaSyntaxKind::Minus
361 }
362 }
363 '*' => {
364 state.advance(1);
365 if let Some('=') = state.peek() {
366 state.advance(1);
367 JuliaSyntaxKind::StarAssign
368 }
369 else {
370 JuliaSyntaxKind::Star
371 }
372 }
373 '/' => {
374 state.advance(1);
375 if let Some('=') = state.peek() {
376 state.advance(1);
377 JuliaSyntaxKind::SlashAssign
378 }
379 else {
380 JuliaSyntaxKind::Slash
381 }
382 }
383 '%' => {
384 state.advance(1);
385 if let Some('=') = state.peek() {
386 state.advance(1);
387 JuliaSyntaxKind::PercentAssign
388 }
389 else {
390 JuliaSyntaxKind::Percent
391 }
392 }
393 '^' => {
394 state.advance(1);
395 if let Some('=') = state.peek() {
396 state.advance(1);
397 JuliaSyntaxKind::CaretAssign
398 }
399 else {
400 JuliaSyntaxKind::Caret
401 }
402 }
403 '=' => {
404 state.advance(1);
405 if let Some('=') = state.peek() {
406 state.advance(1);
407 JuliaSyntaxKind::Equal
408 }
409 else if let Some('>') = state.peek() {
410 state.advance(1);
411 JuliaSyntaxKind::FatArrow
412 }
413 else {
414 JuliaSyntaxKind::Assign
415 }
416 }
417 '!' => {
418 state.advance(1);
419 if let Some('=') = state.peek() {
420 state.advance(1);
421 JuliaSyntaxKind::NotEqual
422 }
423 else {
424 JuliaSyntaxKind::Not
425 }
426 }
427 '<' => {
428 state.advance(1);
429 if let Some('=') = state.peek() {
430 state.advance(1);
431 JuliaSyntaxKind::LessEqual
432 }
433 else if let Some('<') = state.peek() {
434 state.advance(1);
435 JuliaSyntaxKind::LeftShift
436 }
437 else {
438 JuliaSyntaxKind::LessThan
439 }
440 }
441 '>' => {
442 state.advance(1);
443 if let Some('=') = state.peek() {
444 state.advance(1);
445 JuliaSyntaxKind::GreaterEqual
446 }
447 else if let Some('>') = state.peek() {
448 state.advance(1);
449 JuliaSyntaxKind::RightShift
450 }
451 else {
452 JuliaSyntaxKind::GreaterThan
453 }
454 }
455 '&' => {
456 state.advance(1);
457 if let Some('&') = state.peek() {
458 state.advance(1);
459 JuliaSyntaxKind::And
460 }
461 else {
462 JuliaSyntaxKind::BitAnd
463 }
464 }
465 '|' => {
466 state.advance(1);
467 if let Some('|') = state.peek() {
468 state.advance(1);
469 JuliaSyntaxKind::Or
470 }
471 else {
472 JuliaSyntaxKind::BitOr
473 }
474 }
475 '~' => {
476 state.advance(1);
477 JuliaSyntaxKind::BitNot
478 }
479 ':' => {
480 state.advance(1);
481 JuliaSyntaxKind::Colon
482 }
483 '.' => {
484 state.advance(1);
485 if let Some('.') = state.peek() {
486 state.advance(1);
487 JuliaSyntaxKind::Range
488 }
489 else {
490 JuliaSyntaxKind::Dot
491 }
492 }
493 _ => return false,
494 };
495
496 state.add_token(token_kind, start_pos, state.get_position());
497 true
498 }
499 else {
500 false
501 }
502 }
503
504 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
506 let start_pos = state.get_position();
507
508 if let Some(ch) = state.peek() {
509 let token_kind = match ch {
510 '(' => JuliaSyntaxKind::LeftParen,
511 ')' => JuliaSyntaxKind::RightParen,
512 '[' => JuliaSyntaxKind::LeftBracket,
513 ']' => JuliaSyntaxKind::RightBracket,
514 '{' => JuliaSyntaxKind::LeftBrace,
515 '}' => JuliaSyntaxKind::RightBrace,
516 ',' => JuliaSyntaxKind::Comma,
517 ';' => JuliaSyntaxKind::Semicolon,
518 _ => return false,
519 };
520
521 state.advance(ch.len_utf8());
522 state.add_token(token_kind, start_pos, state.get_position());
523 true
524 }
525 else {
526 false
527 }
528 }
529}
530
531impl<'config> JuliaLexer<'config> {
532 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
534 while state.not_at_end() {
535 let safe_point = state.get_position();
536
537 if self.skip_whitespace(state) {
539 continue;
540 }
541
542 if self.lex_newline(state) {
543 continue;
544 }
545
546 if self.lex_comment(state) {
547 continue;
548 }
549
550 if self.lex_triple_string(state) {
551 continue;
552 }
553
554 if self.lex_string(state) {
555 continue;
556 }
557
558 if self.lex_number(state) {
559 continue;
560 }
561
562 if self.lex_identifier_or_keyword(state) {
563 continue;
564 }
565
566 if self.lex_operator(state) {
567 continue;
568 }
569
570 if self.lex_delimiter(state) {
571 continue;
572 }
573
574 let start_pos = state.get_position();
576 if let Some(ch) = state.peek() {
577 state.advance(ch.len_utf8());
578 state.add_token(JuliaSyntaxKind::Error, start_pos, state.get_position());
579 }
580
581 state.advance_if_dead_lock(safe_point);
582 }
583
584 Ok(())
585 }
586}