1use crate::{kind::JuliaSyntaxKind, language::JuliaLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, lexer::LexOutput, source::Source};
3
4type State<S> = LexerState<S, JuliaLanguage>;
5
6#[derive(Clone, Debug)]
7pub struct JuliaLexer<'config> {
8 config: &'config JuliaLanguage,
9}
10
11impl<'config> JuliaLexer<'config> {
12 pub fn new(config: &'config JuliaLanguage) -> Self {
13 Self { config }
14 }
15
16 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
18 let start_pos = state.get_position();
19
20 while let Some(ch) = state.peek() {
21 if ch == ' ' || ch == '\t' {
22 state.advance(ch.len_utf8());
23 }
24 else {
25 break;
26 }
27 }
28
29 if state.get_position() > start_pos {
30 state.add_token(JuliaSyntaxKind::Whitespace, start_pos, state.get_position());
31 true
32 }
33 else {
34 false
35 }
36 }
37
38 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
40 let start_pos = state.get_position();
41
42 if let Some('\n') = state.peek() {
43 state.advance(1);
44 state.add_token(JuliaSyntaxKind::Newline, start_pos, state.get_position());
45 true
46 }
47 else if let Some('\r') = state.peek() {
48 state.advance(1);
49 if let Some('\n') = state.peek() {
50 state.advance(1);
51 }
52 state.add_token(JuliaSyntaxKind::Newline, start_pos, state.get_position());
53 true
54 }
55 else {
56 false
57 }
58 }
59
60 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
62 let start_pos = state.get_position();
63
64 if let Some(ch) = state.peek() {
65 if ch.is_ascii_alphabetic() || ch == '_' {
66 state.advance(ch.len_utf8());
67
68 while let Some(ch) = state.peek() {
69 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '!' || ch == '?' {
70 state.advance(ch.len_utf8());
71 }
72 else {
73 break;
74 }
75 }
76
77 let end_pos = state.get_position();
78 let identifier_str = state.get_text_in((start_pos..end_pos).into());
79
80 if let Some(keyword_kind) = JuliaSyntaxKind::from_str(identifier_str) {
82 state.add_token(keyword_kind, start_pos, end_pos);
83 }
84 else {
85 state.add_token(JuliaSyntaxKind::Identifier, start_pos, end_pos);
86 }
87 true
88 }
89 else {
90 false
91 }
92 }
93 else {
94 false
95 }
96 }
97
98 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
100 let start_pos = state.get_position();
101
102 if let Some(ch) = state.peek() {
103 if ch.is_ascii_digit() {
104 state.advance(1);
105
106 while let Some(ch) = state.peek() {
108 if ch.is_ascii_digit() || ch == '_' {
109 state.advance(1);
110 }
111 else {
112 break;
113 }
114 }
115
116 let mut is_float = false;
117
118 if let Some('.') = state.peek() {
120 if let Some(next_ch) = state.peek_next_n(1) {
122 if next_ch.is_ascii_digit() {
123 is_float = true;
124 state.advance(1); while let Some(ch) = state.peek() {
127 if ch.is_ascii_digit() || ch == '_' {
128 state.advance(1);
129 }
130 else {
131 break;
132 }
133 }
134 }
135 }
136 }
137
138 if let Some(ch) = state.peek() {
140 if ch == 'e' || ch == 'E' {
141 is_float = true;
142 state.advance(1);
143
144 if let Some(sign) = state.peek() {
146 if sign == '+' || sign == '-' {
147 state.advance(1);
148 }
149 }
150
151 while let Some(ch) = state.peek() {
153 if ch.is_ascii_digit() {
154 state.advance(1);
155 }
156 else {
157 break;
158 }
159 }
160 }
161 }
162
163 if let Some(ch) = state.peek() {
165 if ch.is_ascii_alphabetic() {
166 while let Some(ch) = state.peek() {
167 if ch.is_ascii_alphanumeric() {
168 state.advance(1);
169 }
170 else {
171 break;
172 }
173 }
174 }
175 }
176
177 let token_kind = if is_float { JuliaSyntaxKind::FloatLiteral } else { JuliaSyntaxKind::IntegerLiteral };
178
179 state.add_token(token_kind, start_pos, state.get_position());
180 true
181 }
182 else {
183 false
184 }
185 }
186 else {
187 false
188 }
189 }
190
191 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
193 let start_pos = state.get_position();
194
195 if let Some(quote) = state.peek() {
196 if quote == '"' || quote == '\'' {
197 state.advance(1);
198 let mut found_end = false;
199
200 while let Some(ch) = state.peek() {
201 if ch == quote {
202 state.advance(1);
203 found_end = true;
204 break;
205 }
206 else if ch == '\\' {
207 state.advance(1);
209 if let Some(_) = state.peek() {
210 state.advance(1);
211 }
212 }
213 else {
214 state.advance(ch.len_utf8());
215 }
216 }
217
218 if found_end {
219 let token_kind = if quote == '\'' { JuliaSyntaxKind::CharLiteral } else { JuliaSyntaxKind::StringLiteral };
220 state.add_token(token_kind, start_pos, state.get_position());
221 true
222 }
223 else {
224 false
226 }
227 }
228 else {
229 false
230 }
231 }
232 else {
233 false
234 }
235 }
236
237 fn lex_triple_string<S: Source>(&self, state: &mut State<S>) -> bool {
239 let start_pos = state.get_position();
240
241 if let Some('"') = state.peek() {
243 if let Some('"') = state.peek_next_n(1) {
244 if let Some('"') = state.peek_next_n(2) {
245 state.advance(3);
246
247 while let Some(ch) = state.peek() {
249 if ch == '"' {
250 if let Some('"') = state.peek_next_n(1) {
251 if let Some('"') = state.peek_next_n(2) {
252 state.advance(3);
253 state.add_token(JuliaSyntaxKind::StringLiteral, start_pos, state.get_position());
254 return true;
255 }
256 }
257 }
258 state.advance(ch.len_utf8());
259 }
260
261 state.set_position(start_pos);
263 }
264 }
265 }
266 false
267 }
268
269 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
271 let start_pos = state.get_position();
272
273 if let Some('#') = state.peek() {
274 if let Some('=') = state.peek_next_n(1) {
276 state.advance(2);
277 let mut depth = 1;
278
279 while let Some(ch) = state.peek()
280 && depth > 0
281 {
282 if ch == '#' && state.peek_next_n(1) == Some('=') {
283 depth += 1;
284 state.advance(2);
285 }
286 else if ch == '=' && state.peek_next_n(1) == Some('#') {
287 depth -= 1;
288 state.advance(2);
289 }
290 else {
291 state.advance(ch.len_utf8());
292 }
293 }
294
295 state.add_token(JuliaSyntaxKind::Comment, start_pos, state.get_position());
296 true
297 }
298 else {
299 state.advance(1);
301
302 while let Some(ch) = state.peek() {
303 if ch == '\n' || ch == '\r' {
304 break;
305 }
306 state.advance(ch.len_utf8());
307 }
308
309 state.add_token(JuliaSyntaxKind::Comment, start_pos, state.get_position());
310 true
311 }
312 }
313 else {
314 false
315 }
316 }
317
318 fn lex_operator<S: Source>(&self, state: &mut State<S>) -> bool {
320 let start_pos = state.get_position();
321
322 if let Some(ch) = state.peek() {
323 let token_kind = match ch {
324 '+' => {
325 state.advance(1);
326 if let Some('=') = state.peek() {
327 state.advance(1);
328 JuliaSyntaxKind::PlusAssign
329 }
330 else {
331 JuliaSyntaxKind::Plus
332 }
333 }
334 '-' => {
335 state.advance(1);
336 if let Some('=') = state.peek() {
337 state.advance(1);
338 JuliaSyntaxKind::MinusAssign
339 }
340 else if let Some('>') = state.peek() {
341 state.advance(1);
342 JuliaSyntaxKind::Arrow
343 }
344 else {
345 JuliaSyntaxKind::Minus
346 }
347 }
348 '*' => {
349 state.advance(1);
350 if let Some('=') = state.peek() {
351 state.advance(1);
352 JuliaSyntaxKind::StarAssign
353 }
354 else {
355 JuliaSyntaxKind::Star
356 }
357 }
358 '/' => {
359 state.advance(1);
360 if let Some('=') = state.peek() {
361 state.advance(1);
362 JuliaSyntaxKind::SlashAssign
363 }
364 else {
365 JuliaSyntaxKind::Slash
366 }
367 }
368 '%' => {
369 state.advance(1);
370 if let Some('=') = state.peek() {
371 state.advance(1);
372 JuliaSyntaxKind::PercentAssign
373 }
374 else {
375 JuliaSyntaxKind::Percent
376 }
377 }
378 '^' => {
379 state.advance(1);
380 if let Some('=') = state.peek() {
381 state.advance(1);
382 JuliaSyntaxKind::CaretAssign
383 }
384 else {
385 JuliaSyntaxKind::Caret
386 }
387 }
388 '=' => {
389 state.advance(1);
390 if let Some('=') = state.peek() {
391 state.advance(1);
392 JuliaSyntaxKind::Equal
393 }
394 else if let Some('>') = state.peek() {
395 state.advance(1);
396 JuliaSyntaxKind::FatArrow
397 }
398 else {
399 JuliaSyntaxKind::Assign
400 }
401 }
402 '!' => {
403 state.advance(1);
404 if let Some('=') = state.peek() {
405 state.advance(1);
406 JuliaSyntaxKind::NotEqual
407 }
408 else {
409 JuliaSyntaxKind::Not
410 }
411 }
412 '<' => {
413 state.advance(1);
414 if let Some('=') = state.peek() {
415 state.advance(1);
416 JuliaSyntaxKind::LessEqual
417 }
418 else if let Some('<') = state.peek() {
419 state.advance(1);
420 JuliaSyntaxKind::LeftShift
421 }
422 else {
423 JuliaSyntaxKind::LessThan
424 }
425 }
426 '>' => {
427 state.advance(1);
428 if let Some('=') = state.peek() {
429 state.advance(1);
430 JuliaSyntaxKind::GreaterEqual
431 }
432 else if let Some('>') = state.peek() {
433 state.advance(1);
434 JuliaSyntaxKind::RightShift
435 }
436 else {
437 JuliaSyntaxKind::GreaterThan
438 }
439 }
440 '&' => {
441 state.advance(1);
442 if let Some('&') = state.peek() {
443 state.advance(1);
444 JuliaSyntaxKind::And
445 }
446 else {
447 JuliaSyntaxKind::BitAnd
448 }
449 }
450 '|' => {
451 state.advance(1);
452 if let Some('|') = state.peek() {
453 state.advance(1);
454 JuliaSyntaxKind::Or
455 }
456 else {
457 JuliaSyntaxKind::BitOr
458 }
459 }
460 '~' => {
461 state.advance(1);
462 JuliaSyntaxKind::BitNot
463 }
464 ':' => {
465 state.advance(1);
466 JuliaSyntaxKind::Colon
467 }
468 '.' => {
469 state.advance(1);
470 if let Some('.') = state.peek() {
471 state.advance(1);
472 JuliaSyntaxKind::Range
473 }
474 else {
475 JuliaSyntaxKind::Dot
476 }
477 }
478 _ => return false,
479 };
480
481 state.add_token(token_kind, start_pos, state.get_position());
482 true
483 }
484 else {
485 false
486 }
487 }
488
489 fn lex_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
491 let start_pos = state.get_position();
492
493 if let Some(ch) = state.peek() {
494 let token_kind = match ch {
495 '(' => JuliaSyntaxKind::LeftParen,
496 ')' => JuliaSyntaxKind::RightParen,
497 '[' => JuliaSyntaxKind::LeftBracket,
498 ']' => JuliaSyntaxKind::RightBracket,
499 '{' => JuliaSyntaxKind::LeftBrace,
500 '}' => JuliaSyntaxKind::RightBrace,
501 ',' => JuliaSyntaxKind::Comma,
502 ';' => JuliaSyntaxKind::Semicolon,
503 _ => return false,
504 };
505
506 state.advance(ch.len_utf8());
507 state.add_token(token_kind, start_pos, state.get_position());
508 true
509 }
510 else {
511 false
512 }
513 }
514}
515
516impl<'config> Lexer<JuliaLanguage> for JuliaLexer<'config> {
517 fn lex_incremental(
518 &self,
519 source: impl Source,
520 changed: usize,
521 cache: IncrementalCache<JuliaLanguage>,
522 ) -> LexOutput<JuliaLanguage> {
523 let mut state = LexerState::new_with_cache(source, changed, cache);
524 let result = self.run(&mut state);
525 state.finish(result)
526 }
527}
528
529impl<'config> JuliaLexer<'config> {
530 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), oak_core::OakError> {
532 while state.not_at_end() {
533 if self.skip_whitespace(state) {
535 continue;
536 }
537
538 if self.lex_newline(state) {
539 continue;
540 }
541
542 if self.lex_comment(state) {
543 continue;
544 }
545
546 if self.lex_triple_string(state) {
547 continue;
548 }
549
550 if self.lex_string(state) {
551 continue;
552 }
553
554 if self.lex_number(state) {
555 continue;
556 }
557
558 if self.lex_identifier_or_keyword(state) {
559 continue;
560 }
561
562 if self.lex_operator(state) {
563 continue;
564 }
565
566 if self.lex_delimiter(state) {
567 continue;
568 }
569
570 let start_pos = state.get_position();
572 if let Some(ch) = state.peek() {
573 state.advance(ch.len_utf8());
574 state.add_token(JuliaSyntaxKind::Error, start_pos, state.get_position());
575 }
576 }
577
578 let eof_pos = state.get_position();
580 state.add_token(JuliaSyntaxKind::Eof, eof_pos, eof_pos);
581
582 Ok(())
583 }
584}