oak_julia/lexer/
mod.rs

1use crate::{kind::JuliaSyntaxKind, language::JuliaLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, JuliaLanguage>;
5
/// Lexer for Julia source text.
///
/// Borrows a [`JuliaLanguage`] configuration for the lifetime `'config`.
/// None of the lexing rules visible in this file read the configuration,
/// which is why the field name carries a leading underscore.
#[derive(Clone, Debug)]
pub struct JuliaLexer<'config> {
    /// Language configuration this lexer was constructed with.
    _config: &'config JuliaLanguage,
}
10
11impl<'config> JuliaLexer<'config> {
12    pub fn new(config: &'config JuliaLanguage) -> Self {
13        Self { _config: config }
14    }
15}
16
17impl<'config> Lexer<JuliaLanguage> for JuliaLexer<'config> {
18    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<JuliaLanguage>) -> LexOutput<JuliaLanguage> {
19        let mut state = LexerState::new(source);
20        let result = self.run(&mut state);
21        if result.is_ok() {
22            state.add_eof();
23        }
24        state.finish_with_cache(result, cache)
25    }
26}
27
28impl<'config> JuliaLexer<'config> {
29    /// 跳过空白字符
30    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
31        let start_pos = state.get_position();
32
33        while let Some(ch) = state.peek() {
34            if ch == ' ' || ch == '\t' {
35                state.advance(ch.len_utf8());
36            }
37            else {
38                break;
39            }
40        }
41
42        if state.get_position() > start_pos {
43            state.add_token(JuliaSyntaxKind::Whitespace, start_pos, state.get_position());
44            true
45        }
46        else {
47            false
48        }
49    }
50
51    /// 处理换行
52    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
53        let start_pos = state.get_position();
54
55        if let Some('\n') = state.peek() {
56            state.advance(1);
57            state.add_token(JuliaSyntaxKind::Newline, start_pos, state.get_position());
58            true
59        }
60        else if let Some('\r') = state.peek() {
61            state.advance(1);
62            if let Some('\n') = state.peek() {
63                state.advance(1);
64            }
65            state.add_token(JuliaSyntaxKind::Newline, start_pos, state.get_position());
66            true
67        }
68        else {
69            false
70        }
71    }
72
73    /// 处理标识符和关键字
74    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
75        let start_pos = state.get_position();
76
77        if let Some(ch) = state.peek() {
78            if ch.is_ascii_alphabetic() || ch == '_' {
79                state.advance(ch.len_utf8());
80
81                while let Some(ch) = state.peek() {
82                    if ch.is_ascii_alphanumeric() || ch == '_' || ch == '!' || ch == '?' {
83                        state.advance(ch.len_utf8());
84                    }
85                    else {
86                        break;
87                    }
88                }
89
90                let end_pos = state.get_position();
91                let identifier_str = state.get_text_in((start_pos..end_pos).into());
92
93                // 检查是否是关键字
94                if let Some(keyword_kind) = JuliaSyntaxKind::from_str(identifier_str.as_ref()) {
95                    state.add_token(keyword_kind, start_pos, end_pos);
96                }
97                else {
98                    state.add_token(JuliaSyntaxKind::Identifier, start_pos, end_pos);
99                }
100                true
101            }
102            else {
103                false
104            }
105        }
106        else {
107            false
108        }
109    }
110
111    /// 处理数字字面量
112    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
113        let start_pos = state.get_position();
114
115        if let Some(ch) = state.peek() {
116            if ch.is_ascii_digit() {
117                state.advance(1);
118
119                // 处理整数部分
120                while let Some(ch) = state.peek() {
121                    if ch.is_ascii_digit() || ch == '_' {
122                        state.advance(1);
123                    }
124                    else {
125                        break;
126                    }
127                }
128
129                let mut is_float = false;
130
131                // 检查小数点
132                if let Some('.') = state.peek() {
133                    // 检查下一个字符是否是数字,避免与范围操作符混淆
134                    if let Some(next_ch) = state.peek_next_n(1) {
135                        if next_ch.is_ascii_digit() {
136                            is_float = true;
137                            state.advance(1); // 跳过小数
138                            // 处理小数部分
139                            while let Some(ch) = state.peek() {
140                                if ch.is_ascii_digit() || ch == '_' {
141                                    state.advance(1);
142                                }
143                                else {
144                                    break;
145                                }
146                            }
147                        }
148                    }
149                }
150
151                // 检查科学计数法
152                if let Some(ch) = state.peek() {
153                    if ch == 'e' || ch == 'E' {
154                        is_float = true;
155                        state.advance(1);
156
157                        // 可选的符号
158                        if let Some(sign) = state.peek() {
159                            if sign == '+' || sign == '-' {
160                                state.advance(1);
161                            }
162                        }
163
164                        // 指数部分
165                        while let Some(ch) = state.peek() {
166                            if ch.is_ascii_digit() {
167                                state.advance(1);
168                            }
169                            else {
170                                break;
171                            }
172                        }
173                    }
174                }
175
176                // 检查类型后缀 (f32, f64, i32, i64)
177                if let Some(ch) = state.peek() {
178                    if ch.is_ascii_alphabetic() {
179                        while let Some(ch) = state.peek() {
180                            if ch.is_ascii_alphanumeric() {
181                                state.advance(1);
182                            }
183                            else {
184                                break;
185                            }
186                        }
187                    }
188                }
189
190                let token_kind = if is_float { JuliaSyntaxKind::FloatLiteral } else { JuliaSyntaxKind::IntegerLiteral };
191
192                state.add_token(token_kind, start_pos, state.get_position());
193                true
194            }
195            else {
196                false
197            }
198        }
199        else {
200            false
201        }
202    }
203
204    /// 处理字符串字面量
205    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
206        let start_pos = state.get_position();
207
208        if let Some(quote) = state.peek() {
209            if quote == '"' || quote == '\'' {
210                state.advance(1);
211                let mut found_end = false;
212
213                while let Some(ch) = state.peek() {
214                    if ch == quote {
215                        state.advance(1);
216                        found_end = true;
217                        break;
218                    }
219                    else if ch == '\\' {
220                        // 处理转义字符
221                        state.advance(1);
222                        if let Some(_) = state.peek() {
223                            state.advance(1);
224                        }
225                    }
226                    else {
227                        state.advance(ch.len_utf8());
228                    }
229                }
230
231                if found_end {
232                    let token_kind = if quote == '\'' { JuliaSyntaxKind::CharLiteral } else { JuliaSyntaxKind::StringLiteral };
233                    state.add_token(token_kind, start_pos, state.get_position());
234                    true
235                }
236                else {
237                    // 未找到结束引号,回退到开始位
238                    state.set_position(start_pos);
239                    false
240                }
241            }
242            else {
243                false
244            }
245        }
246        else {
247            false
248        }
249    }
250
251    /// 处理三重引号字符
252    fn lex_triple_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
253        let start_pos = state.get_position();
254
255        // 检查是否是三重引号
256        if let Some('"') = state.peek() {
257            if let Some('"') = state.peek_next_n(1) {
258                if let Some('"') = state.peek_next_n(2) {
259                    state.advance(3);
260
261                    // 寻找结束的三重引号
262                    while let Some(ch) = state.peek() {
263                        if ch == '"' {
264                            if let Some('"') = state.peek_next_n(1) {
265                                if let Some('"') = state.peek_next_n(2) {
266                                    state.advance(3);
267                                    state.add_token(JuliaSyntaxKind::StringLiteral, start_pos, state.get_position());
268                                    return true;
269                                }
270                            }
271                        }
272                        state.advance(ch.len_utf8());
273                    }
274
275                    // 未找到结束的三重引号,回退
276                    state.set_position(start_pos);
277                }
278            }
279        }
280        false
281    }
282
283    /// 处理注释
284    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
285        let start_pos = state.get_position();
286
287        if let Some('#') = state.peek() {
288            // 检查是否是多行注释 #=
289            if let Some('=') = state.peek_next_n(1) {
290                state.advance(2);
291                let mut depth = 1;
292
293                while let Some(ch) = state.peek() {
294                    if depth == 0 {
295                        break;
296                    }
297                    if ch == '#' && state.peek_next_n(1) == Some('=') {
298                        depth += 1;
299                        state.advance(2);
300                    }
301                    else if ch == '=' && state.peek_next_n(1) == Some('#') {
302                        depth -= 1;
303                        state.advance(2);
304                    }
305                    else {
306                        state.advance(ch.len_utf8());
307                    }
308                }
309
310                state.add_token(JuliaSyntaxKind::Comment, start_pos, state.get_position());
311                true
312            }
313            else {
314                // 单行注释
315                state.advance(1);
316
317                while let Some(ch) = state.peek() {
318                    if ch == '\n' || ch == '\r' {
319                        break;
320                    }
321                    state.advance(ch.len_utf8());
322                }
323
324                state.add_token(JuliaSyntaxKind::Comment, start_pos, state.get_position());
325                true
326            }
327        }
328        else {
329            false
330        }
331    }
332
333    /// 处理操作符
334    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
335        let start_pos = state.get_position();
336
337        if let Some(ch) = state.peek() {
338            let token_kind = match ch {
339                '+' => {
340                    state.advance(1);
341                    if let Some('=') = state.peek() {
342                        state.advance(1);
343                        JuliaSyntaxKind::PlusAssign
344                    }
345                    else {
346                        JuliaSyntaxKind::Plus
347                    }
348                }
349                '-' => {
350                    state.advance(1);
351                    if let Some('=') = state.peek() {
352                        state.advance(1);
353                        JuliaSyntaxKind::MinusAssign
354                    }
355                    else if let Some('>') = state.peek() {
356                        state.advance(1);
357                        JuliaSyntaxKind::Arrow
358                    }
359                    else {
360                        JuliaSyntaxKind::Minus
361                    }
362                }
363                '*' => {
364                    state.advance(1);
365                    if let Some('=') = state.peek() {
366                        state.advance(1);
367                        JuliaSyntaxKind::StarAssign
368                    }
369                    else {
370                        JuliaSyntaxKind::Star
371                    }
372                }
373                '/' => {
374                    state.advance(1);
375                    if let Some('=') = state.peek() {
376                        state.advance(1);
377                        JuliaSyntaxKind::SlashAssign
378                    }
379                    else {
380                        JuliaSyntaxKind::Slash
381                    }
382                }
383                '%' => {
384                    state.advance(1);
385                    if let Some('=') = state.peek() {
386                        state.advance(1);
387                        JuliaSyntaxKind::PercentAssign
388                    }
389                    else {
390                        JuliaSyntaxKind::Percent
391                    }
392                }
393                '^' => {
394                    state.advance(1);
395                    if let Some('=') = state.peek() {
396                        state.advance(1);
397                        JuliaSyntaxKind::CaretAssign
398                    }
399                    else {
400                        JuliaSyntaxKind::Caret
401                    }
402                }
403                '=' => {
404                    state.advance(1);
405                    if let Some('=') = state.peek() {
406                        state.advance(1);
407                        JuliaSyntaxKind::Equal
408                    }
409                    else if let Some('>') = state.peek() {
410                        state.advance(1);
411                        JuliaSyntaxKind::FatArrow
412                    }
413                    else {
414                        JuliaSyntaxKind::Assign
415                    }
416                }
417                '!' => {
418                    state.advance(1);
419                    if let Some('=') = state.peek() {
420                        state.advance(1);
421                        JuliaSyntaxKind::NotEqual
422                    }
423                    else {
424                        JuliaSyntaxKind::Not
425                    }
426                }
427                '<' => {
428                    state.advance(1);
429                    if let Some('=') = state.peek() {
430                        state.advance(1);
431                        JuliaSyntaxKind::LessEqual
432                    }
433                    else if let Some('<') = state.peek() {
434                        state.advance(1);
435                        JuliaSyntaxKind::LeftShift
436                    }
437                    else {
438                        JuliaSyntaxKind::LessThan
439                    }
440                }
441                '>' => {
442                    state.advance(1);
443                    if let Some('=') = state.peek() {
444                        state.advance(1);
445                        JuliaSyntaxKind::GreaterEqual
446                    }
447                    else if let Some('>') = state.peek() {
448                        state.advance(1);
449                        JuliaSyntaxKind::RightShift
450                    }
451                    else {
452                        JuliaSyntaxKind::GreaterThan
453                    }
454                }
455                '&' => {
456                    state.advance(1);
457                    if let Some('&') = state.peek() {
458                        state.advance(1);
459                        JuliaSyntaxKind::And
460                    }
461                    else {
462                        JuliaSyntaxKind::BitAnd
463                    }
464                }
465                '|' => {
466                    state.advance(1);
467                    if let Some('|') = state.peek() {
468                        state.advance(1);
469                        JuliaSyntaxKind::Or
470                    }
471                    else {
472                        JuliaSyntaxKind::BitOr
473                    }
474                }
475                '~' => {
476                    state.advance(1);
477                    JuliaSyntaxKind::BitNot
478                }
479                ':' => {
480                    state.advance(1);
481                    JuliaSyntaxKind::Colon
482                }
483                '.' => {
484                    state.advance(1);
485                    if let Some('.') = state.peek() {
486                        state.advance(1);
487                        JuliaSyntaxKind::Range
488                    }
489                    else {
490                        JuliaSyntaxKind::Dot
491                    }
492                }
493                _ => return false,
494            };
495
496            state.add_token(token_kind, start_pos, state.get_position());
497            true
498        }
499        else {
500            false
501        }
502    }
503
504    /// 处理分隔符
505    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
506        let start_pos = state.get_position();
507
508        if let Some(ch) = state.peek() {
509            let token_kind = match ch {
510                '(' => JuliaSyntaxKind::LeftParen,
511                ')' => JuliaSyntaxKind::RightParen,
512                '[' => JuliaSyntaxKind::LeftBracket,
513                ']' => JuliaSyntaxKind::RightBracket,
514                '{' => JuliaSyntaxKind::LeftBrace,
515                '}' => JuliaSyntaxKind::RightBrace,
516                ',' => JuliaSyntaxKind::Comma,
517                ';' => JuliaSyntaxKind::Semicolon,
518                _ => return false,
519            };
520
521            state.advance(ch.len_utf8());
522            state.add_token(token_kind, start_pos, state.get_position());
523            true
524        }
525        else {
526            false
527        }
528    }
529}
530
531impl<'config> JuliaLexer<'config> {
532    /// 主要的词法分析循环
533    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
534        while state.not_at_end() {
535            let safe_point = state.get_position();
536
537            // 尝试各种词法规则
538            if self.skip_whitespace(state) {
539                continue;
540            }
541
542            if self.lex_newline(state) {
543                continue;
544            }
545
546            if self.lex_comment(state) {
547                continue;
548            }
549
550            if self.lex_triple_string(state) {
551                continue;
552            }
553
554            if self.lex_string(state) {
555                continue;
556            }
557
558            if self.lex_number(state) {
559                continue;
560            }
561
562            if self.lex_identifier_or_keyword(state) {
563                continue;
564            }
565
566            if self.lex_operator(state) {
567                continue;
568            }
569
570            if self.lex_delimiter(state) {
571                continue;
572            }
573
574            // 如果所有规则都不匹配,跳过当前字符并标记为错误
575            let start_pos = state.get_position();
576            if let Some(ch) = state.peek() {
577                state.advance(ch.len_utf8());
578                state.add_token(JuliaSyntaxKind::Error, start_pos, state.get_position());
579            }
580
581            state.advance_if_dead_lock(safe_point);
582        }
583
584        Ok(())
585    }
586}