// oak_nim/lexer/mod.rs

1use crate::{kind::NimSyntaxKind, language::NimLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};
3use std::borrow::Cow;
4
5type State<'s, S> = LexerState<'s, S, NimLanguage>;
6
7#[derive(Clone, Debug)]
8pub struct NimLexer<'config> {
9    _config: &'config NimLanguage,
10}
11
12impl<'config> NimLexer<'config> {
13    pub fn new(config: &'config NimLanguage) -> Self {
14        Self { _config: config }
15    }
16
17    /// 跳过空白字符
18    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
19        let start_pos = state.get_position();
20
21        while let Some(ch) = state.peek() {
22            if ch == ' ' || ch == '\t' {
23                state.advance(ch.len_utf8());
24            }
25            else {
26                break;
27            }
28        }
29
30        if state.get_position() > start_pos {
31            state.add_token(NimSyntaxKind::Whitespace, start_pos, state.get_position());
32            true
33        }
34        else {
35            false
36        }
37    }
38
39    /// 处理换行
40    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
41        let start_pos = state.get_position();
42
43        if let Some('\n') = state.peek() {
44            state.advance(1);
45            state.add_token(NimSyntaxKind::Newline, start_pos, state.get_position());
46            true
47        }
48        else if let Some('\r') = state.peek() {
49            state.advance(1);
50            if let Some('\n') = state.peek() {
51                state.advance(1);
52            }
53            state.add_token(NimSyntaxKind::Newline, start_pos, state.get_position());
54            true
55        }
56        else {
57            false
58        }
59    }
60
61    /// 处理注释
62    fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
63        let start_pos = state.get_position();
64
65        if let Some('#') = state.peek() {
66            state.advance(1);
67
68            // 检查是否是文档注释 ##
69            if let Some('#') = state.peek() {
70                state.advance(1);
71            }
72
73            // 读取到行
74            while let Some(ch) = state.peek() {
75                if ch == '\n' || ch == '\r' {
76                    break;
77                }
78                state.advance(ch.len_utf8());
79            }
80
81            let kind = NimSyntaxKind::CommentToken;
82
83            state.add_token(kind, start_pos, state.get_position());
84            true
85        }
86        else {
87            false
88        }
89    }
90
91    /// 处理字符串字面量
92    fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
93        let start_pos = state.get_position();
94
95        if let Some('"') = state.peek() {
96            state.advance(1);
97
98            while let Some(ch) = state.peek() {
99                if ch == '"' {
100                    state.advance(1);
101                    break;
102                }
103                if ch == '\\' {
104                    state.advance(1);
105                    if let Some(c) = state.peek() {
106                        state.advance(c.len_utf8());
107                    }
108                }
109                else {
110                    state.advance(ch.len_utf8());
111                }
112            }
113
114            state.add_token(NimSyntaxKind::StringLiteral, start_pos, state.get_position());
115            true
116        }
117        else {
118            false
119        }
120    }
121
122    /// 处理字符字面量
123    fn lex_char<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
124        let start_pos = state.get_position();
125
126        if let Some('\'') = state.peek() {
127            state.advance(1);
128
129            if let Some('\\') = state.peek() {
130                state.advance(1);
131                if let Some(c) = state.peek() {
132                    state.advance(c.len_utf8());
133                }
134            }
135            else if let Some(c) = state.peek() {
136                if c != '\'' {
137                    state.advance(c.len_utf8());
138                }
139            }
140
141            if let Some('\'') = state.peek() {
142                state.advance(1);
143            }
144
145            state.add_token(NimSyntaxKind::CharLiteral, start_pos, state.get_position());
146            true
147        }
148        else {
149            false
150        }
151    }
152
153    /// 处理数字
154    fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
155        let start_pos = state.get_position();
156
157        if let Some(ch) = state.peek() {
158            if ch.is_ascii_digit() {
159                state.advance(ch.len_utf8());
160
161                while let Some(ch) = state.peek() {
162                    if ch.is_ascii_digit() || ch == '_' {
163                        state.advance(ch.len_utf8());
164                    }
165                    else {
166                        break;
167                    }
168                }
169
170                // 简单的浮点数处理
171                let mut is_float = false;
172                if let Some('.') = state.peek() {
173                    state.advance(1);
174                    is_float = true;
175                    while let Some(ch) = state.peek() {
176                        if ch.is_ascii_digit() || ch == '_' {
177                            state.advance(ch.len_utf8());
178                        }
179                        else {
180                            break;
181                        }
182                    }
183                }
184
185                let kind = if is_float { NimSyntaxKind::FloatLiteral } else { NimSyntaxKind::IntLiteral };
186                state.add_token(kind, start_pos, state.get_position());
187                true
188            }
189            else {
190                false
191            }
192        }
193        else {
194            false
195        }
196    }
197
198    /// 处理标识符和关键字
199    fn lex_identifier<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
200        let start_pos = state.get_position();
201
202        if let Some(ch) = state.peek() {
203            if ch.is_alphabetic() || ch == '_' {
204                state.advance(ch.len_utf8());
205
206                while let Some(ch) = state.peek() {
207                    if ch.is_alphanumeric() || ch == '_' {
208                        state.advance(ch.len_utf8());
209                    }
210                    else {
211                        break;
212                    }
213                }
214
215                let text = state.get_text_in((start_pos..state.get_position()).into());
216                let kind = match text {
217                    Cow::Borrowed("and") => NimSyntaxKind::AndKeyword,
218                    Cow::Borrowed("or") => NimSyntaxKind::OrKeyword,
219                    Cow::Borrowed("not") => NimSyntaxKind::NotKeyword,
220                    Cow::Borrowed("if") => NimSyntaxKind::IfKeyword,
221                    Cow::Borrowed("else") => NimSyntaxKind::ElseKeyword,
222                    Cow::Borrowed("elif") => NimSyntaxKind::ElifKeyword,
223                    Cow::Borrowed("while") => NimSyntaxKind::WhileKeyword,
224                    Cow::Borrowed("for") => NimSyntaxKind::ForKeyword,
225                    Cow::Borrowed("proc") => NimSyntaxKind::ProcKeyword,
226                    Cow::Borrowed("func") => NimSyntaxKind::FuncKeyword,
227                    Cow::Borrowed("var") => NimSyntaxKind::VarKeyword,
228                    Cow::Borrowed("let") => NimSyntaxKind::LetKeyword,
229                    Cow::Borrowed("const") => NimSyntaxKind::ConstKeyword,
230                    Cow::Borrowed("type") => NimSyntaxKind::TypeKeyword,
231                    Cow::Borrowed("import") => NimSyntaxKind::ImportKeyword,
232                    Cow::Borrowed("from") => NimSyntaxKind::FromKeyword,
233                    Cow::Borrowed("include") => NimSyntaxKind::IncludeKeyword,
234                    Cow::Borrowed("return") => NimSyntaxKind::ReturnKeyword,
235                    Cow::Borrowed("yield") => NimSyntaxKind::YieldKeyword,
236                    Cow::Borrowed("break") => NimSyntaxKind::BreakKeyword,
237                    Cow::Borrowed("continue") => NimSyntaxKind::ContinueKeyword,
238                    Cow::Borrowed("try") => NimSyntaxKind::TryKeyword,
239                    Cow::Borrowed("except") => NimSyntaxKind::ExceptKeyword,
240                    Cow::Borrowed("finally") => NimSyntaxKind::FinallyKeyword,
241                    Cow::Borrowed("raise") => NimSyntaxKind::RaiseKeyword,
242                    Cow::Borrowed("case") => NimSyntaxKind::CaseKeyword,
243                    Cow::Borrowed("of") => NimSyntaxKind::OfKeyword,
244                    Cow::Borrowed("when") => NimSyntaxKind::WhenKeyword,
245                    Cow::Borrowed("is") => NimSyntaxKind::IsKeyword,
246                    Cow::Borrowed("in") => NimSyntaxKind::InKeyword,
247                    Cow::Borrowed("nil") => NimSyntaxKind::NilKeyword,
248                    _ => NimSyntaxKind::Identifier,
249                };
250
251                state.add_token(kind, start_pos, state.get_position());
252                true
253            }
254            else {
255                false
256            }
257        }
258        else {
259            false
260        }
261    }
262
263    /// 处理操作符
264    fn lex_operator<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
265        if let Some(ch) = state.peek() {
266            let start_pos = state.get_position();
267
268            match ch {
269                '+' => {
270                    state.advance(1);
271                    state.add_token(NimSyntaxKind::Plus, start_pos, state.get_position());
272                    true
273                }
274                '-' => {
275                    state.advance(1);
276                    state.add_token(NimSyntaxKind::Minus, start_pos, state.get_position());
277                    true
278                }
279                '*' => {
280                    state.advance(1);
281                    state.add_token(NimSyntaxKind::Star, start_pos, state.get_position());
282                    true
283                }
284                '/' => {
285                    state.advance(1);
286                    state.add_token(NimSyntaxKind::Slash, start_pos, state.get_position());
287                    true
288                }
289                '=' => {
290                    state.advance(1);
291                    if state.peek() == Some('=') {
292                        state.advance(1);
293                        state.add_token(NimSyntaxKind::EqualEqual, start_pos, state.get_position());
294                    }
295                    else {
296                        state.add_token(NimSyntaxKind::Equal, start_pos, state.get_position());
297                    }
298                    true
299                }
300                '!' => {
301                    state.advance(1);
302                    if state.peek() == Some('=') {
303                        state.advance(1);
304                        state.add_token(NimSyntaxKind::NotEqual, start_pos, state.get_position());
305                    }
306                    else {
307                        state.add_token(NimSyntaxKind::Error, start_pos, state.get_position());
308                    }
309                    true
310                }
311                '<' => {
312                    state.advance(1);
313                    if state.peek() == Some('=') {
314                        state.advance(1);
315                        state.add_token(NimSyntaxKind::LessEqual, start_pos, state.get_position());
316                    }
317                    else if state.peek() == Some('<') {
318                        state.advance(1);
319                        state.add_token(NimSyntaxKind::LeftShift, start_pos, state.get_position());
320                    }
321                    else {
322                        state.add_token(NimSyntaxKind::Less, start_pos, state.get_position());
323                    }
324                    true
325                }
326                '>' => {
327                    state.advance(1);
328                    if state.peek() == Some('=') {
329                        state.advance(1);
330                        state.add_token(NimSyntaxKind::GreaterEqual, start_pos, state.get_position());
331                    }
332                    else if state.peek() == Some('>') {
333                        state.advance(1);
334                        state.add_token(NimSyntaxKind::RightShift, start_pos, state.get_position());
335                    }
336                    else {
337                        state.add_token(NimSyntaxKind::Greater, start_pos, state.get_position());
338                    }
339                    true
340                }
341                '(' => {
342                    state.advance(1);
343                    state.add_token(NimSyntaxKind::LeftParen, start_pos, state.get_position());
344                    true
345                }
346                ')' => {
347                    state.advance(1);
348                    state.add_token(NimSyntaxKind::RightParen, start_pos, state.get_position());
349                    true
350                }
351                '[' => {
352                    state.advance(1);
353                    state.add_token(NimSyntaxKind::LeftBracket, start_pos, state.get_position());
354                    true
355                }
356                ']' => {
357                    state.advance(1);
358                    state.add_token(NimSyntaxKind::RightBracket, start_pos, state.get_position());
359                    true
360                }
361                '{' => {
362                    state.advance(1);
363                    state.add_token(NimSyntaxKind::LeftBrace, start_pos, state.get_position());
364                    true
365                }
366                '}' => {
367                    state.advance(1);
368                    state.add_token(NimSyntaxKind::RightBrace, start_pos, state.get_position());
369                    true
370                }
371                ',' => {
372                    state.advance(1);
373                    state.add_token(NimSyntaxKind::Comma, start_pos, state.get_position());
374                    true
375                }
376                ';' => {
377                    state.advance(1);
378                    state.add_token(NimSyntaxKind::Semicolon, start_pos, state.get_position());
379                    true
380                }
381                ':' => {
382                    state.advance(1);
383                    state.add_token(NimSyntaxKind::Colon, start_pos, state.get_position());
384                    true
385                }
386                '.' => {
387                    state.advance(1);
388                    state.add_token(NimSyntaxKind::Dot, start_pos, state.get_position());
389                    true
390                }
391                _ => false,
392            }
393        }
394        else {
395            false
396        }
397    }
398
399    pub fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
400        while state.not_at_end() {
401            if self.skip_whitespace(state) {
402                continue;
403            }
404
405            if self.lex_newline(state) {
406                continue;
407            }
408
409            if self.lex_comment(state) {
410                continue;
411            }
412
413            if self.lex_string(state) {
414                continue;
415            }
416
417            if self.lex_char(state) {
418                continue;
419            }
420
421            if self.lex_number(state) {
422                continue;
423            }
424
425            if self.lex_identifier(state) {
426                continue;
427            }
428
429            if self.lex_operator(state) {
430                continue;
431            }
432
433            // 如果没有匹配到任何模式，添加错误 kind
434            let start_pos = state.get_position();
435            if let Some(ch) = state.peek() {
436                state.advance(ch.len_utf8());
437                state.add_token(NimSyntaxKind::Error, start_pos, state.get_position());
438            }
439        }
440        Ok(())
441    }
442}
443
444impl<'config> Lexer<NimLanguage> for NimLexer<'config> {
445    fn lex<'s, S: Source + ?Sized>(&self, source: &'s S, _edits: &[oak_core::source::TextEdit], cache: &'s mut impl LexerCache<NimLanguage>) -> LexOutput<NimLanguage> {
446        let mut state = LexerState::new(source);
447        let result = self.run(&mut state);
448        if result.is_ok() {
449            state.add_eof();
450        }
451        state.finish_with_cache(result, cache)
452    }
453}
// oak_nim/lexer/mod.rs