Skip to main content

oak_dart/lexer/
mod.rs

1use crate::{kind::DartSyntaxKind, language::DartLanguage};
2use oak_core::{
3    LexOutput, Lexer, LexerCache, LexerState, OakError, Source, TextEdit,
4    lexer::{CommentConfig, StringConfig, WhitespaceConfig},
5};
6
/// Lexer state specialised to the Dart language; `S` is the source type being lexed.
type State<'a, S> = LexerState<'a, S, DartLanguage>;

/// Whitespace scanning configuration, with `unicode_whitespace` enabled.
static DART_WHITESPACE: WhitespaceConfig = WhitespaceConfig { unicode_whitespace: true };
/// Comment delimiters: `//` line comments and `/* ... */` block comments, with `nested_blocks` enabled.
static DART_COMMENT: CommentConfig = CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true };
/// String configuration for `"`-quoted literals, using `\` as the escape character.
static DART_STRING_DOUBLE: StringConfig = StringConfig { quotes: &['"'], escape: Some('\\') };
/// String configuration for `'`-quoted literals, using `\` as the escape character.
static DART_STRING_SINGLE: StringConfig = StringConfig { quotes: &['\''], escape: Some('\\') };
13
/// Lexer implementation for the Dart language.
///
/// Holds a borrowed [`DartLanguage`] configuration for the lifetime `'config`.
#[derive(Clone)]
pub struct DartLexer<'config> {
    /// Language configuration supplied at construction; stored but not
    /// consulted by the lexing methods visible in this file.
    _config: &'config DartLanguage,
}
19
20impl<'config> Lexer<DartLanguage> for DartLexer<'config> {
21    fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DartLanguage>) -> LexOutput<DartLanguage> {
22        let mut state = State::new(text);
23        let result = self.run(&mut state);
24        if result.is_ok() {
25            state.add_eof();
26        }
27        state.finish_with_cache(result, cache)
28    }
29}
30
31impl<'config> DartLexer<'config> {
    /// Creates a lexer that borrows `config` for the lifetime `'config`.
    pub fn new(config: &'config DartLanguage) -> Self {
        Self { _config: config }
    }
35
36    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
37        while state.not_at_end() {
38            let safe_point = state.get_position();
39
40            if self.skip_whitespace(state) {
41                continue;
42            }
43
44            if self.skip_comment(state) {
45                continue;
46            }
47
48            if self.lex_string_literal(state) {
49                continue;
50            }
51
52            if self.lex_number_literal(state) {
53                continue;
54            }
55
56            if self.lex_identifier_or_keyword(state) {
57                continue;
58            }
59
60            if self.lex_operators(state) {
61                continue;
62            }
63
64            if self.lex_single_char_tokens(state) {
65                continue;
66            }
67
68            state.advance_if_dead_lock(safe_point);
69        }
70        Ok(())
71    }
72
    /// Scans whitespace using `DART_WHITESPACE`, tagging it as
    /// `DartSyntaxKind::Whitespace`. Returns the scanner's result.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        DART_WHITESPACE.scan(state, DartSyntaxKind::Whitespace)
    }
76
    /// Scans a comment using `DART_COMMENT`, passing
    /// `DartSyntaxKind::LineComment` and `DartSyntaxKind::BlockComment` as the
    /// kinds for the two comment forms. Returns the scanner's result.
    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        DART_COMMENT.scan(state, DartSyntaxKind::LineComment, DartSyntaxKind::BlockComment)
    }
80
81    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82        if DART_STRING_DOUBLE.scan(state, DartSyntaxKind::StringLiteral) {
83            return true;
84        }
85        if DART_STRING_SINGLE.scan(state, DartSyntaxKind::StringLiteral) {
86            return true;
87        }
88        false
89    }
90
91    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
92        let start = state.get_position();
93
94        if let Some(ch) = state.peek() {
95            if ch.is_ascii_digit() {
96                state.advance(ch.len_utf8());
97
98                // 消费数字
99                while let Some(ch) = state.peek() {
100                    if ch.is_ascii_digit() {
101                        state.advance(ch.len_utf8());
102                    }
103                    else {
104                        break;
105                    }
106                }
107
108                let mut is_double = false;
109
110                // 检查小数点
111                if state.starts_with(".") && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
112                    state.advance(1); // 跳过 '.'
113                    is_double = true;
114
115                    while let Some(ch) = state.peek() {
116                        if ch.is_ascii_digit() {
117                            state.advance(ch.len_utf8());
118                        }
119                        else {
120                            break;
121                        }
122                    }
123                }
124
125                // 检查科学计数法
126                if let Some(ch) = state.peek() {
127                    if ch == 'e' || ch == 'E' {
128                        state.advance(1);
129                        is_double = true;
130
131                        if let Some(ch) = state.peek() {
132                            if ch == '+' || ch == '-' {
133                                state.advance(1);
134                            }
135                        }
136
137                        while let Some(ch) = state.peek() {
138                            if ch.is_ascii_digit() {
139                                state.advance(ch.len_utf8());
140                            }
141                            else {
142                                break;
143                            }
144                        }
145                    }
146                }
147
148                let kind = if is_double { DartSyntaxKind::DoubleLiteral } else { DartSyntaxKind::IntegerLiteral };
149
150                state.add_token(kind, start, state.get_position());
151                return true;
152            }
153        }
154
155        false
156    }
157
158    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
159        let start = state.get_position();
160        let ch = match state.peek() {
161            Some(c) => c,
162            None => return false,
163        };
164
165        if !(ch.is_ascii_alphabetic() || ch == '_' || ch == '$') {
166            return false;
167        }
168
169        state.advance(ch.len_utf8());
170        while let Some(c) = state.peek() {
171            if c.is_ascii_alphanumeric() || c == '_' || c == '$' {
172                state.advance(c.len_utf8());
173            }
174            else {
175                break;
176            }
177        }
178
179        let end = state.get_position();
180        let text = state.get_text_in((start..end).into());
181
182        let kind = match text.as_ref() {
183            "abstract" => DartSyntaxKind::Abstract,
184            "as" => DartSyntaxKind::As,
185            "assert" => DartSyntaxKind::Assert,
186            "async" => DartSyntaxKind::Async,
187            "await" => DartSyntaxKind::Await,
188            "break" => DartSyntaxKind::Break,
189            "case" => DartSyntaxKind::Case,
190            "catch" => DartSyntaxKind::Catch,
191            "class" => DartSyntaxKind::Class,
192            "const" => DartSyntaxKind::Const,
193            "continue" => DartSyntaxKind::Continue,
194            "covariant" => DartSyntaxKind::Covariant,
195            "default" => DartSyntaxKind::Default,
196            "deferred" => DartSyntaxKind::Deferred,
197            "do" => DartSyntaxKind::Do,
198            "dynamic" => DartSyntaxKind::Dynamic,
199            "else" => DartSyntaxKind::Else,
200            "enum" => DartSyntaxKind::Enum,
201            "export" => DartSyntaxKind::Export,
202            "extends" => DartSyntaxKind::Extends,
203            "extension" => DartSyntaxKind::Extension,
204            "external" => DartSyntaxKind::External,
205            "factory" => DartSyntaxKind::Factory,
206            "false" => DartSyntaxKind::False,
207            "final" => DartSyntaxKind::Final,
208            "finally" => DartSyntaxKind::Finally,
209            "for" => DartSyntaxKind::For,
210            "function" => DartSyntaxKind::Function,
211            "get" => DartSyntaxKind::Get,
212            "hide" => DartSyntaxKind::Hide,
213            "if" => DartSyntaxKind::If,
214            "implements" => DartSyntaxKind::Implements,
215            "import" => DartSyntaxKind::Import,
216            "in" => DartSyntaxKind::In,
217            "interface" => DartSyntaxKind::Interface,
218            "is" => DartSyntaxKind::Is,
219            "late" => DartSyntaxKind::Late,
220            "library" => DartSyntaxKind::Library,
221            "mixin" => DartSyntaxKind::Mixin,
222            "new" => DartSyntaxKind::New,
223            "null" => DartSyntaxKind::Null,
224            "on" => DartSyntaxKind::On,
225            "operator" => DartSyntaxKind::Operator,
226            "part" => DartSyntaxKind::Part,
227            "required" => DartSyntaxKind::Required,
228            "rethrow" => DartSyntaxKind::Rethrow,
229            "return" => DartSyntaxKind::Return,
230            "set" => DartSyntaxKind::Set,
231            "show" => DartSyntaxKind::Show,
232            "static" => DartSyntaxKind::Static,
233            "super" => DartSyntaxKind::Super,
234            "switch" => DartSyntaxKind::Switch,
235            "sync" => DartSyntaxKind::Sync,
236            "this" => DartSyntaxKind::This,
237            "throw" => DartSyntaxKind::Throw,
238            "true" => DartSyntaxKind::True,
239            "try" => DartSyntaxKind::Try,
240            "typedef" => DartSyntaxKind::Typedef,
241            "var" => DartSyntaxKind::Var,
242            "void" => DartSyntaxKind::Void,
243            "while" => DartSyntaxKind::While,
244            "with" => DartSyntaxKind::With,
245            "yield" => DartSyntaxKind::Yield,
246            _ => DartSyntaxKind::Identifier,
247        };
248
249        state.add_token(kind, start, state.get_position());
250        true
251    }
252
253    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
254        let start = state.get_position();
255
256        let kinds = [
257            ("??=", DartSyntaxKind::QuestionQuestionEqual),
258            ("??", DartSyntaxKind::QuestionQuestion),
259            ("&&", DartSyntaxKind::AmpersandAmpersand),
260            ("||", DartSyntaxKind::PipePipe),
261            ("==", DartSyntaxKind::EqualEqual),
262            ("!=", DartSyntaxKind::BangEqual),
263            (">=", DartSyntaxKind::GreaterEqual),
264            ("<=", DartSyntaxKind::LessEqual),
265            ("+=", DartSyntaxKind::PlusEqual),
266            ("-=", DartSyntaxKind::MinusEqual),
267            ("*=", DartSyntaxKind::StarEqual),
268            ("/=", DartSyntaxKind::SlashEqual),
269            ("%=", DartSyntaxKind::PercentEqual),
270            ("&=", DartSyntaxKind::AmpersandEqual),
271            ("|=", DartSyntaxKind::PipeEqual),
272            ("^=", DartSyntaxKind::CaretEqual),
273            ("~/=", DartSyntaxKind::TildeSlashEqual),
274            ("~/", DartSyntaxKind::TildeSlash),
275            ("<<=", DartSyntaxKind::LeftShiftEqual),
276            (">>=", DartSyntaxKind::RightShiftEqual),
277            ("<<", DartSyntaxKind::LeftShift),
278            (">>", DartSyntaxKind::RightShift),
279            ("=>", DartSyntaxKind::Arrow),
280            ("...", DartSyntaxKind::DotDotDot),
281            ("..", DartSyntaxKind::DotDot),
282            ("?.", DartSyntaxKind::QuestionDot),
283            ("++", DartSyntaxKind::PlusPlus),
284            ("--", DartSyntaxKind::MinusMinus),
285        ];
286
287        for (op, kind) in kinds {
288            if state.consume_if_starts_with(op) {
289                state.add_token(kind, start, state.get_position());
290                return true;
291            }
292        }
293
294        false
295    }
296
297    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
298        let start = state.get_position();
299        let ch = match state.peek() {
300            Some(c) => c,
301            None => return false,
302        };
303
304        let kind = match ch {
305            '(' => DartSyntaxKind::LeftParen,
306            ')' => DartSyntaxKind::RightParen,
307            '{' => DartSyntaxKind::LeftBrace,
308            '}' => DartSyntaxKind::RightBrace,
309            '[' => DartSyntaxKind::LeftBracket,
310            ']' => DartSyntaxKind::RightBracket,
311            ';' => DartSyntaxKind::Semicolon,
312            ',' => DartSyntaxKind::Comma,
313            '.' => DartSyntaxKind::Dot,
314            ':' => DartSyntaxKind::Colon,
315            '?' => DartSyntaxKind::Question,
316            '=' => DartSyntaxKind::Equal,
317            '!' => DartSyntaxKind::Bang,
318            '>' => DartSyntaxKind::Greater,
319            '<' => DartSyntaxKind::Less,
320            '+' => DartSyntaxKind::Plus,
321            '-' => DartSyntaxKind::Minus,
322            '*' => DartSyntaxKind::Star,
323            '/' => DartSyntaxKind::Slash,
324            '%' => DartSyntaxKind::Percent,
325            '&' => DartSyntaxKind::Ampersand,
326            '|' => DartSyntaxKind::Pipe,
327            '^' => DartSyntaxKind::Caret,
328            '~' => DartSyntaxKind::Tilde,
329            '@' => DartSyntaxKind::At,
330            '#' => DartSyntaxKind::Hash,
331            _ => return false,
332        };
333
334        state.advance(ch.len_utf8());
335        state.add_token(kind, start, state.get_position());
336        true
337    }
338}