// oak_actionscript/lexer/mod.rs

1#![doc = include_str!("readme.md")]
2use oak_core::Source;
3pub mod token_type;
4
5pub use token_type::ActionScriptTokenType;
6
7use crate::language::ActionScriptLanguage;
8use oak_core::{
9    Lexer, LexerCache, LexerState, OakError,
10    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
11};
12use std::sync::LazyLock;
13
14type State<'a, S> = LexerState<'a, S, ActionScriptLanguage>;
15
16static AS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
17static AS_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
18static AS_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
19static AS_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
20
21#[derive(Clone)]
22pub struct ActionScriptLexer<'config> {
23    _config: &'config ActionScriptLanguage,
24}
25
26impl<'config> Lexer<ActionScriptLanguage> for ActionScriptLexer<'config> {
27    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ActionScriptLanguage>) -> LexOutput<ActionScriptLanguage> {
28        let mut state = LexerState::new(source);
29        let result = self.run(&mut state);
30        if result.is_ok() {
31            state.add_eof();
32        }
33        state.finish_with_cache(result, cache)
34    }
35}
36
37impl<'config> ActionScriptLexer<'config> {
38    pub fn new(config: &'config ActionScriptLanguage) -> Self {
39        Self { _config: config }
40    }
41
42    /// 主要词法分析逻辑
43    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
44        while state.not_at_end() {
45            let safe_point = state.get_position();
46            if self.skip_whitespace(state) {
47                continue;
48            }
49
50            if self.skip_comment(state) {
51                continue;
52            }
53
54            if self.lex_string_literal(state) {
55                continue;
56            }
57
58            if self.lex_char_literal(state) {
59                continue;
60            }
61
62            if self.lex_number_literal(state) {
63                continue;
64            }
65
66            if self.lex_identifier_or_keyword(state) {
67                continue;
68            }
69
70            if self.lex_operator_or_delimiter(state) {
71                continue;
72            }
73
74            state.advance_if_dead_lock(safe_point);
75        }
76
77        Ok(())
78    }
79
80    /// 跳过空白字符
81    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82        AS_WHITESPACE.scan(state, ActionScriptTokenType::Whitespace)
83    }
84
85    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
86        AS_COMMENT.scan(state, ActionScriptTokenType::Comment, ActionScriptTokenType::Comment)
87    }
88
89    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
90        let start = state.get_position();
91        let first = match state.peek() {
92            Some(c) => c,
93            None => return false,
94        };
95        if !first.is_ascii_digit() {
96            return false;
97        }
98
99        state.advance(first.len_utf8());
100        while let Some(c) = state.peek() {
101            if c.is_ascii_digit() || c == '_' {
102                state.advance(c.len_utf8());
103            }
104            else {
105                break;
106            }
107        }
108        state.add_token(ActionScriptTokenType::NumberLiteral, start, state.get_position());
109        true
110    }
111
112    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
113        AS_STRING.scan(state, ActionScriptTokenType::StringLiteral)
114    }
115
116    fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
117        AS_CHAR.scan(state, ActionScriptTokenType::StringLiteral)
118    }
119
120    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
121        let start = state.get_position();
122        let first = match state.peek() {
123            Some(c) => c,
124            None => return false,
125        };
126
127        if !first.is_ascii_alphabetic() && first != '_' && first != '$' {
128            return false;
129        }
130
131        state.advance(first.len_utf8());
132        while let Some(c) = state.peek() {
133            if c.is_ascii_alphanumeric() || c == '_' || c == '$' {
134                state.advance(c.len_utf8());
135            }
136            else {
137                break;
138            }
139        }
140
141        let end = state.get_position();
142        let text = state.source().get_text_in(oak_core::Range { start, end });
143        let kind = match text.as_ref() {
144            "class" => ActionScriptTokenType::Class,
145            "interface" => ActionScriptTokenType::Interface,
146            "function" => ActionScriptTokenType::Function,
147            "var" => ActionScriptTokenType::Var,
148            "const" => ActionScriptTokenType::Const,
149            "public" => ActionScriptTokenType::Public,
150            "private" => ActionScriptTokenType::Private,
151            "protected" => ActionScriptTokenType::Protected,
152            "internal" => ActionScriptTokenType::Internal,
153            "static" => ActionScriptTokenType::Static,
154            "override" => ActionScriptTokenType::Override,
155            "package" => ActionScriptTokenType::Package,
156            "import" => ActionScriptTokenType::Import,
157            "extends" => ActionScriptTokenType::Extends,
158            "implements" => ActionScriptTokenType::Implements,
159            "new" => ActionScriptTokenType::New,
160            "this" => ActionScriptTokenType::This,
161            "super" => ActionScriptTokenType::Super,
162            "if" => ActionScriptTokenType::If,
163            "else" => ActionScriptTokenType::Else,
164            "for" => ActionScriptTokenType::For,
165            "while" => ActionScriptTokenType::While,
166            "do" => ActionScriptTokenType::Do,
167            "switch" => ActionScriptTokenType::Switch,
168            "case" => ActionScriptTokenType::Case,
169            "default" => ActionScriptTokenType::Default,
170            "break" => ActionScriptTokenType::Break,
171            "continue" => ActionScriptTokenType::Continue,
172            "return" => ActionScriptTokenType::Return,
173            "try" => ActionScriptTokenType::Try,
174            "catch" => ActionScriptTokenType::Catch,
175            "finally" => ActionScriptTokenType::Finally,
176            "throw" => ActionScriptTokenType::Throw,
177            "void" => ActionScriptTokenType::Void,
178            "null" => ActionScriptTokenType::Null,
179            "true" => ActionScriptTokenType::True,
180            "false" => ActionScriptTokenType::False,
181            _ => ActionScriptTokenType::Identifier,
182        };
183
184        state.add_token(kind, start, end);
185        true
186    }
187
188    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
189        let start = state.get_position();
190        let c = match state.peek() {
191            Some(c) => c,
192            None => return false,
193        };
194
195        let kind = match c {
196            '+' => {
197                state.advance(1);
198                match state.peek() {
199                    Some('=') => {
200                        state.advance(1);
201                        ActionScriptTokenType::PlusAssign
202                    }
203                    Some('+') => {
204                        state.advance(1);
205                        ActionScriptTokenType::Increment
206                    }
207                    _ => ActionScriptTokenType::Plus,
208                }
209            }
210            '-' => {
211                state.advance(1);
212                match state.peek() {
213                    Some('=') => {
214                        state.advance(1);
215                        ActionScriptTokenType::MinusAssign
216                    }
217                    Some('-') => {
218                        state.advance(1);
219                        ActionScriptTokenType::Decrement
220                    }
221                    Some('>') => {
222                        state.advance(1);
223                        ActionScriptTokenType::Arrow
224                    }
225                    _ => ActionScriptTokenType::Minus,
226                }
227            }
228            '*' => {
229                state.advance(1);
230                match state.peek() {
231                    Some('=') => {
232                        state.advance(1);
233                        ActionScriptTokenType::StarAssign
234                    }
235                    _ => ActionScriptTokenType::Star,
236                }
237            }
238            '/' => {
239                state.advance(1);
240                match state.peek() {
241                    Some('=') => {
242                        state.advance(1);
243                        ActionScriptTokenType::SlashAssign
244                    }
245                    _ => ActionScriptTokenType::Slash,
246                }
247            }
248            '%' => {
249                state.advance(1);
250                match state.peek() {
251                    Some('=') => {
252                        state.advance(1);
253                        ActionScriptTokenType::PercentAssign
254                    }
255                    _ => ActionScriptTokenType::Percent,
256                }
257            }
258            '=' => {
259                state.advance(1);
260                match state.peek() {
261                    Some('=') => {
262                        state.advance(1);
263                        match state.peek() {
264                            Some('=') => {
265                                state.advance(1);
266                                ActionScriptTokenType::EqualEqualEqual
267                            }
268                            _ => ActionScriptTokenType::EqualEqual,
269                        }
270                    }
271                    _ => ActionScriptTokenType::Equal,
272                }
273            }
274            '!' => {
275                state.advance(1);
276                match state.peek() {
277                    Some('=') => {
278                        state.advance(1);
279                        match state.peek() {
280                            Some('=') => {
281                                state.advance(1);
282                                ActionScriptTokenType::NotEqualEqual
283                            }
284                            _ => ActionScriptTokenType::NotEqual,
285                        }
286                    }
287                    _ => ActionScriptTokenType::LogicalNot,
288                }
289            }
290            '<' => {
291                state.advance(1);
292                match state.peek() {
293                    Some('<') => {
294                        state.advance(1);
295                        match state.peek() {
296                            Some('=') => {
297                                state.advance(1);
298                                ActionScriptTokenType::LeftShiftAssign
299                            }
300                            _ => ActionScriptTokenType::LeftShift,
301                        }
302                    }
303                    Some('=') => {
304                        state.advance(1);
305                        ActionScriptTokenType::LessEqual
306                    }
307                    _ => ActionScriptTokenType::LessThan,
308                }
309            }
310            '>' => {
311                state.advance(1);
312                match state.peek() {
313                    Some('>') => {
314                        state.advance(1);
315                        match state.peek() {
316                            Some('>') => {
317                                state.advance(1);
318                                match state.peek() {
319                                    Some('=') => {
320                                        state.advance(1);
321                                        ActionScriptTokenType::UnsignedRightShiftAssign
322                                    }
323                                    _ => ActionScriptTokenType::UnsignedRightShift,
324                                }
325                            }
326                            Some('=') => {
327                                state.advance(1);
328                                ActionScriptTokenType::RightShiftAssign
329                            }
330                            _ => ActionScriptTokenType::RightShift,
331                        }
332                    }
333                    Some('=') => {
334                        state.advance(1);
335                        ActionScriptTokenType::GreaterEqual
336                    }
337                    _ => ActionScriptTokenType::GreaterThan,
338                }
339            }
340            '&' => {
341                state.advance(1);
342                match state.peek() {
343                    Some('&') => {
344                        state.advance(1);
345                        ActionScriptTokenType::LogicalAnd
346                    }
347                    Some('=') => {
348                        state.advance(1);
349                        ActionScriptTokenType::BitwiseAndAssign
350                    }
351                    _ => ActionScriptTokenType::BitwiseAnd,
352                }
353            }
354            '|' => {
355                state.advance(1);
356                match state.peek() {
357                    Some('|') => {
358                        state.advance(1);
359                        ActionScriptTokenType::LogicalOr
360                    }
361                    Some('=') => {
362                        state.advance(1);
363                        ActionScriptTokenType::BitwiseOrAssign
364                    }
365                    _ => ActionScriptTokenType::BitwiseOr,
366                }
367            }
368            '^' => {
369                state.advance(1);
370                match state.peek() {
371                    Some('=') => {
372                        state.advance(1);
373                        ActionScriptTokenType::BitwiseXorAssign
374                    }
375                    _ => ActionScriptTokenType::BitwiseXor,
376                }
377            }
378            '~' => {
379                state.advance(1);
380                ActionScriptTokenType::BitwiseNot
381            }
382            '?' => {
383                state.advance(1);
384                ActionScriptTokenType::Question
385            }
386            ':' => {
387                state.advance(1);
388                ActionScriptTokenType::Colon
389            }
390            '.' => {
391                state.advance(1);
392                ActionScriptTokenType::Dot
393            }
394            '(' => {
395                state.advance(1);
396                ActionScriptTokenType::LeftParen
397            }
398            ')' => {
399                state.advance(1);
400                ActionScriptTokenType::RightParen
401            }
402            '{' => {
403                state.advance(1);
404                ActionScriptTokenType::LeftBrace
405            }
406            '}' => {
407                state.advance(1);
408                ActionScriptTokenType::RightBrace
409            }
410            '[' => {
411                state.advance(1);
412                ActionScriptTokenType::LeftBracket
413            }
414            ']' => {
415                state.advance(1);
416                ActionScriptTokenType::RightBracket
417            }
418            ';' => {
419                state.advance(1);
420                ActionScriptTokenType::Semicolon
421            }
422            ',' => {
423                state.advance(1);
424                ActionScriptTokenType::Comma
425            }
426            '@' => {
427                state.advance(1);
428                ActionScriptTokenType::At
429            }
430            '#' => {
431                state.advance(1);
432                ActionScriptTokenType::Hash
433            }
434            '$' => {
435                state.advance(1);
436                ActionScriptTokenType::Dollar
437            }
438            '\\' => {
439                state.advance(1);
440                ActionScriptTokenType::Backslash
441            }
442            '\'' => {
443                state.advance(1);
444                ActionScriptTokenType::Quote
445            }
446            '"' => {
447                state.advance(1);
448                ActionScriptTokenType::DoubleQuote
449            }
450            '`' => {
451                state.advance(1);
452                ActionScriptTokenType::Backtick
453            }
454            _ => return false,
455        };
456
457        state.add_token(kind, start, state.get_position());
458        true
459    }
460}