Skip to main content

oak_actionscript/lexer/
mod.rs

1pub mod token_type;
2
3pub use token_type::ActionScriptTokenType;
4
5use crate::language::ActionScriptLanguage;
6use oak_core::{
7    Lexer, LexerCache, LexerState, OakError,
8    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
9    source::Source,
10};
11use std::sync::LazyLock;
12
13type State<'a, S> = LexerState<'a, S, ActionScriptLanguage>;
14
15static AS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static AS_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
17static AS_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18static AS_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
19
20#[derive(Clone)]
21pub struct ActionScriptLexer<'config> {
22    _config: &'config ActionScriptLanguage,
23}
24
25impl<'config> Lexer<ActionScriptLanguage> for ActionScriptLexer<'config> {
26    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ActionScriptLanguage>) -> LexOutput<ActionScriptLanguage> {
27        let mut state = LexerState::new(source);
28        let result = self.run(&mut state);
29        if result.is_ok() {
30            state.add_eof();
31        }
32        state.finish_with_cache(result, cache)
33    }
34}
35
36impl<'config> ActionScriptLexer<'config> {
37    pub fn new(config: &'config ActionScriptLanguage) -> Self {
38        Self { _config: config }
39    }
40
41    /// 主要词法分析逻辑
42    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
43        while state.not_at_end() {
44            let safe_point = state.get_position();
45            if self.skip_whitespace(state) {
46                continue;
47            }
48
49            if self.skip_comment(state) {
50                continue;
51            }
52
53            if self.lex_string_literal(state) {
54                continue;
55            }
56
57            if self.lex_char_literal(state) {
58                continue;
59            }
60
61            if self.lex_number_literal(state) {
62                continue;
63            }
64
65            if self.lex_identifier_or_keyword(state) {
66                continue;
67            }
68
69            if self.lex_operator_or_delimiter(state) {
70                continue;
71            }
72
73            state.advance_if_dead_lock(safe_point);
74        }
75
76        Ok(())
77    }
78
79    /// 跳过空白字符
80    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
81        AS_WHITESPACE.scan(state, ActionScriptTokenType::Whitespace)
82    }
83
84    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85        AS_COMMENT.scan(state, ActionScriptTokenType::Comment, ActionScriptTokenType::Comment)
86    }
87
88    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
89        let start = state.get_position();
90        let first = match state.peek() {
91            Some(c) => c,
92            None => return false,
93        };
94        if !first.is_ascii_digit() {
95            return false;
96        }
97
98        state.advance(first.len_utf8());
99        while let Some(c) = state.peek() {
100            if c.is_ascii_digit() || c == '_' {
101                state.advance(c.len_utf8());
102            }
103            else {
104                break;
105            }
106        }
107        state.add_token(ActionScriptTokenType::NumberLiteral, start, state.get_position());
108        true
109    }
110
111    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
112        AS_STRING.scan(state, ActionScriptTokenType::StringLiteral)
113    }
114
115    fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
116        AS_CHAR.scan(state, ActionScriptTokenType::CharLiteral)
117    }
118
119    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
120        let start = state.get_position();
121        let first = match state.peek() {
122            Some(c) if c.is_ascii_alphabetic() || c == '_' || c == '$' => c,
123            _ => return false,
124        };
125
126        state.advance(first.len_utf8());
127        while let Some(c) = state.peek() {
128            if c.is_ascii_alphanumeric() || c == '_' || c == '$' {
129                state.advance(c.len_utf8());
130            }
131            else {
132                break;
133            }
134        }
135
136        let end = state.get_position();
137        let text = state.get_text_in((start..end).into());
138        let kind = match text.as_ref() {
139            "as" => ActionScriptTokenType::As,
140            "break" => ActionScriptTokenType::Break,
141            "case" => ActionScriptTokenType::Case,
142            "catch" => ActionScriptTokenType::Catch,
143            "class" => ActionScriptTokenType::Class,
144            "const" => ActionScriptTokenType::Const,
145            "continue" => ActionScriptTokenType::Continue,
146            "default" => ActionScriptTokenType::Default,
147            "delete" => ActionScriptTokenType::Delete,
148            "do" => ActionScriptTokenType::Do,
149            "else" => ActionScriptTokenType::Else,
150            "extends" => ActionScriptTokenType::Extends,
151            "false" => ActionScriptTokenType::False,
152            "finally" => ActionScriptTokenType::Finally,
153            "for" => ActionScriptTokenType::For,
154            "function" => ActionScriptTokenType::Function,
155            "if" => ActionScriptTokenType::If,
156            "implements" => ActionScriptTokenType::Implements,
157            "import" => ActionScriptTokenType::Import,
158            "in" => ActionScriptTokenType::In,
159            "instanceof" => ActionScriptTokenType::Instanceof,
160            "interface" => ActionScriptTokenType::Interface,
161            "internal" => ActionScriptTokenType::Internal,
162            "is" => ActionScriptTokenType::Is,
163            "native" => ActionScriptTokenType::Native,
164            "new" => ActionScriptTokenType::New,
165            "null" => ActionScriptTokenType::Null,
166            "package" => ActionScriptTokenType::Package,
167            "private" => ActionScriptTokenType::Private,
168            "protected" => ActionScriptTokenType::Protected,
169            "public" => ActionScriptTokenType::Public,
170            "return" => ActionScriptTokenType::Return,
171            "static" => ActionScriptTokenType::Static,
172            "super" => ActionScriptTokenType::Super,
173            "switch" => ActionScriptTokenType::Switch,
174            "this" => ActionScriptTokenType::This,
175            "throw" => ActionScriptTokenType::Throw,
176            "true" => ActionScriptTokenType::True,
177            "try" => ActionScriptTokenType::Try,
178            "typeof" => ActionScriptTokenType::Typeof,
179            "use" => ActionScriptTokenType::Use,
180            "var" => ActionScriptTokenType::Var,
181            "void" => ActionScriptTokenType::Void,
182            "while" => ActionScriptTokenType::While,
183            "with" => ActionScriptTokenType::With,
184            "each" => ActionScriptTokenType::Each,
185            "get" => ActionScriptTokenType::Get,
186            "set" => ActionScriptTokenType::Set,
187            "namespace" => ActionScriptTokenType::Namespace,
188            "include" => ActionScriptTokenType::Include,
189            "dynamic" => ActionScriptTokenType::Dynamic,
190            "final" => ActionScriptTokenType::Final,
191            "override" => ActionScriptTokenType::Override,
192            "Array" => ActionScriptTokenType::Array,
193            "Boolean" => ActionScriptTokenType::Boolean,
194            "Date" => ActionScriptTokenType::Date,
195            "Number" => ActionScriptTokenType::Number,
196            "Object" => ActionScriptTokenType::ObjectType,
197            "RegExp" => ActionScriptTokenType::RegExp,
198            "String" => ActionScriptTokenType::StringType,
199            "uint" => ActionScriptTokenType::Uint,
200            "Vector" => ActionScriptTokenType::Vector,
201            "XML" => ActionScriptTokenType::Xml,
202            "XMLList" => ActionScriptTokenType::XmlList,
203            _ => ActionScriptTokenType::Identifier,
204        };
205
206        state.add_token(kind, start, end);
207        true
208    }
209
210    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
211        let start = state.get_position();
212        let c = match state.peek() {
213            Some(c) => c,
214            None => return false,
215        };
216
217        let kind = match c {
218            '+' => {
219                state.advance(1);
220                match state.peek() {
221                    Some('=') => {
222                        state.advance(1);
223                        ActionScriptTokenType::PlusAssign
224                    }
225                    Some('+') => {
226                        state.advance(1);
227                        ActionScriptTokenType::Increment
228                    }
229                    _ => ActionScriptTokenType::Plus,
230                }
231            }
232            '-' => {
233                state.advance(1);
234                match state.peek() {
235                    Some('=') => {
236                        state.advance(1);
237                        ActionScriptTokenType::MinusAssign
238                    }
239                    Some('-') => {
240                        state.advance(1);
241                        ActionScriptTokenType::Decrement
242                    }
243                    Some('>') => {
244                        state.advance(1);
245                        ActionScriptTokenType::Arrow
246                    }
247                    _ => ActionScriptTokenType::Minus,
248                }
249            }
250            '*' => {
251                state.advance(1);
252                match state.peek() {
253                    Some('=') => {
254                        state.advance(1);
255                        ActionScriptTokenType::StarAssign
256                    }
257                    _ => ActionScriptTokenType::Star,
258                }
259            }
260            '/' => {
261                state.advance(1);
262                match state.peek() {
263                    Some('=') => {
264                        state.advance(1);
265                        ActionScriptTokenType::SlashAssign
266                    }
267                    _ => ActionScriptTokenType::Slash,
268                }
269            }
270            '%' => {
271                state.advance(1);
272                match state.peek() {
273                    Some('=') => {
274                        state.advance(1);
275                        ActionScriptTokenType::PercentAssign
276                    }
277                    _ => ActionScriptTokenType::Percent,
278                }
279            }
280            '=' => {
281                state.advance(1);
282                match state.peek() {
283                    Some('=') => {
284                        state.advance(1);
285                        match state.peek() {
286                            Some('=') => {
287                                state.advance(1);
288                                ActionScriptTokenType::EqualEqualEqual
289                            }
290                            _ => ActionScriptTokenType::EqualEqual,
291                        }
292                    }
293                    _ => ActionScriptTokenType::Equal,
294                }
295            }
296            '!' => {
297                state.advance(1);
298                match state.peek() {
299                    Some('=') => {
300                        state.advance(1);
301                        match state.peek() {
302                            Some('=') => {
303                                state.advance(1);
304                                ActionScriptTokenType::NotEqualEqual
305                            }
306                            _ => ActionScriptTokenType::NotEqual,
307                        }
308                    }
309                    _ => ActionScriptTokenType::LogicalNot,
310                }
311            }
312            '<' => {
313                state.advance(1);
314                match state.peek() {
315                    Some('<') => {
316                        state.advance(1);
317                        match state.peek() {
318                            Some('=') => {
319                                state.advance(1);
320                                ActionScriptTokenType::LeftShiftAssign
321                            }
322                            _ => ActionScriptTokenType::LeftShift,
323                        }
324                    }
325                    Some('=') => {
326                        state.advance(1);
327                        ActionScriptTokenType::LessEqual
328                    }
329                    _ => ActionScriptTokenType::LessThan,
330                }
331            }
332            '>' => {
333                state.advance(1);
334                match state.peek() {
335                    Some('>') => {
336                        state.advance(1);
337                        match state.peek() {
338                            Some('>') => {
339                                state.advance(1);
340                                match state.peek() {
341                                    Some('=') => {
342                                        state.advance(1);
343                                        ActionScriptTokenType::UnsignedRightShiftAssign
344                                    }
345                                    _ => ActionScriptTokenType::UnsignedRightShift,
346                                }
347                            }
348                            Some('=') => {
349                                state.advance(1);
350                                ActionScriptTokenType::RightShiftAssign
351                            }
352                            _ => ActionScriptTokenType::RightShift,
353                        }
354                    }
355                    Some('=') => {
356                        state.advance(1);
357                        ActionScriptTokenType::GreaterEqual
358                    }
359                    _ => ActionScriptTokenType::GreaterThan,
360                }
361            }
362            '&' => {
363                state.advance(1);
364                match state.peek() {
365                    Some('&') => {
366                        state.advance(1);
367                        ActionScriptTokenType::LogicalAnd
368                    }
369                    Some('=') => {
370                        state.advance(1);
371                        ActionScriptTokenType::BitwiseAndAssign
372                    }
373                    _ => ActionScriptTokenType::BitwiseAnd,
374                }
375            }
376            '|' => {
377                state.advance(1);
378                match state.peek() {
379                    Some('|') => {
380                        state.advance(1);
381                        ActionScriptTokenType::LogicalOr
382                    }
383                    Some('=') => {
384                        state.advance(1);
385                        ActionScriptTokenType::BitwiseOrAssign
386                    }
387                    _ => ActionScriptTokenType::BitwiseOr,
388                }
389            }
390            '^' => {
391                state.advance(1);
392                match state.peek() {
393                    Some('=') => {
394                        state.advance(1);
395                        ActionScriptTokenType::BitwiseXorAssign
396                    }
397                    _ => ActionScriptTokenType::BitwiseXor,
398                }
399            }
400            '~' => {
401                state.advance(1);
402                ActionScriptTokenType::BitwiseNot
403            }
404            '?' => {
405                state.advance(1);
406                ActionScriptTokenType::Question
407            }
408            ':' => {
409                state.advance(1);
410                ActionScriptTokenType::Colon
411            }
412            '.' => {
413                state.advance(1);
414                ActionScriptTokenType::Dot
415            }
416            '(' => {
417                state.advance(1);
418                ActionScriptTokenType::LeftParen
419            }
420            ')' => {
421                state.advance(1);
422                ActionScriptTokenType::RightParen
423            }
424            '{' => {
425                state.advance(1);
426                ActionScriptTokenType::LeftBrace
427            }
428            '}' => {
429                state.advance(1);
430                ActionScriptTokenType::RightBrace
431            }
432            '[' => {
433                state.advance(1);
434                ActionScriptTokenType::LeftBracket
435            }
436            ']' => {
437                state.advance(1);
438                ActionScriptTokenType::RightBracket
439            }
440            ';' => {
441                state.advance(1);
442                ActionScriptTokenType::Semicolon
443            }
444            ',' => {
445                state.advance(1);
446                ActionScriptTokenType::Comma
447            }
448            '@' => {
449                state.advance(1);
450                ActionScriptTokenType::At
451            }
452            '#' => {
453                state.advance(1);
454                ActionScriptTokenType::Hash
455            }
456            '$' => {
457                state.advance(1);
458                ActionScriptTokenType::Dollar
459            }
460            '\\' => {
461                state.advance(1);
462                ActionScriptTokenType::Backslash
463            }
464            '\'' => {
465                state.advance(1);
466                ActionScriptTokenType::Quote
467            }
468            '"' => {
469                state.advance(1);
470                ActionScriptTokenType::DoubleQuote
471            }
472            '`' => {
473                state.advance(1);
474                ActionScriptTokenType::Backtick
475            }
476            _ => return false,
477        };
478
479        state.add_token(kind, start, state.get_position());
480        true
481    }
482}