Skip to main content

oak_actionscript/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2use oak_core::Source;
3/// Token types for the ActionScript language.
4pub mod token_type;
5
6pub use token_type::ActionScriptTokenType;
7
8use crate::language::ActionScriptLanguage;
9use oak_core::{
10    Lexer, LexerCache, LexerState, OakError,
11    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
12};
13use std::sync::LazyLock;
14
15pub(crate) type State<'a, S> = LexerState<'a, S, ActionScriptLanguage>;
16
17static AS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
18static AS_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
19static AS_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
20static AS_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
21
22/// A lexer for the ActionScript language.
23#[derive(Clone)]
24pub struct ActionScriptLexer<'config> {
25    _config: &'config ActionScriptLanguage,
26}
27
28impl<'config> Lexer<ActionScriptLanguage> for ActionScriptLexer<'config> {
29    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ActionScriptLanguage>) -> LexOutput<ActionScriptLanguage> {
30        let mut state = LexerState::new(source);
31        let result = self.run(&mut state);
32        if result.is_ok() {
33            state.add_eof();
34        }
35        state.finish_with_cache(result, cache)
36    }
37}
38
39impl<'config> ActionScriptLexer<'config> {
40    /// Creates a new ActionScript lexer with the given configuration.
41    pub fn new(config: &'config ActionScriptLanguage) -> Self {
42        Self { _config: config }
43    }
44
45    /// Main lexical analysis logic
46    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
47        while state.not_at_end() {
48            let safe_point = state.get_position();
49            if self.skip_whitespace(state) {
50                continue;
51            }
52
53            if self.skip_comment(state) {
54                continue;
55            }
56
57            if self.lex_string_literal(state) {
58                continue;
59            }
60
61            if self.lex_char_literal(state) {
62                continue;
63            }
64
65            if self.lex_number_literal(state) {
66                continue;
67            }
68
69            if self.lex_identifier_or_keyword(state) {
70                continue;
71            }
72
73            if self.lex_operator_or_delimiter(state) {
74                continue;
75            }
76
77            state.advance_if_dead_lock(safe_point);
78        }
79
80        Ok(())
81    }
82
83    /// Skips whitespace characters
84    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85        AS_WHITESPACE.scan(state, ActionScriptTokenType::Whitespace)
86    }
87
88    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
89        AS_COMMENT.scan(state, ActionScriptTokenType::Comment, ActionScriptTokenType::Comment)
90    }
91
92    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
93        let start = state.get_position();
94        let first = match state.peek() {
95            Some(c) => c,
96            None => return false,
97        };
98        if !first.is_ascii_digit() {
99            return false;
100        }
101
102        state.advance(first.len_utf8());
103        while let Some(c) = state.peek() {
104            if c.is_ascii_digit() || c == '_' {
105                state.advance(c.len_utf8());
106            }
107            else {
108                break;
109            }
110        }
111        state.add_token(ActionScriptTokenType::NumberLiteral, start, state.get_position());
112        true
113    }
114
115    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
116        AS_STRING.scan(state, ActionScriptTokenType::StringLiteral)
117    }
118
119    fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
120        AS_CHAR.scan(state, ActionScriptTokenType::StringLiteral)
121    }
122
123    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
124        let start = state.get_position();
125        let first = match state.peek() {
126            Some(c) => c,
127            None => return false,
128        };
129
130        if !first.is_ascii_alphabetic() && first != '_' && first != '$' {
131            return false;
132        }
133
134        state.advance(first.len_utf8());
135        while let Some(c) = state.peek() {
136            if c.is_ascii_alphanumeric() || c == '_' || c == '$' {
137                state.advance(c.len_utf8());
138            }
139            else {
140                break;
141            }
142        }
143
144        let end = state.get_position();
145        let text = state.source().get_text_in(oak_core::Range { start, end });
146        let kind = match text.as_ref() {
147            "class" => ActionScriptTokenType::Class,
148            "interface" => ActionScriptTokenType::Interface,
149            "function" => ActionScriptTokenType::Function,
150            "var" => ActionScriptTokenType::Var,
151            "const" => ActionScriptTokenType::Const,
152            "public" => ActionScriptTokenType::Public,
153            "private" => ActionScriptTokenType::Private,
154            "protected" => ActionScriptTokenType::Protected,
155            "internal" => ActionScriptTokenType::Internal,
156            "static" => ActionScriptTokenType::Static,
157            "override" => ActionScriptTokenType::Override,
158            "package" => ActionScriptTokenType::Package,
159            "import" => ActionScriptTokenType::Import,
160            "extends" => ActionScriptTokenType::Extends,
161            "implements" => ActionScriptTokenType::Implements,
162            "new" => ActionScriptTokenType::New,
163            "this" => ActionScriptTokenType::This,
164            "super" => ActionScriptTokenType::Super,
165            "if" => ActionScriptTokenType::If,
166            "else" => ActionScriptTokenType::Else,
167            "for" => ActionScriptTokenType::For,
168            "while" => ActionScriptTokenType::While,
169            "do" => ActionScriptTokenType::Do,
170            "switch" => ActionScriptTokenType::Switch,
171            "case" => ActionScriptTokenType::Case,
172            "default" => ActionScriptTokenType::Default,
173            "break" => ActionScriptTokenType::Break,
174            "continue" => ActionScriptTokenType::Continue,
175            "return" => ActionScriptTokenType::Return,
176            "try" => ActionScriptTokenType::Try,
177            "catch" => ActionScriptTokenType::Catch,
178            "finally" => ActionScriptTokenType::Finally,
179            "throw" => ActionScriptTokenType::Throw,
180            "void" => ActionScriptTokenType::Void,
181            "null" => ActionScriptTokenType::Null,
182            "true" => ActionScriptTokenType::True,
183            "false" => ActionScriptTokenType::False,
184            _ => ActionScriptTokenType::Identifier,
185        };
186
187        state.add_token(kind, start, end);
188        true
189    }
190
191    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
192        let start = state.get_position();
193        let c = match state.peek() {
194            Some(c) => c,
195            None => return false,
196        };
197
198        let kind = match c {
199            '+' => {
200                state.advance(1);
201                match state.peek() {
202                    Some('=') => {
203                        state.advance(1);
204                        ActionScriptTokenType::PlusAssign
205                    }
206                    Some('+') => {
207                        state.advance(1);
208                        ActionScriptTokenType::Increment
209                    }
210                    _ => ActionScriptTokenType::Plus,
211                }
212            }
213            '-' => {
214                state.advance(1);
215                match state.peek() {
216                    Some('=') => {
217                        state.advance(1);
218                        ActionScriptTokenType::MinusAssign
219                    }
220                    Some('-') => {
221                        state.advance(1);
222                        ActionScriptTokenType::Decrement
223                    }
224                    Some('>') => {
225                        state.advance(1);
226                        ActionScriptTokenType::Arrow
227                    }
228                    _ => ActionScriptTokenType::Minus,
229                }
230            }
231            '*' => {
232                state.advance(1);
233                match state.peek() {
234                    Some('=') => {
235                        state.advance(1);
236                        ActionScriptTokenType::StarAssign
237                    }
238                    _ => ActionScriptTokenType::Star,
239                }
240            }
241            '/' => {
242                state.advance(1);
243                match state.peek() {
244                    Some('=') => {
245                        state.advance(1);
246                        ActionScriptTokenType::SlashAssign
247                    }
248                    _ => ActionScriptTokenType::Slash,
249                }
250            }
251            '%' => {
252                state.advance(1);
253                match state.peek() {
254                    Some('=') => {
255                        state.advance(1);
256                        ActionScriptTokenType::PercentAssign
257                    }
258                    _ => ActionScriptTokenType::Percent,
259                }
260            }
261            '=' => {
262                state.advance(1);
263                match state.peek() {
264                    Some('=') => {
265                        state.advance(1);
266                        match state.peek() {
267                            Some('=') => {
268                                state.advance(1);
269                                ActionScriptTokenType::EqualEqualEqual
270                            }
271                            _ => ActionScriptTokenType::EqualEqual,
272                        }
273                    }
274                    _ => ActionScriptTokenType::Equal,
275                }
276            }
277            '!' => {
278                state.advance(1);
279                match state.peek() {
280                    Some('=') => {
281                        state.advance(1);
282                        match state.peek() {
283                            Some('=') => {
284                                state.advance(1);
285                                ActionScriptTokenType::NotEqualEqual
286                            }
287                            _ => ActionScriptTokenType::NotEqual,
288                        }
289                    }
290                    _ => ActionScriptTokenType::LogicalNot,
291                }
292            }
293            '<' => {
294                state.advance(1);
295                match state.peek() {
296                    Some('<') => {
297                        state.advance(1);
298                        match state.peek() {
299                            Some('=') => {
300                                state.advance(1);
301                                ActionScriptTokenType::LeftShiftAssign
302                            }
303                            _ => ActionScriptTokenType::LeftShift,
304                        }
305                    }
306                    Some('=') => {
307                        state.advance(1);
308                        ActionScriptTokenType::LessEqual
309                    }
310                    _ => ActionScriptTokenType::LessThan,
311                }
312            }
313            '>' => {
314                state.advance(1);
315                match state.peek() {
316                    Some('>') => {
317                        state.advance(1);
318                        match state.peek() {
319                            Some('>') => {
320                                state.advance(1);
321                                match state.peek() {
322                                    Some('=') => {
323                                        state.advance(1);
324                                        ActionScriptTokenType::UnsignedRightShiftAssign
325                                    }
326                                    _ => ActionScriptTokenType::UnsignedRightShift,
327                                }
328                            }
329                            Some('=') => {
330                                state.advance(1);
331                                ActionScriptTokenType::RightShiftAssign
332                            }
333                            _ => ActionScriptTokenType::RightShift,
334                        }
335                    }
336                    Some('=') => {
337                        state.advance(1);
338                        ActionScriptTokenType::GreaterEqual
339                    }
340                    _ => ActionScriptTokenType::GreaterThan,
341                }
342            }
343            '&' => {
344                state.advance(1);
345                match state.peek() {
346                    Some('&') => {
347                        state.advance(1);
348                        ActionScriptTokenType::LogicalAnd
349                    }
350                    Some('=') => {
351                        state.advance(1);
352                        ActionScriptTokenType::BitwiseAndAssign
353                    }
354                    _ => ActionScriptTokenType::BitwiseAnd,
355                }
356            }
357            '|' => {
358                state.advance(1);
359                match state.peek() {
360                    Some('|') => {
361                        state.advance(1);
362                        ActionScriptTokenType::LogicalOr
363                    }
364                    Some('=') => {
365                        state.advance(1);
366                        ActionScriptTokenType::BitwiseOrAssign
367                    }
368                    _ => ActionScriptTokenType::BitwiseOr,
369                }
370            }
371            '^' => {
372                state.advance(1);
373                match state.peek() {
374                    Some('=') => {
375                        state.advance(1);
376                        ActionScriptTokenType::BitwiseXorAssign
377                    }
378                    _ => ActionScriptTokenType::BitwiseXor,
379                }
380            }
381            '~' => {
382                state.advance(1);
383                ActionScriptTokenType::BitwiseNot
384            }
385            '?' => {
386                state.advance(1);
387                ActionScriptTokenType::Question
388            }
389            ':' => {
390                state.advance(1);
391                ActionScriptTokenType::Colon
392            }
393            '.' => {
394                state.advance(1);
395                ActionScriptTokenType::Dot
396            }
397            '(' => {
398                state.advance(1);
399                ActionScriptTokenType::LeftParen
400            }
401            ')' => {
402                state.advance(1);
403                ActionScriptTokenType::RightParen
404            }
405            '{' => {
406                state.advance(1);
407                ActionScriptTokenType::LeftBrace
408            }
409            '}' => {
410                state.advance(1);
411                ActionScriptTokenType::RightBrace
412            }
413            '[' => {
414                state.advance(1);
415                ActionScriptTokenType::LeftBracket
416            }
417            ']' => {
418                state.advance(1);
419                ActionScriptTokenType::RightBracket
420            }
421            ';' => {
422                state.advance(1);
423                ActionScriptTokenType::Semicolon
424            }
425            ',' => {
426                state.advance(1);
427                ActionScriptTokenType::Comma
428            }
429            '@' => {
430                state.advance(1);
431                ActionScriptTokenType::At
432            }
433            '#' => {
434                state.advance(1);
435                ActionScriptTokenType::Hash
436            }
437            '$' => {
438                state.advance(1);
439                ActionScriptTokenType::Dollar
440            }
441            '\\' => {
442                state.advance(1);
443                ActionScriptTokenType::Backslash
444            }
445            '\'' => {
446                state.advance(1);
447                ActionScriptTokenType::Quote
448            }
449            '"' => {
450                state.advance(1);
451                ActionScriptTokenType::DoubleQuote
452            }
453            '`' => {
454                state.advance(1);
455                ActionScriptTokenType::Backtick
456            }
457            _ => return false,
458        };
459
460        state.add_token(kind, start, state.get_position());
461        true
462    }
463}