oak_actionscript/lexer/
mod.rs

1use crate::{kind::ActionScriptSyntaxKind, language::ActionScriptLanguage};
2use oak_core::{
3    IncrementalCache, Lexer, LexerState, OakError,
4    lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
/// Lexer state specialised to the ActionScript language; `S` is the source being lexed.
type State<S: Source> = LexerState<S, ActionScriptLanguage>;

/// Whitespace scanner configuration with Unicode whitespace enabled.
static AS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// Line-comment configuration with `//` as the only marker.
// NOTE(review): not referenced by the lexer methods visible here (comments are scanned by hand) — confirm it is still needed.
static AS_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["//"] });
/// Double-quoted string configuration with backslash as the escape character.
// NOTE(review): not referenced by the lexer methods visible here — confirm it is still needed.
static AS_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
/// Single-quoted literal configuration with backslash as the escape character.
// NOTE(review): not referenced by the lexer methods visible here — confirm it is still needed.
static AS_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
15
/// Lexer that turns ActionScript source text into a token stream.
///
/// The lexer borrows its [`ActionScriptLanguage`] configuration for the
/// lifetime `'config`; it owns no other state.
#[derive(Clone)]
pub struct ActionScriptLexer<'config> {
    /// Language configuration handed to [`ActionScriptLexer::new`].
    config: &'config ActionScriptLanguage,
}
20
21impl<'config> Lexer<ActionScriptLanguage> for ActionScriptLexer<'config> {
22    fn lex_incremental(
23        &self,
24        source: impl Source,
25        changed: usize,
26        cache: IncrementalCache<ActionScriptLanguage>,
27    ) -> LexOutput<ActionScriptLanguage> {
28        let mut state = LexerState::new_with_cache(source, changed, cache);
29        let result = self.run(&mut state);
30        state.finish(result)
31    }
32}
33
34impl<'config> ActionScriptLexer<'config> {
35    pub fn new(config: &'config ActionScriptLanguage) -> Self {
36        Self { config }
37    }
38
39    /// 主要词法分析逻辑
40    fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
41        while state.not_at_end() {
42            let safe_point = state.get_position();
43            if self.skip_whitespace(state) {
44                continue;
45            }
46
47            if self.skip_comment(state) {
48                continue;
49            }
50
51            if self.lex_string_literal(state) {
52                continue;
53            }
54
55            if self.lex_char_literal(state) {
56                continue;
57            }
58
59            if self.lex_number_literal(state) {
60                continue;
61            }
62
63            if self.lex_identifier_or_keyword(state) {
64                continue;
65            }
66
67            if self.lex_operator_or_delimiter(state) {
68                continue;
69            }
70
71            state.safe_check(safe_point);
72        }
73
74        // 添加 EOF kind
75        let eof_pos = state.get_position();
76        state.add_token(ActionScriptSyntaxKind::Eof, eof_pos, eof_pos);
77        Ok(())
78    }
79
80    /// 跳过空白字符
81    fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
82        match AS_WHITESPACE.scan(state.rest(), state.get_position(), ActionScriptSyntaxKind::Whitespace) {
83            Some(token) => {
84                state.advance_with(token);
85                return true;
86            }
87            None => {}
88        }
89        false
90    }
91
92    fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
93        let start = state.get_position();
94        let rest = state.rest();
95        // line comment: // ... until newline
96        if rest.starts_with("//") {
97            state.advance(2);
98            while let Some(ch) = state.peek() {
99                if ch == '\n' || ch == '\r' {
100                    break;
101                }
102                state.advance(ch.len_utf8());
103            }
104            state.add_token(ActionScriptSyntaxKind::Comment, start, state.get_position());
105            return true;
106        }
107        // block comment: /* ... */ with nesting support
108        if rest.starts_with("/*") {
109            state.advance(2);
110            let mut depth = 1usize;
111            while let Some(ch) = state.peek() {
112                if ch == '/' && state.peek_next_n(1) == Some('*') {
113                    state.advance(2);
114                    depth += 1;
115                    continue;
116                }
117                if ch == '*' && state.peek_next_n(1) == Some('/') {
118                    state.advance(2);
119                    depth -= 1;
120                    if depth == 0 {
121                        break;
122                    }
123                    continue;
124                }
125                state.advance(ch.len_utf8());
126            }
127            state.add_token(ActionScriptSyntaxKind::Comment, start, state.get_position());
128            return true;
129        }
130        false
131    }
132
133    fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
134        let start = state.get_position();
135        let first = match state.peek() {
136            Some(c) => c,
137            None => return false,
138        };
139        if !first.is_ascii_digit() {
140            return false;
141        }
142
143        state.advance(first.len_utf8());
144        while let Some(c) = state.peek() {
145            if c.is_ascii_digit() || c == '_' {
146                state.advance(c.len_utf8());
147            }
148            else {
149                break;
150            }
151        }
152
153        // fractional part
154        if state.peek() == Some('.') {
155            let n1 = state.peek_next_n(1);
156            if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
157                state.advance(1); // consume '.'
158                while let Some(c) = state.peek() {
159                    if c.is_ascii_digit() || c == '_' {
160                        state.advance(c.len_utf8());
161                    }
162                    else {
163                        break;
164                    }
165                }
166            }
167        }
168        // exponent
169        if let Some(c) = state.peek() {
170            if c == 'e' || c == 'E' {
171                let n1 = state.peek_next_n(1);
172                if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
173                    state.advance(1);
174                    if let Some(sign) = state.peek() {
175                        if sign == '+' || sign == '-' {
176                            state.advance(1);
177                        }
178                    }
179                    while let Some(d) = state.peek() {
180                        if d.is_ascii_digit() || d == '_' {
181                            state.advance(d.len_utf8());
182                        }
183                        else {
184                            break;
185                        }
186                    }
187                }
188            }
189        }
190        let end = state.get_position();
191        state.add_token(ActionScriptSyntaxKind::NumberLiteral, start, end);
192        true
193    }
194
195    fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
196        let start = state.get_position();
197        let first = match state.peek() {
198            Some(c) => c,
199            None => return false,
200        };
201        if !first.is_ascii_alphabetic() && first != '_' && first != '$' {
202            return false;
203        }
204
205        let mut buf = String::new();
206        buf.push(first);
207        state.advance(first.len_utf8());
208        while let Some(c) = state.peek() {
209            if c.is_ascii_alphanumeric() || c == '_' || c == '$' {
210                buf.push(c);
211                state.advance(c.len_utf8());
212            }
213            else {
214                break;
215            }
216        }
217
218        let end = state.get_position();
219        let kind = match buf.as_str() {
220            "as" => ActionScriptSyntaxKind::As,
221            "break" => ActionScriptSyntaxKind::Break,
222            "case" => ActionScriptSyntaxKind::Case,
223            "catch" => ActionScriptSyntaxKind::Catch,
224            "class" => ActionScriptSyntaxKind::Class,
225            "const" => ActionScriptSyntaxKind::Const,
226            "continue" => ActionScriptSyntaxKind::Continue,
227            "default" => ActionScriptSyntaxKind::Default,
228            "delete" => ActionScriptSyntaxKind::Delete,
229            "do" => ActionScriptSyntaxKind::Do,
230            "else" => ActionScriptSyntaxKind::Else,
231            "extends" => ActionScriptSyntaxKind::Extends,
232            "false" => ActionScriptSyntaxKind::False,
233            "finally" => ActionScriptSyntaxKind::Finally,
234            "for" => ActionScriptSyntaxKind::For,
235            "function" => ActionScriptSyntaxKind::Function,
236            "if" => ActionScriptSyntaxKind::If,
237            "implements" => ActionScriptSyntaxKind::Implements,
238            "import" => ActionScriptSyntaxKind::Import,
239            "in" => ActionScriptSyntaxKind::In,
240            "instanceof" => ActionScriptSyntaxKind::Instanceof,
241            "interface" => ActionScriptSyntaxKind::Interface,
242            "internal" => ActionScriptSyntaxKind::Internal,
243            "is" => ActionScriptSyntaxKind::Is,
244            "native" => ActionScriptSyntaxKind::Native,
245            "new" => ActionScriptSyntaxKind::New,
246            "null" => ActionScriptSyntaxKind::Null,
247            "package" => ActionScriptSyntaxKind::Package,
248            "private" => ActionScriptSyntaxKind::Private,
249            "protected" => ActionScriptSyntaxKind::Protected,
250            "public" => ActionScriptSyntaxKind::Public,
251            "return" => ActionScriptSyntaxKind::Return,
252            "static" => ActionScriptSyntaxKind::Static,
253            "super" => ActionScriptSyntaxKind::Super,
254            "switch" => ActionScriptSyntaxKind::Switch,
255            "this" => ActionScriptSyntaxKind::This,
256            "throw" => ActionScriptSyntaxKind::Throw,
257            "true" => ActionScriptSyntaxKind::True,
258            "try" => ActionScriptSyntaxKind::Try,
259            "typeof" => ActionScriptSyntaxKind::Typeof,
260            "use" => ActionScriptSyntaxKind::Use,
261            "var" => ActionScriptSyntaxKind::Var,
262            "void" => ActionScriptSyntaxKind::Void,
263            "while" => ActionScriptSyntaxKind::While,
264            "with" => ActionScriptSyntaxKind::With,
265            "each" => ActionScriptSyntaxKind::Each,
266            "get" => ActionScriptSyntaxKind::Get,
267            "set" => ActionScriptSyntaxKind::Set,
268            "namespace" => ActionScriptSyntaxKind::Namespace,
269            "include" => ActionScriptSyntaxKind::Include,
270            "dynamic" => ActionScriptSyntaxKind::Dynamic,
271            "final" => ActionScriptSyntaxKind::Final,
272            "override" => ActionScriptSyntaxKind::Override,
273            "Array" => ActionScriptSyntaxKind::Array,
274            "Boolean" => ActionScriptSyntaxKind::Boolean,
275            "Date" => ActionScriptSyntaxKind::Date,
276            "Error" => ActionScriptSyntaxKind::Error,
277            "Function" => ActionScriptSyntaxKind::Function_,
278            "Number" => ActionScriptSyntaxKind::Number,
279            "Object" => ActionScriptSyntaxKind::Object,
280            "RegExp" => ActionScriptSyntaxKind::RegExp,
281            "String" => ActionScriptSyntaxKind::String_,
282            "uint" => ActionScriptSyntaxKind::Uint,
283            "Vector" => ActionScriptSyntaxKind::Vector,
284            "XML" => ActionScriptSyntaxKind::Xml,
285            "XMLList" => ActionScriptSyntaxKind::XmlList,
286            _ => ActionScriptSyntaxKind::Identifier,
287        };
288
289        state.add_token(kind, start, end);
290        true
291    }
292
293    fn lex_operator_or_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
294        let start = state.get_position();
295        let first = match state.peek() {
296            Some(c) => c,
297            None => return false,
298        };
299
300        let kind = match first {
301            '+' => {
302                state.advance(1);
303                match state.peek() {
304                    Some('=') => {
305                        state.advance(1);
306                        ActionScriptSyntaxKind::PlusAssign
307                    }
308                    Some('+') => {
309                        state.advance(1);
310                        ActionScriptSyntaxKind::Increment
311                    }
312                    _ => ActionScriptSyntaxKind::Plus,
313                }
314            }
315            '-' => {
316                state.advance(1);
317                match state.peek() {
318                    Some('=') => {
319                        state.advance(1);
320                        ActionScriptSyntaxKind::MinusAssign
321                    }
322                    Some('-') => {
323                        state.advance(1);
324                        ActionScriptSyntaxKind::Decrement
325                    }
326                    Some('>') => {
327                        state.advance(1);
328                        ActionScriptSyntaxKind::Arrow
329                    }
330                    _ => ActionScriptSyntaxKind::Minus,
331                }
332            }
333            '*' => {
334                state.advance(1);
335                if state.peek() == Some('=') {
336                    state.advance(1);
337                    ActionScriptSyntaxKind::StarAssign
338                }
339                else {
340                    ActionScriptSyntaxKind::Star
341                }
342            }
343            '/' => {
344                state.advance(1);
345                if state.peek() == Some('=') {
346                    state.advance(1);
347                    ActionScriptSyntaxKind::SlashAssign
348                }
349                else {
350                    ActionScriptSyntaxKind::Slash
351                }
352            }
353            '%' => {
354                state.advance(1);
355                if state.peek() == Some('=') {
356                    state.advance(1);
357                    ActionScriptSyntaxKind::PercentAssign
358                }
359                else {
360                    ActionScriptSyntaxKind::Percent
361                }
362            }
363            '=' => {
364                state.advance(1);
365                match state.peek() {
366                    Some('=') => {
367                        state.advance(1);
368                        if state.peek() == Some('=') {
369                            state.advance(1);
370                            ActionScriptSyntaxKind::EqualEqualEqual
371                        }
372                        else {
373                            ActionScriptSyntaxKind::EqualEqual
374                        }
375                    }
376                    _ => ActionScriptSyntaxKind::Equal,
377                }
378            }
379            '!' => {
380                state.advance(1);
381                match state.peek() {
382                    Some('=') => {
383                        state.advance(1);
384                        if state.peek() == Some('=') {
385                            state.advance(1);
386                            ActionScriptSyntaxKind::NotEqualEqual
387                        }
388                        else {
389                            ActionScriptSyntaxKind::NotEqual
390                        }
391                    }
392                    _ => ActionScriptSyntaxKind::LogicalNot,
393                }
394            }
395            '<' => {
396                state.advance(1);
397                match state.peek() {
398                    Some('=') => {
399                        state.advance(1);
400                        ActionScriptSyntaxKind::LessEqual
401                    }
402                    Some('<') => {
403                        state.advance(1);
404                        if state.peek() == Some('=') {
405                            state.advance(1);
406                            ActionScriptSyntaxKind::LeftShiftAssign
407                        }
408                        else {
409                            ActionScriptSyntaxKind::LeftShift
410                        }
411                    }
412                    _ => ActionScriptSyntaxKind::LessThan,
413                }
414            }
415            '>' => {
416                state.advance(1);
417                match state.peek() {
418                    Some('=') => {
419                        state.advance(1);
420                        ActionScriptSyntaxKind::GreaterEqual
421                    }
422                    Some('>') => {
423                        state.advance(1);
424                        match state.peek() {
425                            Some('>') => {
426                                state.advance(1);
427                                if state.peek() == Some('=') {
428                                    state.advance(1);
429                                    ActionScriptSyntaxKind::UnsignedRightShiftAssign
430                                }
431                                else {
432                                    ActionScriptSyntaxKind::UnsignedRightShift
433                                }
434                            }
435                            Some('=') => {
436                                state.advance(1);
437                                ActionScriptSyntaxKind::RightShiftAssign
438                            }
439                            _ => ActionScriptSyntaxKind::RightShift,
440                        }
441                    }
442                    _ => ActionScriptSyntaxKind::GreaterThan,
443                }
444            }
445            '&' => {
446                state.advance(1);
447                match state.peek() {
448                    Some('&') => {
449                        state.advance(1);
450                        ActionScriptSyntaxKind::LogicalAnd
451                    }
452                    Some('=') => {
453                        state.advance(1);
454                        ActionScriptSyntaxKind::BitwiseAndAssign
455                    }
456                    _ => ActionScriptSyntaxKind::BitwiseAnd,
457                }
458            }
459            '|' => {
460                state.advance(1);
461                match state.peek() {
462                    Some('|') => {
463                        state.advance(1);
464                        ActionScriptSyntaxKind::LogicalOr
465                    }
466                    Some('=') => {
467                        state.advance(1);
468                        ActionScriptSyntaxKind::BitwiseOrAssign
469                    }
470                    _ => ActionScriptSyntaxKind::BitwiseOr,
471                }
472            }
473            '^' => {
474                state.advance(1);
475                if state.peek() == Some('=') {
476                    state.advance(1);
477                    ActionScriptSyntaxKind::BitwiseXorAssign
478                }
479                else {
480                    ActionScriptSyntaxKind::BitwiseXor
481                }
482            }
483            '~' => {
484                state.advance(1);
485                ActionScriptSyntaxKind::BitwiseNot
486            }
487            '?' => {
488                state.advance(1);
489                ActionScriptSyntaxKind::Question
490            }
491            ':' => {
492                state.advance(1);
493                ActionScriptSyntaxKind::Colon
494            }
495            '(' => {
496                state.advance(1);
497                ActionScriptSyntaxKind::LeftParen
498            }
499            ')' => {
500                state.advance(1);
501                ActionScriptSyntaxKind::RightParen
502            }
503            '{' => {
504                state.advance(1);
505                ActionScriptSyntaxKind::LeftBrace
506            }
507            '}' => {
508                state.advance(1);
509                ActionScriptSyntaxKind::RightBrace
510            }
511            '[' => {
512                state.advance(1);
513                ActionScriptSyntaxKind::LeftBracket
514            }
515            ']' => {
516                state.advance(1);
517                ActionScriptSyntaxKind::RightBracket
518            }
519            ';' => {
520                state.advance(1);
521                ActionScriptSyntaxKind::Semicolon
522            }
523            ',' => {
524                state.advance(1);
525                ActionScriptSyntaxKind::Comma
526            }
527            '.' => {
528                state.advance(1);
529                ActionScriptSyntaxKind::Dot
530            }
531            '@' => {
532                state.advance(1);
533                ActionScriptSyntaxKind::At
534            }
535            '#' => {
536                state.advance(1);
537                ActionScriptSyntaxKind::Hash
538            }
539            '$' => {
540                state.advance(1);
541                ActionScriptSyntaxKind::Dollar
542            }
543            '\\' => {
544                state.advance(1);
545                ActionScriptSyntaxKind::Backslash
546            }
547            '\'' => {
548                state.advance(1);
549                ActionScriptSyntaxKind::Quote
550            }
551            '"' => {
552                state.advance(1);
553                ActionScriptSyntaxKind::DoubleQuote
554            }
555            '`' => {
556                state.advance(1);
557                ActionScriptSyntaxKind::Backtick
558            }
559            _ => return false,
560        };
561
562        let end = state.get_position();
563        state.add_token(kind, start, end);
564        true
565    }
566
567    fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
568        let start = state.get_position();
569
570        // normal string: "..." or '...'
571        if state.current() == Some('"') || state.current() == Some('\'') {
572            let quote_char = state.current().unwrap();
573            state.advance(1);
574            let mut escaped = false;
575            while let Some(ch) = state.peek() {
576                if ch == quote_char && !escaped {
577                    state.advance(1); // consume closing quote
578                    break;
579                }
580                state.advance(ch.len_utf8());
581                if escaped {
582                    escaped = false;
583                    continue;
584                }
585                if ch == '\\' {
586                    escaped = true;
587                    continue;
588                }
589                if ch == '\n' || ch == '\r' {
590                    break;
591                }
592            }
593            state.add_token(ActionScriptSyntaxKind::StringLiteral, start, state.get_position());
594            return true;
595        }
596        false
597    }
598
599    fn lex_char_literal<S: Source>(&self, state: &mut State<S>) -> bool {
600        let start = state.get_position();
601        if state.peek() != Some('\'') {
602            return false;
603        }
604
605        state.advance(1); // consume opening quote
606        if let Some('\\') = state.peek() {
607            state.advance(1); // consume backslash
608            if let Some(escaped) = state.peek() {
609                state.advance(escaped.len_utf8()); // consume escaped character
610            }
611        }
612        else if let Some(ch) = state.peek() {
613            state.advance(ch.len_utf8()); // consume character
614        }
615
616        if state.peek() == Some('\'') {
617            state.advance(1); // consume closing quote
618            state.add_token(ActionScriptSyntaxKind::CharLiteral, start, state.get_position());
619            return true;
620        }
621
622        // Reset position if not a valid char literal
623        state.set_position(start);
624        false
625    }
626}