Skip to main content

obeli_sk_boa_parser/parser/
mod.rs

1//! Boa parser implementation.
2
3mod cursor;
4mod expression;
5mod statement;
6
7pub(crate) mod function;
8
9#[cfg(test)]
10mod tests;
11
12use crate::{
13    Error, Source,
14    error::ParseResult,
15    lexer::{Error as LexError, InputElement},
16    parser::{
17        cursor::Cursor,
18        function::{FormalParameters, FunctionStatementList},
19    },
20    source::ReadChar,
21};
22use boa_ast::{
23    Position, StatementList,
24    function::{FormalParameterList, FunctionBody},
25    operations::{
26        ContainsSymbol, all_private_identifiers_valid, check_labels, contains,
27        contains_invalid_object_literal, lexically_declared_names, var_declared_names,
28    },
29    scope::Scope,
30};
31use boa_interner::{Interner, Sym};
32use rustc_hash::FxHashSet;
33use std::path::Path;
34
35use self::statement::ModuleItemList;
36
/// Pair returned when parsing a script: the `Script` AST together with its `SourceText`.
type ScriptParseOutput = (boa_ast::Script, boa_ast::SourceText);
/// Pair returned when parsing a module: the `Module` AST together with its `SourceText`.
type ModuleParseOutput = (boa_ast::Module, boa_ast::SourceText);
39
40/// Trait implemented by parsers.
41///
42/// This makes it possible to abstract over the underlying implementation of a parser.
43trait TokenParser<R>: Sized
44where
45    R: ReadChar,
46{
47    /// Output type for the parser.
48    type Output; // = Node; waiting for https://github.com/rust-lang/rust/issues/29661
49
50    /// Parses the token stream using the current parser.
51    ///
52    /// This method needs to be provided by the implementor type.
53    ///
54    /// # Errors
55    ///
56    /// It will fail if the cursor is not placed at the beginning of the expected non-terminal.
57    fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output>;
58}
59
/// Flag telling the parser whether a `yield` keyword is allowed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct AllowYield(bool);

impl From<bool> for AllowYield {
    /// Wraps a plain `bool` into the flag type.
    fn from(value: bool) -> Self {
        Self(value)
    }
}
69
/// Flag telling the parser whether an `await` keyword is allowed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct AllowAwait(bool);

impl From<bool> for AllowAwait {
    /// Wraps a plain `bool` into the flag type.
    fn from(value: bool) -> Self {
        Self(value)
    }
}
79
/// Flag telling the parser whether an `in` keyword is allowed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct AllowIn(bool);

impl From<bool> for AllowIn {
    /// Wraps a plain `bool` into the flag type.
    fn from(value: bool) -> Self {
        Self(value)
    }
}
89
/// Flag telling the parser whether a `return` keyword is allowed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct AllowReturn(bool);

impl From<bool> for AllowReturn {
    /// Wraps a plain `bool` into the flag type.
    fn from(value: bool) -> Self {
        Self(value)
    }
}
99
/// Flag telling the parser whether a `default` keyword is allowed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct AllowDefault(bool);

impl From<bool> for AllowDefault {
    /// Wraps a plain `bool` into the flag type.
    fn from(value: bool) -> Self {
        Self(value)
    }
}
109
/// Parser for the ECMAScript language.
///
/// This parser implementation tries to be conformant to the most recent
/// [ECMAScript language specification][spec], and it also implements some legacy features like
/// [labelled functions][label] or [duplicated block-level function definitions][block].
///
/// The lifetime `'a` is the lifetime of the borrowed source path; `R` is the reader type handed
/// to the internal cursor.
///
/// [spec]: https://tc39.es/ecma262/#sec-ecmascript-language-source-code
/// [label]: https://tc39.es/ecma262/#sec-labelled-function-declarations
/// [block]: https://tc39.es/ecma262/#sec-block-duplicates-allowed-static-semantics
#[derive(Debug)]
pub struct Parser<'a, R> {
    /// Path to the source being parsed.
    #[allow(unused)] // Good to have for future improvements.
    path: Option<&'a Path>,
    /// Cursor of the parser, pointing to the lexer and used to get tokens for the parser.
    cursor: Cursor<R>,
}
127
128impl<'a, R: ReadChar> Parser<'a, R> {
129    /// Create a new `Parser` with a `Source` as the input to parse.
130    pub fn new(source: Source<'a, R>) -> Self {
131        Self {
132            path: source.path,
133            cursor: Cursor::new(source.reader),
134        }
135    }
136
137    /// Parse the full input as a [ECMAScript Script][spec] into the boa AST representation without source text.
138    /// The resulting `Script` can be compiled into boa bytecode and executed in the boa vm.
139    ///
140    /// # Errors
141    ///
142    /// Will return `Err` on any parsing error, including invalid reads of the bytes being parsed.
143    ///
144    /// [spec]: https://tc39.es/ecma262/#prod-Script
145    pub fn parse_script(
146        &mut self,
147        scope: &Scope,
148        interner: &mut Interner,
149    ) -> ParseResult<boa_ast::Script> {
150        self.parse_script_with_source(scope, interner).map(|x| x.0)
151    }
152
153    /// Parse the full input as a [ECMAScript Script][spec] into the boa AST representation with source text.
154    /// The resulting `Script` can be compiled into boa bytecode and executed in the boa vm.
155    ///
156    /// # Errors
157    ///
158    /// Will return `Err` on any parsing error, including invalid reads of the bytes being parsed.
159    ///
160    /// [spec]: https://tc39.es/ecma262/#prod-Script
161    pub fn parse_script_with_source(
162        &mut self,
163        scope: &Scope,
164        interner: &mut Interner,
165    ) -> ParseResult<ScriptParseOutput> {
166        self.cursor.set_goal(InputElement::HashbangOrRegExp);
167        let (mut ast, source) = ScriptParser::new(false).parse(&mut self.cursor, interner)?;
168        if let Err(reason) = ast.analyze_scope(scope, interner) {
169            return Err(Error::scope_analysis(reason));
170        }
171        Ok((ast, source))
172    }
173
174    /// Parse the full input as an [ECMAScript Module][spec] into the boa AST representation without source text.
175    /// The resulting `ModuleItemList` can be compiled into boa bytecode and executed in the boa vm.
176    ///
177    /// # Errors
178    ///
179    /// Will return `Err` on any parsing error, including invalid reads of the bytes being parsed.
180    ///
181    /// [spec]: https://tc39.es/ecma262/#prod-Module
182    pub fn parse_module(
183        &mut self,
184        scope: &Scope,
185        interner: &mut Interner,
186    ) -> ParseResult<boa_ast::Module>
187    where
188        R: ReadChar,
189    {
190        self.parse_module_with_source(scope, interner).map(|x| x.0)
191    }
192
193    /// Parse the full input as an [ECMAScript Module][spec] into the boa AST representation with source text.
194    /// The resulting `ModuleItemList` can be compiled into boa bytecode and executed in the boa vm.
195    ///
196    /// # Errors
197    ///
198    /// Will return `Err` on any parsing error, including invalid reads of the bytes being parsed.
199    ///
200    /// [spec]: https://tc39.es/ecma262/#prod-Module
201    pub fn parse_module_with_source(
202        &mut self,
203        scope: &Scope,
204        interner: &mut Interner,
205    ) -> ParseResult<ModuleParseOutput>
206    where
207        R: ReadChar,
208    {
209        self.cursor.set_goal(InputElement::HashbangOrRegExp);
210        let (mut module, source) = ModuleParser.parse(&mut self.cursor, interner)?;
211        if let Err(reason) = module.analyze_scope(scope, interner) {
212            return Err(Error::scope_analysis(reason));
213        }
214        Ok((module, source))
215    }
216
217    /// [`19.2.1.1 PerformEval ( x, strictCaller, direct )`][spec]
218    ///
219    /// Parses the source text input of an `eval` call.
220    ///
221    /// # Errors
222    ///
223    /// Will return `Err` on any parsing error, including invalid reads of the bytes being parsed.
224    ///
225    /// [spec]: https://tc39.es/ecma262/#sec-performeval
226    pub fn parse_eval(
227        &mut self,
228        direct: bool,
229        interner: &mut Interner,
230    ) -> ParseResult<ScriptParseOutput> {
231        self.cursor.set_goal(InputElement::HashbangOrRegExp);
232        ScriptParser::new(direct).parse(&mut self.cursor, interner)
233    }
234
235    /// Parses the full input as an [ECMAScript `FunctionBody`][spec] into the boa AST representation.
236    ///
237    /// # Errors
238    ///
239    /// Will return `Err` on any parsing error, including invalid reads of the bytes being parsed.
240    ///
241    /// [spec]: https://tc39.es/ecma262/#prod-FunctionBody
242    pub fn parse_function_body(
243        &mut self,
244        interner: &mut Interner,
245        allow_yield: bool,
246        allow_await: bool,
247    ) -> ParseResult<FunctionBody> {
248        let mut parser = FunctionStatementList::new(allow_yield, allow_await, "function body");
249        parser.parse_full_input(true);
250        parser.parse(&mut self.cursor, interner)
251    }
252
253    /// Parses the full input as an [ECMAScript `FormalParameterList`][spec] into the boa AST representation.
254    ///
255    /// # Errors
256    ///
257    /// Will return `Err` on any parsing error, including invalid reads of the bytes being parsed.
258    ///
259    /// [spec]: https://tc39.es/ecma262/#prod-FormalParameterList
260    pub fn parse_formal_parameters(
261        &mut self,
262        interner: &mut Interner,
263        allow_yield: bool,
264        allow_await: bool,
265    ) -> ParseResult<FormalParameterList> {
266        FormalParameters::new(allow_yield, allow_await).parse(&mut self.cursor, interner)
267    }
268}
269
270impl<R> Parser<'_, R> {
271    /// Set the parser strict mode to true.
272    pub fn set_strict(&mut self)
273    where
274        R: ReadChar,
275    {
276        self.cursor.set_strict(true);
277    }
278
279    /// Set the parser JSON mode to true.
280    pub fn set_json_parse(&mut self)
281    where
282        R: ReadChar,
283    {
284        self.cursor.set_json_parse(true);
285    }
286
287    /// Set the unique identifier for the parser.
288    pub fn set_identifier(&mut self, identifier: u32)
289    where
290        R: ReadChar,
291    {
292        self.cursor.set_identifier(identifier);
293    }
294}
295
/// Parser for a complete script.
///
/// More information:
///  - [ECMAScript specification][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-Script
#[derive(Debug, Clone, Copy)]
pub struct ScriptParser {
    /// Whether the script being parsed is the code of a direct `eval` call.
    direct_eval: bool,
}

impl ScriptParser {
    /// Builds a `Script` parser; `direct_eval` marks the input as direct eval code.
    #[inline]
    const fn new(direct_eval: bool) -> Self {
        Self { direct_eval }
    }
}
314
315impl<R> TokenParser<R> for ScriptParser
316where
317    R: ReadChar,
318{
319    type Output = ScriptParseOutput;
320
321    fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> {
322        let stmts =
323            ScriptBody::new(true, cursor.strict(), self.direct_eval).parse(cursor, interner)?;
324        let script = boa_ast::Script::new(stmts);
325
326        // It is a Syntax Error if the LexicallyDeclaredNames of ScriptBody contains any duplicate entries.
327        let mut lexical_names = FxHashSet::default();
328        for name in lexically_declared_names(&script) {
329            if !lexical_names.insert(name) {
330                return Err(Error::general(
331                    "lexical name declared multiple times",
332                    Position::new(1, 1),
333                ));
334            }
335        }
336
337        // It is a Syntax Error if any element of the LexicallyDeclaredNames of ScriptBody also occurs in the VarDeclaredNames of ScriptBody.
338        for name in var_declared_names(&script) {
339            if lexical_names.contains(&name) {
340                return Err(Error::general(
341                    "lexical name declared multiple times",
342                    Position::new(1, 1),
343                ));
344            }
345        }
346
347        let source = cursor.take_source();
348        Ok((script, source))
349    }
350}
351
/// Parser for the body of a script.
///
/// More information:
///  - [ECMAScript specification][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-ScriptBody
#[derive(Debug, Clone, Copy)]
pub struct ScriptBody {
    /// Forwarded to the statement-list parser as its directive-prologue flag.
    directive_prologues: bool,
    /// Forwarded to the statement-list parser as its strict-mode flag.
    strict: bool,
    /// When set, the `super`, `new.target` and private-identifier early errors are skipped.
    direct_eval: bool,
}

impl ScriptBody {
    /// Builds a `ScriptBody` parser from its three configuration flags.
    #[inline]
    const fn new(directive_prologues: bool, strict: bool, direct_eval: bool) -> Self {
        Self {
            directive_prologues,
            strict,
            direct_eval,
        }
    }
}
376
377impl<R> TokenParser<R> for ScriptBody
378where
379    R: ReadChar,
380{
381    type Output = StatementList;
382
383    fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> {
384        let (body, _end) = statement::StatementList::new(
385            false,
386            false,
387            false,
388            &[],
389            self.directive_prologues,
390            self.strict,
391        )
392        .parse(cursor, interner)?;
393
394        if !self.direct_eval {
395            // It is a Syntax Error if StatementList Contains super unless the source text containing super is eval
396            // code that is being processed by a direct eval.
397            // Additional early error rules for super within direct eval are defined in 19.2.1.1.
398            if contains(&body, ContainsSymbol::Super) {
399                return Err(Error::general("invalid super usage", Position::new(1, 1)));
400            }
401            // It is a Syntax Error if StatementList Contains NewTarget unless the source text containing NewTarget
402            // is eval code that is being processed by a direct eval.
403            // Additional early error rules for NewTarget in direct eval are defined in 19.2.1.1.
404            if contains(&body, ContainsSymbol::NewTarget) {
405                return Err(Error::general(
406                    "invalid new.target usage",
407                    Position::new(1, 1),
408                ));
409            }
410
411            // It is a Syntax Error if AllPrivateIdentifiersValid of StatementList with
412            // argument « » is false unless the source text containing ScriptBody is
413            // eval code that is being processed by a direct eval.
414            if !all_private_identifiers_valid(&body, Vec::new()) {
415                return Err(Error::general(
416                    "invalid private identifier usage",
417                    Position::new(1, 1),
418                ));
419            }
420        }
421
422        if let Err(error) = check_labels(&body) {
423            return Err(Error::lex(LexError::Syntax(
424                error.message(interner).into(),
425                Position::new(1, 1),
426            )));
427        }
428
429        if contains_invalid_object_literal(&body) {
430            return Err(Error::lex(LexError::Syntax(
431                "invalid object literal in script statement list".into(),
432                Position::new(1, 1),
433            )));
434        }
435
436        Ok(body)
437    }
438}
439
/// Parses a full module.
///
/// This is a unit struct: it has no fields and therefore carries no configuration.
///
/// More information:
///  - [ECMAScript specification][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-Module
#[derive(Debug, Clone, Copy)]
struct ModuleParser;
448
449impl<R> TokenParser<R> for ModuleParser
450where
451    R: ReadChar,
452{
453    type Output = ModuleParseOutput;
454
455    fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> {
456        cursor.set_module();
457
458        let module = boa_ast::Module::new(ModuleItemList.parse(cursor, interner)?);
459
460        // It is a Syntax Error if the LexicallyDeclaredNames of ModuleItemList contains any duplicate entries.
461        let mut bindings = FxHashSet::default();
462        for name in lexically_declared_names(&module) {
463            if !bindings.insert(name) {
464                return Err(Error::general(
465                    format!(
466                        "lexical name `{}` declared multiple times",
467                        interner.resolve_expect(name)
468                    ),
469                    Position::new(1, 1),
470                ));
471            }
472        }
473
474        // It is a Syntax Error if any element of the LexicallyDeclaredNames of ModuleItemList also occurs in the
475        // VarDeclaredNames of ModuleItemList.
476        for name in var_declared_names(&module) {
477            if !bindings.insert(name) {
478                return Err(Error::general(
479                    format!(
480                        "lexical name `{}` declared multiple times",
481                        interner.resolve_expect(name)
482                    ),
483                    Position::new(1, 1),
484                ));
485            }
486        }
487
488        // It is a Syntax Error if the ExportedNames of ModuleItemList contains any duplicate entries.
489        {
490            let mut exported_names = FxHashSet::default();
491            for name in module.items().exported_names() {
492                if !exported_names.insert(name) {
493                    return Err(Error::general(
494                        format!(
495                            "exported name `{}` declared multiple times",
496                            interner.resolve_expect(name)
497                        ),
498                        Position::new(1, 1),
499                    ));
500                }
501            }
502        }
503
504        // It is a Syntax Error if any element of the ExportedBindings of ModuleItemList does not also occur in either
505        // the VarDeclaredNames of ModuleItemList, or the LexicallyDeclaredNames of ModuleItemList.
506        for name in module.items().exported_bindings() {
507            if !bindings.contains(&name) {
508                return Err(Error::general(
509                    format!(
510                        "could not find the exported binding `{}` in the declared names of the module",
511                        interner.resolve_expect(name)
512                    ),
513                    Position::new(1, 1),
514                ));
515            }
516        }
517
518        // It is a Syntax Error if ModuleItemList Contains super.
519        if contains(&module, ContainsSymbol::Super) {
520            return Err(Error::general(
521                "module cannot contain `super` on the top-level",
522                Position::new(1, 1),
523            ));
524        }
525
526        // It is a Syntax Error if ModuleItemList Contains NewTarget.
527        if contains(&module, ContainsSymbol::NewTarget) {
528            return Err(Error::general(
529                "module cannot contain `new.target` on the top-level",
530                Position::new(1, 1),
531            ));
532        }
533
534        // It is a Syntax Error if ContainsDuplicateLabels of ModuleItemList with argument « » is true.
535        // It is a Syntax Error if ContainsUndefinedBreakTarget of ModuleItemList with argument « » is true.
536        // It is a Syntax Error if ContainsUndefinedContinueTarget of ModuleItemList with arguments « » and « » is true.
537        check_labels(&module).map_err(|error| {
538            Error::lex(LexError::Syntax(
539                error.message(interner).into(),
540                Position::new(1, 1),
541            ))
542        })?;
543
544        // It is a Syntax Error if AllPrivateIdentifiersValid of ModuleItemList with argument « » is false.
545        if !all_private_identifiers_valid(&module, Vec::new()) {
546            return Err(Error::general(
547                "invalid private identifier usage",
548                Position::new(1, 1),
549            ));
550        }
551
552        let source = cursor.take_source();
553        Ok((module, source))
554    }
555}
556
557/// Helper to check if any parameter names are declared in the given list.
558fn name_in_lexically_declared_names(
559    bound_names: &[Sym],
560    lexical_names: &[Sym],
561    position: Position,
562    interner: &Interner,
563) -> ParseResult<()> {
564    for name in bound_names {
565        if lexical_names.contains(name) {
566            return Err(Error::general(
567                format!(
568                    "formal parameter `{}` declared in lexically declared names",
569                    interner.resolve_expect(*name)
570                ),
571                position,
572            ));
573        }
574    }
575    Ok(())
576}
577
578/// Trait to reduce boilerplate in the parser.
579trait OrAbrupt<T> {
580    /// Will convert an `Ok(None)` to an [`Error::AbruptEnd`] or return the inner type if not.
581    fn or_abrupt(self) -> ParseResult<T>;
582}
583
584impl<T> OrAbrupt<T> for ParseResult<Option<T>> {
585    fn or_abrupt(self) -> ParseResult<T> {
586        self?.ok_or(Error::AbruptEnd)
587    }
588}