parse_js/
ast.rs

1use crate::error::SyntaxError;
2use crate::error::SyntaxErrorType;
3use crate::loc::Loc;
4use crate::num::JsNumber;
5use crate::operator::OperatorName;
6use ahash::AHashMap;
7use core::fmt::Debug;
8use serde::Serialize;
9use serde::Serializer;
10use std::any::Any;
11use std::any::TypeId;
12use std::fmt;
13use std::fmt::Formatter;
14
/// Type-indexed side data attached to a node: the map holds at most one type-erased value per `TypeId`.
#[derive(Default)]
pub struct NodeAssocData {
  // Keyed by the `TypeId` of the stored value's type; values are boxed as `dyn Any`.
  map: AHashMap<TypeId, Box<dyn Any>>,
}
19
20impl NodeAssocData {
21  pub fn get<T: Any>(&self) -> Option<&T> {
22    let t = TypeId::of::<T>();
23    self.map.get(&t).map(|v| v.downcast_ref().unwrap())
24  }
25
26  pub fn set<T: Any>(&mut self, v: T) {
27    let t = TypeId::of::<T>();
28    self.map.insert(t, Box::from(v));
29  }
30}
31
#[cfg(test)]
mod tests {
  use crate::ast::NodeAssocData;

  /// A value stored with `set` can be read back through `get` using the same type.
  #[test]
  fn test_node_assoc_data() {
    struct MyType(u32);

    let mut data = NodeAssocData::default();
    data.set(MyType(32));

    let MyType(stored) = data.get::<MyType>().unwrap();
    assert_eq!(*stored, 32);
  }
}
45
/// A syntax tree node: a piece of syntax together with a location and arbitrary associated data.
pub struct Node {
  // A location is not a SourceRange; consider that after some transformations, it's possible to create entirely new nodes that don't exist at all in the source code. Also, sometimes we cannot be bothered to set a location, or can only provide an approximate/best-effort location.
  pub loc: Loc,
  // The syntax this node holds. Boxed because `Syntax` variants themselves contain `Node` values.
  pub stx: Box<Syntax>,
  // Extra type-indexed data attached to this node; empty when the node is created via `Node::new`.
  pub assoc: NodeAssocData,
}
52
53impl Node {
54  pub fn new(loc: Loc, stx: Syntax) -> Node {
55    Node {
56      loc,
57      stx: Box::new(stx),
58      assoc: NodeAssocData::default(),
59    }
60  }
61
62  /// Create an error at this node's location.
63  pub fn error(&self, typ: SyntaxErrorType) -> SyntaxError {
64    self.loc.error(typ, None)
65  }
66
67  pub fn as_ident(&self) -> &str {
68    match self.stx.as_ref() {
69      Syntax::IdentifierExpr { name } => name.as_str(),
70      _ => unreachable!(),
71    }
72  }
73}
74
75impl Debug for Node {
76  fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
77    self.stx.fmt(f)
78  }
79}
80
81impl Serialize for Node {
82  fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
83    self.stx.serialize(serializer)
84  }
85}
86
// These are for readability only, and do not increase type safety or define different structures.
// Each alias is exactly `Node`; the name only signals which family of `Syntax` variants is expected at that position.
type Declaration = Node;
type Expression = Node;
type Pattern = Node;
type Statement = Node;
92
/// The kind of variable declaration; used by `VarDecl` and by the left-hand side of for-in/for-of statements.
#[derive(Eq, PartialEq, Clone, Copy, Debug, Serialize)]
pub enum VarDeclMode {
  Const,
  Let,
  Var,
}
99
/// One element of a `LiteralArrayExpr`.
#[derive(Debug, Serialize)]
pub enum ArrayElement {
  // An element carrying a single expression.
  Single(Expression),
  // NOTE(review): presumably a spread element (`...expr`) despite the name `Rest` — confirm against the parser.
  Rest(Expression),
  // An element with no expression.
  Empty,
}
106
// WARNING: This enum must exist, and the two variants cannot be merged by representing Direct with an IdentifierExpr, as it's not a usage of a variable!
/// The key of a class member, object member, or object pattern property.
#[derive(Debug, Serialize)]
pub enum ClassOrObjectMemberKey {
  // Identifier, keyword, string, or number.
  // NOTE: This isn't used by ObjectMemberType::Shorthand.
  Direct(String),
  // A key given by an expression rather than a literal name.
  Computed(Expression),
}
115
/// The value side of a class member or of a valued object member.
#[derive(Debug, Serialize)]
pub enum ClassOrObjectMemberValue {
  Getter {
    function: Node, // Always Function. `params` is empty.
  },
  Method {
    function: Node, // Always Function.
  },
  Property {
    // Must be Some if object, as shorthands are covered by ObjectMemberType::Shorthand (and are initialised).
    initializer: Option<Expression>,
  },
  Setter {
    function: Node, // Always Function. `params` contains exactly one ParamDecl with no `default_value` or `rest`.
  },
}
132
/// The form an `ObjectMember` takes inside a `LiteralObjectExpr`.
#[derive(Debug, Serialize)]
pub enum ObjectMemberType {
  // A member with an explicit key and a value (getter, method, property, or setter).
  Valued {
    key: ClassOrObjectMemberKey,
    value: ClassOrObjectMemberValue,
  },
  // A member written as a bare identifier, with no separate key.
  Shorthand {
    identifier: Node, // Always IdentifierExpr.
  },
  // NOTE(review): presumably a spread member (`...value`) despite the name `Rest` — confirm against the parser.
  Rest {
    value: Expression,
  },
}
146
/// A present (non-skipped) element of an `ArrayPattern`.
#[derive(Debug, Serialize)]
pub struct ArrayPatternElement {
  // The pattern this element binds to.
  pub target: Pattern,
  // Optional default expression for the element.
  pub default_value: Option<Expression>,
}
152
/// One `target as alias` entry in a specific import/export name list (see `ExportNames::Specific`).
#[derive(Debug, Serialize)]
pub struct ExportName {
  // For simplicity, we always set both fields; for shorthands, both nodes are identical.
  // NOTE(review): `target` is a String rather than a node here, so "both nodes" presumably means both carry the same name — confirm.
  pub target: String,
  // IdentifierPattern.
  pub alias: Pattern,
}
160
/// The names brought in or out by an import or export list; used by `ImportStmt` and `ExportListStmt`.
#[derive(Debug, Serialize)]
pub enum ExportNames {
  // `import * as name`
  // `export * from "module"`
  // `export * as name from "module"`
  // IdentifierPattern.
  All(Option<Pattern>),
  // `import {a as b, c, default as e}`
  // `export {a as default, b as c, d}`
  // `export {default, a as b, c} from "module"`
  // `default` is still a name, so we don't use an enum.
  Specific(Vec<ExportName>),
}
174
/// A single declarator inside a `VarDecl`: a pattern with an optional initializer.
#[derive(Debug, Serialize)]
pub struct VariableDeclarator {
  // The pattern being declared.
  pub pattern: Pattern,
  // The initializer expression, if one is present.
  pub initializer: Option<Expression>,
}
180
/// The `init` part of a `ForStmt`.
#[derive(Debug, Serialize)]
pub enum ForInit {
  // No initializer.
  None,
  // An expression initializer.
  Expression(Expression),
  // A declaration initializer.
  Declaration(Declaration),
}
187
/// One piece of a template literal; used by `LiteralTemplateExpr` and `TaggedTemplateExpr`.
#[derive(Debug, Serialize)]
pub enum LiteralTemplatePart {
  // An embedded expression.
  Substitution(Expression),
  // A run of literal text.
  String(String),
}
193
/// Every kind of syntax a `Node` can hold, grouped below into patterns, functions, declarations, expressions, statements, and others.
///
/// The serde `tag = "$t"` attribute makes the serialized form internally tagged: the variant name is written to a `$t` field alongside the variant's own fields.
#[derive(Debug, Serialize)]
#[serde(tag = "$t")]
pub enum Syntax {
  // Patterns.
  IdentifierPattern {
    name: String,
  },
  // `const fn = (a: any, b: any, ...{ length, ...c }: any[]) => void 0` is allowed.
  ArrayPattern {
    // Unnamed elements can exist.
    elements: Vec<Option<ArrayPatternElement>>,
    rest: Option<Pattern>,
  },
  // For an object pattern, `...` must be followed by an identifier.
  // `const fn = ({ a: { b = c } = d, ...e }: any) => void 0` is possible.
  ObjectPattern {
    // List of ObjectPatternProperty nodes.
    properties: Vec<Node>,
    // This must be IdentifierPattern, anything else is illegal.
    rest: Option<Pattern>,
  },
  // Not really a pattern but functions similarly; separated out for easy replacement when minifying.
  ClassOrFunctionName {
    name: String,
  },

  // Functions.
  // This common type exists for better downstream usage, as one type is easier to match on and wrangle than many different types (ArrowFunctionExpr, ClassMember::Method, FunctionDecl, etc.).
  Function {
    arrow: bool,
    async_: bool,
    generator: bool,
    parameters: Vec<Declaration>, // Always ParamDecl.
    body: Node, // Could be Expression if arrow function. Otherwise, it's FunctionBody.
  },
  // A function body is different from a block statement, as the scopes are different. This doesn't mean much at the parser level, but helps with downstream usages.
  FunctionBody {
    body: Vec<Statement>,
  },

  // Declarations.
  ClassDecl {
    export: bool,
    export_default: bool,
    name: Option<Node>, // Always ClassOrFunctionName. Name can only be omitted in a default export, although a default export class can still have a name.
    extends: Option<Expression>,
    members: Vec<Node>, // Always ClassMember.
  },
  FunctionDecl {
    export: bool,
    export_default: bool,
    name: Option<Node>, // Always ClassOrFunctionName. Name can only be omitted in a default export, although a default export function can still have a name.
    function: Node,     // Always Function.
  },
  ParamDecl {
    rest: bool,
    pattern: Pattern,
    default_value: Option<Expression>,
  },
  VarDecl {
    export: bool,
    mode: VarDeclMode,
    declarators: Vec<VariableDeclarator>,
  },

  // Expressions.
  ArrowFunctionExpr {
    parenthesised: bool,
    function: Node, // Always Function.
  },
  BinaryExpr {
    parenthesised: bool,
    operator: OperatorName,
    left: Expression,
    right: Expression,
  },
  CallExpr {
    optional_chaining: bool,
    parenthesised: bool,
    callee: Expression,
    arguments: Vec<Node>,
  },
  ClassExpr {
    parenthesised: bool,
    name: Option<Node>,
    extends: Option<Expression>,
    members: Vec<Node>, // Always ClassMember.
  },
  ConditionalExpr {
    parenthesised: bool,
    test: Expression,
    consequent: Expression,
    alternate: Expression,
  },
  ComputedMemberExpr {
    optional_chaining: bool,
    object: Expression,
    member: Expression,
  },
  FunctionExpr {
    parenthesised: bool,
    name: Option<Node>,
    function: Node,
  },
  IdentifierExpr {
    name: String,
  },
  ImportExpr {
    module: Expression,
  },
  ImportMeta {},
  JsxAttribute {
    name: Expression,          // JsxName
    value: Option<Expression>, // JsxExpressionContainer or JsxText
  },
  JsxElement {
    // When an element name starts with a lowercase ASCII character, it's a built-in component like '<div>' or '<span>'.
    // For easier differentiation, we use IdentifierExpr for user-defined components as they are references to symbols and built-in components are not.
    // https://reactjs.org/docs/jsx-in-depth.html#user-defined-components-must-be-capitalized
    name: Option<Expression>, // IdentifierExpr or JsxName or JsxMemberExpression; None if fragment
    attributes: Vec<Expression>, // JsxAttribute or JsxSpreadAttribute; always empty if fragment
    children: Vec<Expression>, // JsxElement or JsxExpressionContainer or JsxText
  },
  JsxExpressionContainer {
    value: Expression,
  },
  JsxMemberExpression {
    // This is a separate property to indicate it's required and for easier pattern matching.
    base: Node, // Always IdentifierExpr
    path: Vec<String>,
  },
  JsxName {
    namespace: Option<String>,
    name: String,
  },
  JsxSpreadAttribute {
    value: Expression,
  },
  JsxText {
    value: String,
  },
  LiteralArrayExpr {
    elements: Vec<ArrayElement>,
  },
  LiteralBigIntExpr {
    value: String,
  },
  LiteralBooleanExpr {
    value: bool,
  },
  LiteralNull {},
  LiteralNumberExpr {
    value: JsNumber,
  },
  LiteralObjectExpr {
    // List of ObjectMember nodes.
    members: Vec<Node>,
  },
  LiteralRegexExpr {
    value: String, // Including delimiter slashes and any flags.
  },
  LiteralStringExpr {
    value: String,
  },
  LiteralTemplateExpr {
    parts: Vec<LiteralTemplatePart>,
  },
  // Dedicated special type to easily distinguish when analysing and minifying. Also done to avoid using IdentifierExpr as right, which is incorrect (not a variable usage).
  MemberExpr {
    parenthesised: bool,
    optional_chaining: bool,
    left: Expression,
    right: String,
  },
  SuperExpr {},
  ThisExpr {},
  TaggedTemplateExpr {
    function: Expression,
    parts: Vec<LiteralTemplatePart>,
  },
  UnaryExpr {
    parenthesised: bool,
    operator: OperatorName,
    argument: Expression,
  },
  UnaryPostfixExpr {
    parenthesised: bool,
    operator: OperatorName,
    argument: Expression,
  },

  // Statements.
  BlockStmt {
    body: Vec<Statement>,
  },
  BreakStmt {
    label: Option<String>,
  },
  ContinueStmt {
    label: Option<String>,
  },
  DebuggerStmt {},
  DoWhileStmt {
    condition: Expression,
    body: Statement,
  },
  EmptyStmt {},
  ExportDefaultExprStmt {
    expression: Expression,
  },
  ExportListStmt {
    names: ExportNames,
    from: Option<String>,
  },
  ExpressionStmt {
    expression: Expression,
  },
  IfStmt {
    test: Expression,
    consequent: Statement,
    alternate: Option<Statement>,
  },
  ImportStmt {
    // IdentifierPattern.
    default: Option<Pattern>,
    names: Option<ExportNames>,
    module: String,
  },
  ForStmt {
    init: ForInit,
    condition: Option<Expression>,
    post: Option<Expression>,
    body: Statement, // Won't be BlockStmt, but ForBody instead. (However, could be another type of statement.)
  },
  ForInStmt {
    // for-in and for-of statements can have `x`/`[x]`/`{x:a}`/etc. on the lhs or `var x`/`var [x]`/etc. on the lhs. But for the latter, while it's technically a Decl, it's always a VarDecl with exactly one declaration that has no initialiser. If you strip down VarDecl to this, it's basically just a VarDeclMode and a Pattern. Therefore, we can represent both a destructuring expr or a decl on the lhs with an Option<VarDeclMode> and a Pattern.
    decl_mode: Option<VarDeclMode>,
    pat: Pattern,
    rhs: Expression,
    body: Statement, // Won't be BlockStmt, but ForBody instead. (However, could be another type of statement.)
  },
  ForOfStmt {
    await_: bool,
    // See comment in ForInStmt.
    decl_mode: Option<VarDeclMode>,
    pat: Pattern,
    rhs: Expression,
    body: Statement, // Won't be BlockStmt, but ForBody instead. (However, could be another type of statement.)
  },
  LabelStmt {
    name: String,
    statement: Statement,
  },
  ReturnStmt {
    value: Option<Expression>,
  },
  SwitchStmt {
    test: Expression,
    // NOTE(review): presumably always SwitchBranch nodes — confirm against the parser.
    branches: Vec<Node>,
  },
  ThrowStmt {
    value: Expression,
  },
  TryStmt {
    wrapped: Statement,
    // One of these must be present.
    // NOTE(review): `catch` is presumably always a CatchBlock node — confirm against the parser.
    catch: Option<Node>,
    finally: Option<Statement>,
  },
  WhileStmt {
    condition: Expression,
    body: Statement,
  },

  // Others.
  TopLevel {
    body: Vec<Statement>,
  },
  CallArg {
    spread: bool,
    value: Expression,
  },
  CatchBlock {
    parameter: Option<Pattern>,
    body: Vec<Statement>, // We don't want to use BlockStmt as the new block scope starts with the parameter, not the braces. This differentiation ensures BlockStmt specifically means a new scope, helpful for downstream usages. See also: FunctionBody.
  },
  ClassMember {
    key: ClassOrObjectMemberKey,
    static_: bool,
    value: ClassOrObjectMemberValue,
  },
  // Similar purpose to CatchBlock and FunctionBody. (The scope for a `for` statement starts before the braces, so don't mix with BlockStmt.)
  ForBody {
    body: Vec<Statement>,
  },
  // This is a node instead of an enum so that we can replace it when minifying e.g. expanding shorthand to `key: value`.
  ObjectMember {
    typ: ObjectMemberType,
  },
  ObjectPatternProperty {
    key: ClassOrObjectMemberKey,
    // If `shorthand`, `key` is Direct and `target` is IdentifierPattern of same name. This way, there is always an IdentifierPattern that exists and can be visited, instead of also having to consider ClassOrObjectMemberKey::Direct as identifier if shorthand.
    target: Pattern,
    shorthand: bool,
    default_value: Option<Expression>,
  },
  SwitchBranch {
    // If None, it's `default`.
    case: Option<Expression>,
    body: Vec<Statement>,
  },
}