Skip to main content

boa_parser/lexer/
token.rs

//! Boa's implementation of all ECMAScript [Token]s.
//!
//! More information:
//!  - [ECMAScript reference][spec]
//!
//! [spec]: https://tc39.es/ecma262/#sec-tokens
7
8use crate::lexer::template::TemplateString;
9use bitflags::bitflags;
10use boa_ast::{Keyword, LinearSpan, PositionGroup, Punctuator, Span, Spanned};
11use boa_interner::{Interner, Sym};
12use num_bigint::BigInt;
13
14/// This represents the smallest individual words, phrases, or characters that JavaScript can understand.
15///
16/// More information:
17///  - [ECMAScript reference][spec]
18///
19/// [spec]: https://tc39.es/ecma262/#sec-tokens
20#[derive(Debug, Clone, PartialEq)]
21pub struct Token {
22    /// The token kind, which contains the actual data of the token.
23    kind: TokenKind,
24    /// The token position in the original source code.
25    span: Span,
26    /// The token linear position in the original source code.
27    linear_span: LinearSpan,
28}
29
30impl Token {
31    /// Create a new detailed token from the token data, line number, column number, and linear position
32    #[inline]
33    #[must_use]
34    pub const fn new(kind: TokenKind, span: Span, linear_span: LinearSpan) -> Self {
35        Self {
36            kind,
37            span,
38            linear_span,
39        }
40    }
41
42    /// Create a new detailed token from the token data, line number and column number
43    #[inline]
44    #[must_use]
45    pub fn new_by_position_group(
46        kind: TokenKind,
47        start: PositionGroup,
48        end: PositionGroup,
49    ) -> Self {
50        Self::new(
51            kind,
52            Span::new(start.position(), end.position()),
53            LinearSpan::new(start.linear_position(), end.linear_position()),
54        )
55    }
56
57    /// Gets the kind of the token.
58    #[inline]
59    #[must_use]
60    pub const fn kind(&self) -> &TokenKind {
61        &self.kind
62    }
63
64    /// Gets the starting position group of the token.
65    #[inline]
66    #[must_use]
67    pub const fn start_group(&self) -> PositionGroup {
68        PositionGroup::new(self.span.start(), self.linear_span.start())
69    }
70
71    /// Gets the token span in the original source code.
72    #[inline]
73    #[must_use]
74    pub const fn linear_span(&self) -> LinearSpan {
75        self.linear_span
76    }
77
78    /// Converts the token to a `String`.
79    #[inline]
80    pub(crate) fn to_string(&self, interner: &Interner) -> String {
81        self.kind.to_string(interner)
82    }
83}
84
85impl Spanned for Token {
86    #[inline]
87    fn span(&self) -> Span {
88        self.span
89    }
90}
91
92/// Represents the type different types of numeric literals.
93#[derive(Clone, PartialEq, Debug)]
94pub enum Numeric {
95    /// A floating point number.
96    Rational(f64),
97
98    /// An integer.
99    Integer(i32),
100
101    /// A `BigInt`.
102    BigInt(Box<BigInt>),
103}
104
105impl From<f64> for Numeric {
106    #[inline]
107    fn from(n: f64) -> Self {
108        Self::Rational(n)
109    }
110}
111
112impl From<i32> for Numeric {
113    #[inline]
114    fn from(n: i32) -> Self {
115        Self::Integer(n)
116    }
117}
118
119impl From<BigInt> for Numeric {
120    #[inline]
121    fn from(n: BigInt) -> Self {
122        Self::BigInt(Box::new(n))
123    }
124}
125
126/// Represents the type of Token and the data it has inside.
127#[derive(Clone, PartialEq, Debug)]
128pub enum TokenKind {
129    /// A boolean literal, which is either `true` or `false`.
130    BooleanLiteral((bool, ContainsEscapeSequence)),
131
132    /// The end of the file.
133    EOF,
134
135    /// An [**identifier name**][spec].
136    ///
137    /// [spec]: https://tc39.es/ecma262/#prod-IdentifierName
138    IdentifierName((Sym, ContainsEscapeSequence)),
139
140    /// A [**private identifier**][spec].
141    ///
142    /// [spec]: https://tc39.es/ecma262/#prod-PrivateIdentifier
143    PrivateIdentifier(Sym),
144
145    /// A keyword and a flag if the keyword contains unicode escaped chars.
146    ///
147    /// For more information, see [`Keyword`].
148    Keyword((Keyword, bool)),
149
150    /// The [`null` literal][spec].
151    ///
152    /// [spec]: https://tc39.es/ecma262/#prod-NullLiteral
153    NullLiteral(ContainsEscapeSequence),
154
155    /// A numeric literal.
156    NumericLiteral(Numeric),
157
158    /// A piece of punctuation
159    Punctuator(Punctuator),
160
161    /// A [**string literal**][spec].
162    ///
163    /// [spec]: https://tc39.es/ecma262/#prod-StringLiteral
164    StringLiteral((Sym, EscapeSequence)),
165
166    /// A part of a template literal without substitution.
167    TemplateNoSubstitution(TemplateString),
168
169    /// The part of a template literal between substitutions
170    TemplateMiddle(TemplateString),
171
172    /// A regular expression, consisting of body and flags.
173    RegularExpressionLiteral(Sym, Sym),
174
175    /// Indicates a [**line terminator (`\n`)**][spec].
176    ///
177    /// [spec]: https://tc39.es/ecma262/#prod-LineTerminator
178    LineTerminator,
179
180    /// Indicates a comment, the content isn't stored.
181    Comment,
182}
183
184impl From<bool> for TokenKind {
185    #[inline]
186    fn from(oth: bool) -> Self {
187        Self::BooleanLiteral((oth, ContainsEscapeSequence(false)))
188    }
189}
190
191impl From<(Keyword, bool)> for TokenKind {
192    #[inline]
193    fn from(kw: (Keyword, bool)) -> Self {
194        Self::Keyword(kw)
195    }
196}
197
198impl From<Punctuator> for TokenKind {
199    #[inline]
200    fn from(punc: Punctuator) -> Self {
201        Self::Punctuator(punc)
202    }
203}
204
205impl From<Numeric> for TokenKind {
206    #[inline]
207    fn from(num: Numeric) -> Self {
208        Self::NumericLiteral(num)
209    }
210}
211
212impl TokenKind {
213    /// Creates a `BooleanLiteral` token kind.
214    #[inline]
215    #[must_use]
216    pub const fn boolean_literal(lit: bool) -> Self {
217        Self::BooleanLiteral((lit, ContainsEscapeSequence(false)))
218    }
219
220    /// Creates an `EOF` token kind.
221    #[inline]
222    #[must_use]
223    pub const fn eof() -> Self {
224        Self::EOF
225    }
226
227    /// Creates an `Identifier` token type.
228    #[inline]
229    #[must_use]
230    pub const fn identifier(ident: Sym) -> Self {
231        Self::IdentifierName((ident, ContainsEscapeSequence(false)))
232    }
233
234    /// Creates a `NumericLiteral` token kind.
235    #[must_use]
236    pub fn numeric_literal<L>(lit: L) -> Self
237    where
238        L: Into<Numeric>,
239    {
240        Self::NumericLiteral(lit.into())
241    }
242
243    /// Creates a `Punctuator` token type.
244    #[inline]
245    #[must_use]
246    pub const fn punctuator(punc: Punctuator) -> Self {
247        Self::Punctuator(punc)
248    }
249
250    /// Creates a `StringLiteral` token type.
251    #[inline]
252    #[must_use]
253    pub const fn string_literal(lit: Sym, escape_sequence: EscapeSequence) -> Self {
254        Self::StringLiteral((lit, escape_sequence))
255    }
256
257    /// Creates a `TemplateMiddle` token type.
258    #[inline]
259    #[must_use]
260    pub const fn template_middle(template_string: TemplateString) -> Self {
261        Self::TemplateMiddle(template_string)
262    }
263
264    /// Creates a `TemplateNoSubstitution` token type.
265    #[inline]
266    #[must_use]
267    pub const fn template_no_substitution(template_string: TemplateString) -> Self {
268        Self::TemplateNoSubstitution(template_string)
269    }
270
271    /// Creates a `RegularExpressionLiteral` token kind.
272    #[inline]
273    #[must_use]
274    pub const fn regular_expression_literal(body: Sym, flags: Sym) -> Self {
275        Self::RegularExpressionLiteral(body, flags)
276    }
277
278    /// Creates a `LineTerminator` token kind.
279    #[inline]
280    #[must_use]
281    pub const fn line_terminator() -> Self {
282        Self::LineTerminator
283    }
284
285    /// Creates a 'Comment' token kind.
286    #[inline]
287    #[must_use]
288    pub const fn comment() -> Self {
289        Self::Comment
290    }
291
292    /// Implements the `ToString` functionality for the `TokenKind`.
293    #[must_use]
294    pub fn to_string(&self, interner: &Interner) -> String {
295        match *self {
296            Self::BooleanLiteral((val, _)) => val.to_string(),
297            Self::EOF => "end of file".to_owned(),
298            Self::IdentifierName((ident, _)) => interner.resolve_expect(ident).to_string(),
299            Self::PrivateIdentifier(ident) => format!("#{}", interner.resolve_expect(ident)),
300            Self::Keyword((word, _)) => word.to_string(),
301            Self::NullLiteral(_) => "null".to_owned(),
302            Self::NumericLiteral(Numeric::Rational(num)) => num.to_string(),
303            Self::NumericLiteral(Numeric::Integer(num)) => num.to_string(),
304            Self::NumericLiteral(Numeric::BigInt(ref num)) => format!("{num}n"),
305            Self::Punctuator(punc) => punc.to_string(),
306            Self::StringLiteral((lit, _)) => interner.resolve_expect(lit).to_string(),
307            Self::TemplateNoSubstitution(ts) | Self::TemplateMiddle(ts) => {
308                interner.resolve_expect(ts.raw()).to_string()
309            }
310            Self::RegularExpressionLiteral(body, flags) => {
311                format!(
312                    "/{}/{}",
313                    interner.resolve_expect(body),
314                    interner.resolve_expect(flags),
315                )
316            }
317            Self::LineTerminator => "line terminator".to_owned(),
318            Self::Comment => "comment".to_owned(),
319        }
320    }
321}
322
323bitflags! {
324    /// Indicates the set of escape sequences a string contains.
325    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
326    pub struct EscapeSequence: u8 {
327        /// A legacy escape sequence starting with `0` - `7`.
328        ///
329        /// More information:
330        ///  - [ECMAScript reference][spec]
331        ///
332        /// [spec]: https://tc39.es/ecma262/#prod-LegacyOctalEscapeSequence
333        const LEGACY_OCTAL = 0b0000_0001;
334
335        /// A octal escape sequence starting with `8` - `9`.
336        ///
337        /// More information:
338        ///  - [ECMAScript reference][spec]
339        ///
340        /// [spec]: https://tc39.es/ecma262/#prod-NonOctalDecimalEscapeSequence
341        const NON_OCTAL_DECIMAL = 0b0000_0010;
342
343        /// A generic escape sequence, either single (`\t`), unicode (`\u1238`)
344        /// or a line continuation (`\<LF>`)
345        ///
346        /// More information:
347        /// - [ECMAScript reference][spec]
348        ///
349        /// [spec]: https://tc39.es/ecma262/#prod-LineContinuation
350        const OTHER = 0b0000_0100;
351    }
352
353}
354
/// Marks whether a token was written using an escape sequence.
///
/// Carried by identifier names as well as by boolean and `null` literal tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ContainsEscapeSequence(pub bool);