boa/syntax/lexer/
template.rs

//! This module implements lexing for template literals used in the JavaScript programming language.
2
3use super::{Cursor, Error, Tokenizer};
4use crate::{
5    profiler::BoaProfiler,
6    syntax::lexer::string::{StringLiteral, UTF16CodeUnitsBuffer},
7    syntax::{
8        ast::{Position, Span},
9        lexer::{Token, TokenKind},
10    },
11};
12use std::io::{self, ErrorKind, Read};
13
14#[cfg(feature = "deser")]
15use serde::{Deserialize, Serialize};
16
17#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))]
18#[derive(Clone, PartialEq, Debug)]
19pub struct TemplateString {
20    /// The start position of the template string. Used to make lexer error if `to_owned_cooked` failed.
21    start_pos: Position,
22    /// The template string of template literal with argument `raw` true.
23    raw: Box<str>,
24}
25
26impl TemplateString {
27    pub fn new<R>(raw: R, start_pos: Position) -> Self
28    where
29        R: Into<Box<str>>,
30    {
31        Self {
32            start_pos,
33            raw: raw.into(),
34        }
35    }
36
37    /// Converts the raw template string into a mutable string slice.
38    ///
39    /// More information:
40    ///  - [ECMAScript reference][spec]
41    ///
42    /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
43    pub fn as_raw(&self) -> &str {
44        self.raw.as_ref()
45    }
46
47    /// Creats a new cooked template string. Returns a lexer error if it fails to cook the template string.
48    ///
49    /// More information:
50    ///  - [ECMAScript reference][spec]
51    ///
52    /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
53    pub fn to_owned_cooked(&self) -> Result<Box<str>, Error> {
54        let mut cursor = Cursor::with_position(self.raw.as_bytes(), self.start_pos);
55        let mut buf: Vec<u16> = Vec::new();
56
57        loop {
58            let ch_start_pos = cursor.pos();
59            let ch = cursor.next_char()?;
60
61            match ch {
62                Some(0x005C /* \ */) => {
63                    let escape_value = StringLiteral::take_escape_sequence_or_line_continuation(
64                        &mut cursor,
65                        ch_start_pos,
66                        true,
67                        true,
68                    )?;
69
70                    if let Some(escape_value) = escape_value {
71                        buf.push_code_point(escape_value);
72                    }
73                }
74                Some(ch) => {
75                    // The caller guarantees that sequences '`' and '${' never appear
76                    // LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
77                    // which matches the TV of <CR> <LF>
78                    buf.push_code_point(ch);
79                }
80                None => break,
81            }
82        }
83
84        Ok(buf.to_string_lossy().into())
85    }
86}
87
88/// Template literal lexing.
89///
90/// Expects: Initial ` to already be consumed by cursor.
91///
92/// More information:
93///  - [ECMAScript reference][spec]
94///  - [MDN documentation][mdn]
95///
96/// [spec]: https://tc39.es/ecma262/#sec-template-literals
97/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals
98#[derive(Debug, Clone, Copy)]
99pub(super) struct TemplateLiteral;
100
101impl<R> Tokenizer<R> for TemplateLiteral {
102    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
103    where
104        R: Read,
105    {
106        let _timer = BoaProfiler::global().start_event("TemplateLiteral", "Lexing");
107
108        let mut buf = Vec::new();
109        loop {
110            let ch = cursor.next_char()?.ok_or_else(|| {
111                Error::from(io::Error::new(
112                    ErrorKind::UnexpectedEof,
113                    "unterminated template literal",
114                ))
115            })?;
116
117            match ch {
118                0x0060 /* ` */ => {
119                    let raw = buf.to_string_lossy();
120                    let template_string = TemplateString::new(raw, start_pos);
121
122                    return Ok(Token::new(
123                        TokenKind::template_no_substitution(template_string),
124                        Span::new(start_pos, cursor.pos()),
125                    ));
126                }
127                0x0024 /* $ */ if cursor.next_is(b'{')? => {
128                    let raw = buf.to_string_lossy();
129                    let template_string = TemplateString::new(raw, start_pos);
130
131                    return Ok(Token::new(
132                        TokenKind::template_middle(template_string),
133                        Span::new(start_pos, cursor.pos()),
134                    ));
135                }
136                0x005C /* \ */ => {
137                    let escape_ch = cursor.peek()?.ok_or_else(|| {
138                        Error::from(io::Error::new(
139                            ErrorKind::UnexpectedEof,
140                            "unterminated escape sequence in literal",
141                        ))
142                    })?;
143
144                    buf.push(b'\\' as u16);
145                    match escape_ch {
146                        b'`' | b'$' | b'\\' => buf.push(cursor.next_byte()?.unwrap() as u16),
147                        _ => continue,
148                    }
149                }
150                ch => {
151                    buf.push_code_point(ch);
152                }
153            }
154        }
155    }
156}