use crate::lexer::{Cursor, Error, Token, TokenKind, Tokenizer, token::EscapeSequence};
use crate::source::ReadChar;
use boa_ast::{LinearSpan, Position, PositionGroup, Span};
use boa_interner::Interner;
use std::io::{self, ErrorKind};
/// Tokenizer for quoted string literals.
///
/// The only state it carries is the kind of quote that opened the literal,
/// which is also the quote that must close it.
#[derive(Debug, Clone, Copy)]
pub(super) struct StringLiteral {
/// Quote character that terminates this literal.
terminator: StringTerminator,
}
impl StringLiteral {
    /// Builds a string-literal tokenizer from the opening quote character.
    ///
    /// # Panics
    ///
    /// Panics if `init` is neither `'` nor `"`; callers are expected to
    /// dispatch here only after having seen one of the two quotes.
    pub(super) fn new(init: char) -> Self {
        Self {
            terminator: match init {
                '"' => StringTerminator::DoubleQuote,
                '\'' => StringTerminator::SingleQuote,
                _ => unreachable!(),
            },
        }
    }
}
/// The quote character that delimits a string literal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum StringTerminator {
/// The literal is delimited by `'` (U+0027).
SingleQuote,
/// The literal is delimited by `"` (U+0022).
DoubleQuote,
}
/// Code-point level helpers for a buffer that stores UTF-16 code units.
pub(crate) trait UTF16CodeUnitsBuffer {
    /// Appends `code_point` to the buffer as one or two UTF-16 code units.
    fn push_code_point(&mut self, code_point: u32);

    /// Decodes the buffered code units into a `String`, lossily.
    #[allow(dead_code)]
    fn to_string_lossy(&self) -> String;
}

impl UTF16CodeUnitsBuffer for Vec<u16> {
    fn push_code_point(&mut self, code_point: u32) {
        match u16::try_from(code_point) {
            // Anything that fits in 16 bits is stored verbatim as one unit
            // (this includes lone surrogate values).
            Ok(unit) => self.push(unit),
            // Otherwise split the value into a high/low surrogate pair.
            Err(_) => {
                let offset = code_point - 0x10000;
                let high = u16::try_from((offset >> 10) + 0xD800)
                    .expect("decoded an u32 into two u16.");
                let low = u16::try_from((offset & 0x3FF) + 0xDC00)
                    .expect("decoded an u32 into two u16.");
                self.extend_from_slice(&[high, low]);
            }
        }
    }

    fn to_string_lossy(&self) -> String {
        String::from_utf16_lossy(self)
    }
}
impl<R> Tokenizer<R> for StringLiteral {
    /// Lexes the body of a string literal into a string-literal token.
    ///
    /// The characters up to the closing quote are read from `cursor`, the
    /// resulting UTF-16 code units are interned, and the token records both
    /// the source span and the linear span of the literal.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading the literal's characters.
    fn lex(
        &mut self,
        cursor: &mut Cursor<R>,
        start_pos: PositionGroup,
        interner: &mut Interner,
    ) -> Result<Token, Error>
    where
        R: ReadChar,
    {
        let strict = cursor.strict();
        let (code_units, span, escape_sequence) = Self::take_string_characters(
            cursor,
            start_pos.position(),
            self.terminator,
            strict,
        )?;

        let symbol = interner.get_or_intern(code_units.as_slice());
        let kind = TokenKind::string_literal(symbol, escape_sequence);
        let linear_span = LinearSpan::new(start_pos.linear_position(), cursor.linear_pos());

        Ok(Token::new(kind, span, linear_span))
    }
}
impl StringLiteral {
/// Returns `true` when `ch` is one of the four line-terminator code points:
/// U+000A (LF), U+000D (CR), U+2028 (LS) or U+2029 (PS).
pub(super) const fn is_line_terminator(ch: u32) -> bool {
    ch == 0x000A || ch == 0x000D || ch == 0x2028 || ch == 0x2029
}
/// Reads the characters of a string literal up to and including its closing quote.
///
/// Returns the literal's contents as UTF-16 code units, the span running from
/// `start_pos` to the cursor position after the closing quote, and the set of
/// escape-sequence kinds encountered along the way.
///
/// # Errors
///
/// Returns an `UnexpectedEof` I/O error ("unterminated string literal") when
/// the input ends, or an LF/CR is met, before the closing quote. Errors from
/// escape-sequence lexing are propagated unchanged.
fn take_string_characters<R>(
    cursor: &mut Cursor<R>,
    start_pos: Position,
    terminator: StringTerminator,
    strict: bool,
) -> Result<(Vec<u16>, Span, EscapeSequence), Error>
where
    R: ReadChar,
{
    // Code point of the quote that closes this literal.
    let closing_quote: u32 = match terminator {
        StringTerminator::SingleQuote => 0x0027,
        StringTerminator::DoubleQuote => 0x0022,
    };

    let mut code_units: Vec<u16> = Vec::new();
    let mut seen_escapes = EscapeSequence::empty();

    loop {
        // Position of the current character; used to report escape errors.
        let char_pos = cursor.pos();

        match cursor.next_char()? {
            Some(cp) if cp == closing_quote => break,
            // `\` starts an escape sequence or a line continuation.
            Some(0x005C) => {
                let (value, kind) = Self::take_escape_sequence_or_line_continuation(
                    cursor, char_pos, strict, false,
                )?;
                seen_escapes |= kind;
                // A line continuation contributes no character.
                if let Some(value) = value {
                    code_units.push_code_point(value);
                }
            }
            // LS and PS are allowed unescaped; LF and CR are not.
            Some(cp) if matches!(cp, 0x2028 | 0x2029) || !Self::is_line_terminator(cp) => {
                code_units.push_code_point(cp);
            }
            None | Some(_) => {
                return Err(Error::from(io::Error::new(
                    ErrorKind::UnexpectedEof,
                    "unterminated string literal",
                )));
            }
        }
    }

    let span = Span::new(start_pos, cursor.pos());
    Ok((code_units, span, seen_escapes))
}
pub(super) fn take_escape_sequence_or_line_continuation<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
strict: bool,
is_template_literal: bool,
) -> Result<(Option<u32>, EscapeSequence), Error>
where
R: ReadChar,
{
let escape_ch = cursor.next_char()?.ok_or_else(|| {
Error::from(io::Error::new(
ErrorKind::UnexpectedEof,
"unterminated escape sequence in literal",
))
})?;
let escape_value = match escape_ch {
0x0062 => (Some(0x0008 ), EscapeSequence::OTHER),
0x0074 => (Some(0x0009 ), EscapeSequence::OTHER),
0x006E => (Some(0x000A ), EscapeSequence::OTHER),
0x0076 => (Some(0x000B ), EscapeSequence::OTHER),
0x0066 => (Some(0x000C ), EscapeSequence::OTHER),
0x0072 => (Some(0x000D ), EscapeSequence::OTHER),
0x0022 => (Some(0x0022 ), EscapeSequence::OTHER),
0x0027 => (Some(0x0027 ), EscapeSequence::OTHER),
0x005C => (Some(0x005C ), EscapeSequence::OTHER),
0x0030 if cursor
.peek_char()?
.filter(|c| (0x30..=0x39 ).contains(c))
.is_none() =>
(Some(0x0000 ), EscapeSequence::OTHER),
0x0078 => {
(Some(Self::take_hex_escape_sequence(cursor, start_pos)?), EscapeSequence::OTHER)
}
0x0075 => {
(Some(Self::take_unicode_escape_sequence(cursor, start_pos)?), EscapeSequence::OTHER)
}
0x0038 | 0x0039 => {
if is_template_literal {
return Err(Error::syntax(
"\\8 and \\9 are not allowed in template literal",
start_pos,
));
} else if strict {
return Err(Error::syntax(
"\\8 and \\9 are not allowed in strict mode",
start_pos,
));
}
(Some(escape_ch), EscapeSequence::NON_OCTAL_DECIMAL)
}
_ if (0x0030..=0x0037 ).contains(&escape_ch) => {
if is_template_literal {
return Err(Error::syntax(
"octal escape sequences are not allowed in template literal",
start_pos,
));
}
if strict {
return Err(Error::syntax(
"octal escape sequences are not allowed in strict mode",
start_pos,
));
}
(Some(Self::take_legacy_octal_escape_sequence(
cursor,
escape_ch.try_into().expect("an ascii char must not fail to convert"),
)?), EscapeSequence::LEGACY_OCTAL)
}
_ if Self::is_line_terminator(escape_ch) => {
(None, EscapeSequence::OTHER)
}
_ => {
(Some(escape_ch), EscapeSequence::OTHER)
}
};
Ok(escape_value)
}
pub(super) fn take_unicode_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
) -> Result<u32, Error>
where
R: ReadChar,
{
if cursor.next_if(0x7B )? {
let mut code_point_buf = Vec::with_capacity(6);
cursor.take_until(0x7D , &mut code_point_buf)?;
let mut s = String::with_capacity(code_point_buf.len());
for c in code_point_buf {
if let Some(c) = char::from_u32(c) {
s.push(c);
} else {
return Err(Error::syntax(
"malformed Unicode character escape sequence",
start_pos,
));
}
}
let Ok(code_point) = u32::from_str_radix(&s, 16) else {
return Err(Error::syntax(
"malformed Unicode character escape sequence",
start_pos,
));
};
if code_point > 0x10_FFFF {
return Err(Error::syntax(
"Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
start_pos,
));
}
Ok(code_point)
} else {
let mut buffer = [0u32; 4];
buffer[0] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
buffer[1] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
buffer[2] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
buffer[3] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
let mut s = String::with_capacity(buffer.len());
for c in buffer {
if let Some(c) = char::from_u32(c) {
s.push(c);
} else {
return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
}
}
let Ok(code_point) = u16::from_str_radix(&s, 16) else {
return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
};
Ok(u32::from(code_point))
}
}
fn take_hex_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
) -> Result<u32, Error>
where
R: ReadChar,
{
let mut buffer = [0u32; 2];
buffer[0] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
buffer[1] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
let mut s = String::with_capacity(buffer.len());
for c in buffer {
if let Some(c) = char::from_u32(c) {
s.push(c);
} else {
return Err(Error::syntax(
"invalid Hexadecimal escape sequence",
start_pos,
));
}
}
let Ok(code_point) = u16::from_str_radix(&s, 16) else {
return Err(Error::syntax(
"invalid Hexadecimal escape sequence",
start_pos,
));
};
Ok(u32::from(code_point))
}
/// Lexes the rest of a legacy octal escape whose first digit is `init_byte`.
///
/// `init_byte` is the ASCII byte of the first octal digit, already consumed
/// by the caller. At most one more octal digit is taken when the first digit
/// is `4..=7`, and at most two more when it is `0..=3`. Consumption stops at
/// the first character that is not an octal digit.
///
/// # Errors
///
/// Propagates errors raised by the cursor while peeking or advancing.
fn take_legacy_octal_escape_sequence<R>(
    cursor: &mut Cursor<R>,
    init_byte: u8,
) -> Result<u32, Error>
where
    R: ReadChar,
{
    let mut code_point = u32::from(init_byte - b'0');

    // How many further octal digits may belong to this escape.
    let max_extra_digits = if (b'0'..=b'3').contains(&init_byte) {
        2
    } else {
        1
    };

    for _ in 0..max_extra_digits {
        match cursor.peek_char()? {
            Some(digit @ 0x30..=0x37) => {
                cursor.next_char()?;
                code_point = code_point * 8 + (digit - 0x30);
            }
            _ => break,
        }
    }

    Ok(code_point)
}
}