1mod comment;
18mod cursor;
19pub mod error;
20mod identifier;
21mod number;
22mod operator;
23mod regex;
24mod spread;
25mod string;
26mod template;
27pub mod token;
28
29#[cfg(test)]
30mod tests;
31
32use self::{
33 comment::{MultiLineComment, SingleLineComment},
34 cursor::Cursor,
35 identifier::Identifier,
36 number::NumberLiteral,
37 operator::Operator,
38 regex::RegexLiteral,
39 spread::SpreadLiteral,
40 string::StringLiteral,
41 template::TemplateLiteral,
42};
43use crate::syntax::ast::{Punctuator, Span};
44pub use crate::{profiler::BoaProfiler, syntax::ast::Position};
45use core::convert::TryFrom;
46pub use error::Error;
47use std::io::Read;
48pub use token::{Token, TokenKind};
49
/// Common interface of the sub-lexers in this module (comments, identifiers,
/// numbers, operators, regular expressions, spreads, strings and templates).
trait Tokenizer<R> {
    /// Lexes a single token by reading from `cursor`.
    ///
    /// `start_pos` is supplied by the caller; `Lexer` passes the cursor
    /// position it recorded before consuming the first character of the token.
    ///
    /// Returns the lexed [`Token`], or an [`Error`] if lexing fails.
    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
    where
        R: Read;
}
56
/// Lexer that produces tokens one at a time from a reader of type `R`.
#[derive(Debug)]
pub struct Lexer<R> {
    /// Cursor over the input; every read, peek and position query goes through it.
    cursor: Cursor<R>,
    /// Current goal symbol, consulted when a `/` has to be disambiguated
    /// between a division punctuator and the start of a regular expression.
    goal_symbol: InputElement,
}
63
64impl<R> Lexer<R> {
65 fn is_whitespace(ch: u32) -> bool {
74 matches!(
75 ch,
76 0x0020 | 0x0009 | 0x000B | 0x000C | 0x00A0 | 0xFEFF |
77 0x1680 | 0x2000..=0x200A | 0x202F | 0x205F | 0x3000
79 )
80 }
81
82 #[inline]
84 pub(crate) fn set_goal(&mut self, elm: InputElement) {
85 self.goal_symbol = elm;
86 }
87
88 #[inline]
90 pub(crate) fn get_goal(&self) -> InputElement {
91 self.goal_symbol
92 }
93
94 #[inline]
95 pub(super) fn strict_mode(&self) -> bool {
96 self.cursor.strict_mode()
97 }
98
99 #[inline]
100 pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
101 self.cursor.set_strict_mode(strict_mode)
102 }
103
104 #[inline]
106 pub fn new(reader: R) -> Self
107 where
108 R: Read,
109 {
110 Self {
111 cursor: Cursor::new(reader),
112 goal_symbol: Default::default(),
113 }
114 }
115
116 pub(crate) fn lex_slash_token(&mut self, start: Position) -> Result<Token, Error>
124 where
125 R: Read,
126 {
127 let _timer = BoaProfiler::global().start_event("lex_slash_token", "Lexing");
128
129 if let Some(c) = self.cursor.peek()? {
130 match c {
131 b'/' => {
132 self.cursor.next_byte()?.expect("/ token vanished"); SingleLineComment.lex(&mut self.cursor, start)
134 }
135 b'*' => {
136 self.cursor.next_byte()?.expect("* token vanished"); MultiLineComment.lex(&mut self.cursor, start)
138 }
139 ch => {
140 match self.get_goal() {
141 InputElement::Div | InputElement::TemplateTail => {
142 if ch == b'=' {
145 self.cursor.next_byte()?.expect("= token vanished"); Ok(Token::new(
148 Punctuator::AssignDiv.into(),
149 Span::new(start, self.cursor.pos()),
150 ))
151 } else {
152 Ok(Token::new(
153 Punctuator::Div.into(),
154 Span::new(start, self.cursor.pos()),
155 ))
156 }
157 }
158 InputElement::RegExp => {
159 RegexLiteral.lex(&mut self.cursor, start)
161 }
162 }
163 }
164 }
165 } else {
166 Err(Error::syntax(
167 "Abrupt end: Expecting Token /,*,= or regex",
168 start,
169 ))
170 }
171 }
172
173 #[allow(clippy::should_implement_trait)]
176 pub fn next(&mut self) -> Result<Option<Token>, Error>
177 where
178 R: Read,
179 {
180 let _timer = BoaProfiler::global().start_event("next()", "Lexing");
181
182 let (start, next_ch) = loop {
183 let start = self.cursor.pos();
184 if let Some(next_ch) = self.cursor.next_char()? {
185 if !Self::is_whitespace(next_ch) {
187 break (start, next_ch);
188 }
189 } else {
190 return Ok(None);
191 }
192 };
193
194 if let Ok(c) = char::try_from(next_ch) {
195 let token = match c {
196 '\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(
197 TokenKind::LineTerminator,
198 Span::new(start, self.cursor.pos()),
199 )),
200 '"' | '\'' => StringLiteral::new(c).lex(&mut self.cursor, start),
201 '`' => TemplateLiteral.lex(&mut self.cursor, start),
202 ';' => Ok(Token::new(
203 Punctuator::Semicolon.into(),
204 Span::new(start, self.cursor.pos()),
205 )),
206 ':' => Ok(Token::new(
207 Punctuator::Colon.into(),
208 Span::new(start, self.cursor.pos()),
209 )),
210 '.' => {
211 if self.cursor.peek()?.map(|c| (b'0'..=b'9').contains(&c)) == Some(true) {
212 NumberLiteral::new(next_ch as u8).lex(&mut self.cursor, start)
213 } else {
214 SpreadLiteral::new().lex(&mut self.cursor, start)
215 }
216 }
217 '(' => Ok(Token::new(
218 Punctuator::OpenParen.into(),
219 Span::new(start, self.cursor.pos()),
220 )),
221 ')' => Ok(Token::new(
222 Punctuator::CloseParen.into(),
223 Span::new(start, self.cursor.pos()),
224 )),
225 ',' => Ok(Token::new(
226 Punctuator::Comma.into(),
227 Span::new(start, self.cursor.pos()),
228 )),
229 '{' => Ok(Token::new(
230 Punctuator::OpenBlock.into(),
231 Span::new(start, self.cursor.pos()),
232 )),
233 '}' => Ok(Token::new(
234 Punctuator::CloseBlock.into(),
235 Span::new(start, self.cursor.pos()),
236 )),
237 '[' => Ok(Token::new(
238 Punctuator::OpenBracket.into(),
239 Span::new(start, self.cursor.pos()),
240 )),
241 ']' => Ok(Token::new(
242 Punctuator::CloseBracket.into(),
243 Span::new(start, self.cursor.pos()),
244 )),
245 '/' => self.lex_slash_token(start),
246 '=' | '*' | '+' | '-' | '%' | '|' | '&' | '^' | '<' | '>' | '!' | '~' | '?' => {
247 Operator::new(next_ch as u8).lex(&mut self.cursor, start)
248 }
249 '\\' if self.cursor.peek()? == Some(b'u') => {
250 Identifier::new(c).lex(&mut self.cursor, start)
251 }
252 _ if Identifier::is_identifier_start(c as u32) => {
253 Identifier::new(c).lex(&mut self.cursor, start)
254 }
255 _ if c.is_digit(10) => {
256 NumberLiteral::new(next_ch as u8).lex(&mut self.cursor, start)
257 }
258 _ => {
259 let details = format!(
260 "unexpected '{}' at line {}, column {}",
261 c,
262 start.line_number(),
263 start.column_number()
264 );
265 Err(Error::syntax(details, start))
266 }
267 }?;
268
269 if token.kind() == &TokenKind::Comment {
270 self.next()
272 } else {
273 Ok(Some(token))
274 }
275 } else {
276 Err(Error::syntax(
277 format!(
278 "unexpected utf-8 char '\\u{}' at line {}, column {}",
279 next_ch,
280 start.line_number(),
281 start.column_number()
282 ),
283 start,
284 ))
285 }
286 }
287
288 pub(crate) fn lex_template(&mut self, start: Position) -> Result<Token, Error>
289 where
290 R: Read,
291 {
292 TemplateLiteral.lex(&mut self.cursor, start)
293 }
294}
295
/// Goal symbol that tells the lexer how to interpret an ambiguous `/`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum InputElement {
    /// A `/` is lexed as a division punctuator (`/` or `/=`), never as a regex.
    Div,
    /// A `/` may start a regular-expression literal.
    RegExp,
    /// Treated like [`InputElement::Div`] when a `/` is lexed.
    TemplateTail,
}

impl Default for InputElement {
    /// The default goal is [`InputElement::RegExp`].
    fn default() -> Self {
        Self::RegExp
    }
}