use crate::syntax::ast::Position;
use boa_profiler::Profiler;
use std::io::{self, Bytes, Error, ErrorKind, Read};
/// Cursor over the bytes of a reader that tracks the current source position.
///
/// Wraps an `InnerIter` and keeps a line/column `Position` up to date while
/// bytes or characters are consumed through `next_byte` / `next_char`.
#[derive(Debug)]
pub(super) struct Cursor<R> {
// Peekable byte source built from the reader's `bytes()` iterator.
iter: InnerIter<R>,
// Current line/column; starts at (1, 1) unless supplied via `with_position`.
pos: Position,
// Flag exposed through `strict_mode()` / `set_strict_mode()`; starts as `false`.
strict_mode: bool,
}
impl<R> Cursor<R> {
    /// Returns the position the cursor is currently at.
    #[inline]
    pub(super) fn pos(&self) -> Position {
        self.pos
    }

    /// Moves the cursor one column to the right on the current line.
    #[inline]
    pub(super) fn next_column(&mut self) {
        self.pos = Position::new(self.pos.line_number(), self.pos.column_number() + 1);
    }

    /// Moves the cursor to column 1 of the following line.
    #[inline]
    fn next_line(&mut self) {
        self.pos = Position::new(self.pos.line_number() + 1, 1);
    }

    /// Returns the current value of the strict-mode flag.
    #[inline]
    pub(super) fn strict_mode(&self) -> bool {
        self.strict_mode
    }

    /// Overwrites the strict-mode flag with `strict_mode`.
    #[inline]
    pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
        self.strict_mode = strict_mode;
    }
}
impl<R> Cursor<R>
where
    R: Read,
{
    /// Creates a cursor over `inner`, positioned at line 1, column 1, with the
    /// strict-mode flag cleared.
    #[inline]
    pub(super) fn new(inner: R) -> Self {
        Self::with_position(inner, Position::new(1, 1))
    }

    /// Creates a cursor over `inner` that starts counting from `pos`, with the
    /// strict-mode flag cleared.
    #[inline]
    pub(super) fn with_position(inner: R, pos: Position) -> Self {
        let iter = InnerIter::new(inner.bytes());
        Self {
            iter,
            pos,
            strict_mode: false,
        }
    }

    /// Returns the next byte without consuming it, or `None` at end of input.
    #[inline]
    pub(super) fn peek(&mut self) -> Result<Option<u8>, Error> {
        let _guard = Profiler::global().start_event("cursor::peek()", "Lexing");
        self.iter.peek_byte()
    }

    /// Returns up to `n` upcoming bytes packed into a `u32` without consuming
    /// them; the first upcoming byte occupies the lowest 8 bits.
    #[inline]
    pub(super) fn peek_n(&mut self, n: u8) -> Result<u32, Error> {
        let _guard = Profiler::global().start_event("cursor::peek_n()", "Lexing");
        self.iter.peek_n_bytes(n)
    }

    /// Returns the next decoded code point without consuming it, or `None` at
    /// end of input.
    #[inline]
    pub(super) fn peek_char(&mut self) -> Result<Option<u32>, Error> {
        let _guard = Profiler::global().start_event("cursor::peek_char()", "Lexing");
        self.iter.peek_char()
    }

    /// Consumes the next byte if, and only if, it equals `byte`.
    ///
    /// Returns whether the byte matched (and was therefore consumed).
    #[inline]
    pub(super) fn next_is(&mut self, byte: u8) -> io::Result<bool> {
        let _guard = Profiler::global().start_event("cursor::next_is()", "Lexing");
        let matched = self.peek()? == Some(byte);
        if matched {
            let _ = self.next_byte()?;
        }
        Ok(matched)
    }

    /// Tests the next byte against `pred` without consuming anything.
    ///
    /// Returns `false` at end of input and for any non-ASCII byte; `pred` is
    /// only invoked for bytes in `0..=0x7F`.
    #[inline]
    pub(super) fn next_is_ascii_pred<F>(&mut self, pred: &F) -> io::Result<bool>
    where
        F: Fn(char) -> bool,
    {
        let _guard = Profiler::global().start_event("cursor::next_is_ascii_pred()", "Lexing");
        Ok(match self.peek()? {
            Some(byte) if byte.is_ascii() => pred(char::from(byte)),
            _ => false,
        })
    }

    /// Tests the next decoded code point against `pred` without consuming
    /// anything. Returns `false` at end of input.
    #[allow(dead_code)]
    #[inline]
    pub(super) fn next_is_char_pred<F>(&mut self, pred: &F) -> io::Result<bool>
    where
        F: Fn(u32) -> bool,
    {
        let _guard = Profiler::global().start_event("cursor::next_is_char_pred()", "Lexing");
        Ok(self.peek_char()?.map_or(false, pred))
    }

    /// Appends bytes to `buf` until `stop` is encountered.
    ///
    /// The `stop` byte itself is consumed but not stored.
    ///
    /// # Errors
    ///
    /// Returns an `UnexpectedEof` error if the input ends before `stop` is
    /// found, and propagates any error raised by the reader.
    pub(super) fn take_until(&mut self, stop: u8, buf: &mut Vec<u8>) -> io::Result<()> {
        let _guard = Profiler::global().start_event("cursor::take_until()", "Lexing");
        while !self.next_is(stop)? {
            match self.next_byte()? {
                Some(byte) => buf.push(byte),
                None => {
                    return Err(io::Error::new(
                        ErrorKind::UnexpectedEof,
                        format!("Unexpected end of file when looking for character {stop}"),
                    ));
                }
            }
        }
        Ok(())
    }

    /// Appends bytes to `buf` for as long as the next byte is ASCII and
    /// satisfies `pred`. The first non-matching byte is left unconsumed.
    pub(super) fn take_while_ascii_pred<F>(&mut self, buf: &mut Vec<u8>, pred: &F) -> io::Result<()>
    where
        F: Fn(char) -> bool,
    {
        let _guard = Profiler::global().start_event("cursor::take_while_ascii_pred()", "Lexing");
        while self.next_is_ascii_pred(pred)? {
            match self.next_byte()? {
                Some(byte) => buf.push(byte),
                // The predicate check above only succeeds when a byte is available.
                None => unreachable!(),
            }
        }
        Ok(())
    }

    /// Appends the bytes of every leading code point that satisfies `pred` to
    /// `buf`. The first non-matching code point is left unconsumed.
    #[allow(dead_code)]
    pub(super) fn take_while_char_pred<F>(&mut self, buf: &mut Vec<u8>, pred: &F) -> io::Result<()>
    where
        F: Fn(u32) -> bool,
    {
        let _guard = Profiler::global().start_event("cursor::take_while_char_pred()", "Lexing");
        while self.next_is_char_pred(pred)? {
            let ch = match self.peek_char()? {
                Some(ch) => ch,
                // The predicate check above only succeeds when a character is available.
                None => unreachable!(),
            };
            // Copy as many bytes as `utf8_len` reports for the peeked code point.
            for _ in 0..utf8_len(ch) {
                let byte = self
                    .next_byte()?
                    .expect("already checked that the next character exists");
                buf.push(byte);
            }
        }
        Ok(())
    }

    /// Fills `buf` entirely with the next bytes of the input.
    ///
    /// Bytes are taken directly from the inner iterator, so the tracked
    /// position is not updated.
    #[inline]
    pub(super) fn fill_bytes(&mut self, buf: &mut [u8]) -> io::Result<()> {
        let _guard = Profiler::global().start_event("cursor::fill_bytes()", "Lexing");
        self.iter.fill_bytes(buf)
    }

    /// Consumes and returns the next byte, updating the tracked position.
    ///
    /// Position rules:
    /// * `\r` starts a new line; a directly following `\n` is consumed as well
    ///   (the returned byte is still `\r`).
    /// * `\n` starts a new line.
    /// * `0xE2` followed by `0x80 0xA8` or `0x80 0xA9` (the UTF-8 encodings of
    ///   U+2028 / U+2029) starts a new line; any other `0xE2` advances the column.
    /// * Any other byte accepted by `utf8_is_first_byte` advances the column.
    /// * Remaining bytes and end of input leave the position unchanged.
    #[inline]
    pub(crate) fn next_byte(&mut self) -> Result<Option<u8>, Error> {
        let _guard = Profiler::global().start_event("cursor::next_byte()", "Lexing");
        let byte = self.iter.next_byte()?;
        if let Some(current) = byte {
            match current {
                b'\r' => {
                    // Treat "\r\n" as a single line break.
                    if self.peek()? == Some(b'\n') {
                        let _ = self.iter.next_byte();
                    }
                    self.next_line();
                }
                b'\n' => self.next_line(),
                0xE2 => {
                    // `peek_n` packs the first upcoming byte into the low bits,
                    // so "0x80 0xA8" reads as 0xA8_80.
                    if matches!(self.peek_n(2)?, 0xA8_80 | 0xA9_80) {
                        self.next_line();
                    } else {
                        self.next_column();
                    }
                }
                other if utf8_is_first_byte(other) => self.next_column(),
                _ => {}
            }
        }
        Ok(byte)
    }

    /// Consumes and returns the next decoded code point, updating the tracked
    /// position.
    ///
    /// U+000D (optionally followed by U+000A, which is consumed too), U+000A,
    /// U+2028 and U+2029 start a new line; every other code point advances the
    /// column. End of input leaves the position unchanged.
    #[inline]
    pub(crate) fn next_char(&mut self) -> Result<Option<u32>, Error> {
        let _guard = Profiler::global().start_event("cursor::next_char()", "Lexing");
        let ch = self.iter.next_char()?;
        if let Some(code_point) = ch {
            match code_point {
                0xD => {
                    // Treat "\r\n" as a single line break.
                    if self.peek()? == Some(0xA) {
                        let _ = self.iter.next_byte();
                    }
                    self.next_line();
                }
                0xA | 0x2028 | 0x2029 => self.next_line(),
                _ => self.next_column(),
            }
        }
        Ok(ch)
    }
}
/// Byte iterator over a reader with a small look-ahead buffer of up to four
/// bytes and a one-character decode cache.
#[derive(Debug)]
#[allow(clippy::option_option)]
struct InnerIter<R> {
// Underlying byte iterator of the reader.
iter: Bytes<R>,
// Number of bytes currently held in `peeked_bytes`.
num_peeked_bytes: u8,
// Look-ahead bytes packed into one word: the next byte to be returned sits in
// the lowest 8 bits, later bytes in successively higher bits.
peeked_bytes: u32,
// Cached result of `peek_char`; the outer `None` means nothing is cached.
// Cleared by every call to `next_byte`.
peeked_char: Option<Option<u32>>,
}
impl<R> InnerIter<R> {
    /// Wraps `iter` with an empty look-ahead buffer and no cached character.
    #[inline]
    fn new(iter: Bytes<R>) -> Self {
        Self {
            peeked_char: None,
            peeked_bytes: 0,
            num_peeked_bytes: 0,
            iter,
        }
    }
}
impl<R> InnerIter<R>
where
    R: Read,
{
    /// Fills `buf` entirely with the next bytes of the input.
    ///
    /// # Errors
    ///
    /// Returns an `UnexpectedEof` error if the input ends before `buf` is
    /// full, and propagates any error raised by the reader.
    #[inline]
    fn fill_bytes(&mut self, buf: &mut [u8]) -> io::Result<()> {
        for byte in buf.iter_mut() {
            *byte = self.next_byte()?.ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "unexpected EOF when filling buffer",
                )
            })?;
        }
        Ok(())
    }

    /// Consumes up to `n` bytes, stopping early at end of input.
    #[inline]
    fn increment(&mut self, n: u32) -> Result<(), Error> {
        for _ in 0..n {
            if self.next_byte()?.is_none() {
                break;
            }
        }
        Ok(())
    }

    /// Returns the next byte without consuming it, or `None` at end of input.
    #[inline]
    pub(super) fn peek_byte(&mut self) -> Result<Option<u8>, Error> {
        if self.num_peeked_bytes > 0 {
            // The next byte lives in the lowest 8 bits of the buffer.
            let byte = (self.peeked_bytes & 0xFF) as u8;
            Ok(Some(byte))
        } else {
            match self.iter.next().transpose()? {
                Some(byte) => {
                    self.num_peeked_bytes = 1;
                    self.peeked_bytes = u32::from(byte);
                    Ok(Some(byte))
                }
                None => Ok(None),
            }
        }
    }

    /// Returns up to `n` (at most 4) upcoming bytes without consuming them.
    ///
    /// The bytes are packed into a `u32` with the first upcoming byte in the
    /// lowest 8 bits. Bytes that are unavailable because the input ended are
    /// reported as zero.
    #[inline]
    pub(super) fn peek_n_bytes(&mut self, n: u8) -> Result<u32, Error> {
        // Top up the look-ahead buffer until it holds `n` bytes (capped at 4).
        while self.num_peeked_bytes < n && self.num_peeked_bytes < 4 {
            match self.iter.next().transpose()? {
                Some(byte) => {
                    self.peeked_bytes |= u32::from(byte) << (self.num_peeked_bytes * 8);
                    self.num_peeked_bytes += 1;
                }
                None => break,
            };
        }
        // Mask off any bytes beyond the requested count.
        match n {
            0 => Ok(0),
            1 => Ok(self.peeked_bytes & 0xFF),
            2 => Ok(self.peeked_bytes & 0xFFFF),
            3 => Ok(self.peeked_bytes & 0xFFFFFF),
            _ => Ok(self.peeked_bytes),
        }
    }

    /// Decodes and returns the next code point without consuming it, or `None`
    /// at end of input.
    ///
    /// The decoded value is cached until the next call to `next_byte`. No
    /// validation is performed: the leading byte alone decides how many bytes
    /// are decoded, and bytes missing at end of input are treated as zero.
    #[inline]
    pub(super) fn peek_char(&mut self) -> Result<Option<u32>, Error> {
        if let Some(ch) = self.peeked_char {
            Ok(ch)
        } else {
            // ASCII fast path; otherwise `x` is the leading byte of a sequence.
            let x = match self.peek_byte()? {
                Some(b) if b < 128 => {
                    self.peeked_char = Some(Some(u32::from(b)));
                    return Ok(Some(u32::from(b)));
                }
                Some(b) => b,
                None => {
                    self.peeked_char = None;
                    return Ok(None);
                }
            };
            // Two-byte case: 5 payload bits from `x`, 6 from `y`.
            let init = utf8_first_byte(x, 2);
            let y = (self.peek_n_bytes(2)? >> 8) as u8;
            let mut ch = utf8_acc_cont_byte(init, y);
            if x >= 0xE0 {
                // Three-byte case.
                let z = (self.peek_n_bytes(3)? >> 16) as u8;
                let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
                ch = init << 12 | y_z;
                if x >= 0xF0 {
                    // Four-byte case: only 3 payload bits come from `x`.
                    let w = (self.peek_n_bytes(4)? >> 24) as u8;
                    ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
                }
            };
            self.peeked_char = Some(Some(ch));
            Ok(Some(ch))
        }
    }

    /// Consumes and returns the next byte, or `None` at end of input.
    ///
    /// Always invalidates the cached peeked character.
    #[inline]
    fn next_byte(&mut self) -> io::Result<Option<u8>> {
        self.peeked_char = None;
        if self.num_peeked_bytes > 0 {
            // Pop the lowest byte from the look-ahead buffer.
            let byte = (self.peeked_bytes & 0xFF) as u8;
            self.num_peeked_bytes -= 1;
            self.peeked_bytes >>= 8;
            Ok(Some(byte))
        } else {
            self.iter.next().transpose()
        }
    }

    /// Consumes and returns the next decoded code point, or `None` at end of
    /// input.
    ///
    /// No validation is performed: the leading byte alone decides how many
    /// bytes are consumed, and bytes missing at end of input are treated as
    /// zero.
    #[inline]
    fn next_char(&mut self) -> io::Result<Option<u32>> {
        if let Some(ch) = self.peeked_char.take() {
            if ch.is_some() {
                // Skip exactly the bytes `peek_char` decoded. The width is taken
                // from the leading byte (still buffered) rather than from the
                // decoded value: for malformed input such as the overlong
                // sequence 0xC0 0x80 the decoded value is shorter than the bytes
                // that produced it, which would leave stray bytes behind and
                // disagree with the unpeeked path below.
                if let Some(first) = self.peek_byte()? {
                    let width = match first {
                        0..=0x7F => 1,
                        0x80..=0xDF => 2,
                        0xE0..=0xEF => 3,
                        _ => 4,
                    };
                    self.increment(width)?;
                }
            }
            return Ok(ch);
        }
        // ASCII fast path; otherwise `x` is the leading byte of a sequence.
        let x = match self.next_byte()? {
            Some(b) if b < 128 => return Ok(Some(u32::from(b))),
            Some(b) => b,
            None => return Ok(None),
        };
        // Two-byte case: 5 payload bits from `x`, 6 from `y`.
        let init = utf8_first_byte(x, 2);
        let y = unwrap_or_0(self.next_byte()?);
        let mut ch = utf8_acc_cont_byte(init, y);
        if x >= 0xE0 {
            // Three-byte case.
            let z = unwrap_or_0(self.next_byte()?);
            let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
            ch = init << 12 | y_z;
            if x >= 0xF0 {
                // Four-byte case: only 3 payload bits come from `x`.
                let w = unwrap_or_0(self.next_byte()?);
                ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
            }
        };
        Ok(Some(ch))
    }
}
/// Mask selecting the low six bits of a byte, i.e. the payload bits that
/// `utf8_acc_cont_byte` takes from a continuation byte.
const CONT_MASK: u8 = 0b0011_1111;
/// Extracts the payload bits of a UTF-8 leading byte.
///
/// Keeps the low `7 - width` bits of `byte` and returns them widened to `u32`.
#[inline]
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    u32::from(byte & payload_mask)
}
/// Appends the six payload bits of the continuation byte `byte` to the
/// partially decoded code point `ch`.
#[inline]
fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    let payload = u32::from(byte & CONT_MASK);
    (ch << 6) | payload
}
/// Returns `true` if `byte` starts a UTF-8 sequence.
///
/// That is the case for ASCII bytes (`0x00..=0x7F`) and for multi-byte leading
/// bytes, whose two most significant bits are `11`. Continuation bytes
/// (`10xx_xxxx`) yield `false`.
#[inline]
fn utf8_is_first_byte(byte: u8) -> bool {
    // The top two bits must be compared against binary `11`; a hexadecimal
    // `0x11` (17) can never equal `byte >> 6`, which is at most 3.
    byte.is_ascii() || (byte >> 6) == 0b11
}
/// Returns the contained byte, or `0` when `opt` is `None`.
#[inline]
fn unwrap_or_0(opt: Option<u8>) -> u8 {
    opt.unwrap_or_default()
}
/// Returns the number of bytes (1 to 4) used to encode the code point `ch`
/// in UTF-8. Values above `0xFFFF` always report 4.
#[inline]
fn utf8_len(ch: u32) -> u32 {
    match ch {
        0..=0x7F => 1,
        0x80..=0x7FF => 2,
        0x800..=0xFFFF => 3,
        _ => 4,
    }
}