use super::html::unescape_string;
use super::*;
/// A lightweight scanning cursor over a borrowed source string.
///
/// The position is a byte offset (not a character index) into `src`.
#[derive(Debug)]
pub(crate) struct Cursor<'a> {
/// The complete source text being scanned.
pub(crate) src: &'a str,
/// Current position as a byte offset into `src`.
pub(crate) i: usize,
}
impl<'a> Cursor<'a> {
pub(super) fn new(src: &'a str) -> Self {
Self { src, i: 0 }
}
/// Returns `true` once the position has reached (or moved past) the end of the source.
pub(super) fn eof(&self) -> bool {
    self.src.len() <= self.i
}
pub(super) fn pos(&self) -> usize {
self.i
}
/// Moves the cursor to the byte offset `pos`.
///
/// The value is stored as-is; no bounds or char-boundary check is performed here.
pub(super) fn set_pos(&mut self, pos: usize) {
    let target = pos;
    self.i = target;
}
/// Returns the whole source as a byte slice (not just the unread remainder).
pub(super) fn bytes(&self) -> &'a [u8] {
    let source: &'a str = self.src;
    source.as_bytes()
}
/// Returns the byte at the current position without advancing, or `None`
/// when the position is at or beyond the end of the source.
pub(super) fn peek(&self) -> Option<u8> {
    let source = self.src.as_bytes();
    source.get(self.i).copied()
}
/// Advances past the next byte if it equals `b`.
///
/// Returns `true` when the byte was consumed; otherwise the cursor is left untouched.
pub(super) fn consume_byte(&mut self, b: u8) -> bool {
    let matched = self.peek() == Some(b);
    if matched {
        self.i += 1;
    }
    matched
}
/// Consumes the byte `b` or fails.
///
/// # Errors
/// Returns a located parse error (via `parse_error_at`) when the next byte is
/// not `b`; the cursor does not move in that case.
pub(super) fn expect_byte(&mut self, b: u8) -> Result<()> {
    if !self.consume_byte(b) {
        let at = self.i;
        let message = format!("expected '{}' at {}", b as char, at);
        return Err(self.parse_error_at(message, at));
    }
    Ok(())
}
/// Advances past `token` if the source continues with exactly those bytes.
///
/// Returns `true` when the token was consumed; otherwise the cursor is left
/// untouched. An empty `token` matches whenever the position is within the
/// source (including exactly at its end).
pub(super) fn consume_ascii(&mut self, token: &str) -> bool {
    // Checked slicing: a cursor beyond the end (positions are not validated by
    // `set_pos`) yields `None` here instead of overflowing `i + token.len()`
    // or panicking on an out-of-range index.
    let matched = self
        .bytes()
        .get(self.i..)
        .map_or(false, |rest| rest.starts_with(token.as_bytes()));
    if matched {
        self.i += token.len();
    }
    matched
}
/// Consumes `token` or fails.
///
/// # Errors
/// Returns a located parse error (via `parse_error_at`) when the source does
/// not continue with `token`; the cursor does not move in that case.
pub(super) fn expect_ascii(&mut self, token: &str) -> Result<()> {
    if !self.consume_ascii(token) {
        let at = self.i;
        let message = format!("expected '{}' at {}", token, at);
        return Err(self.parse_error_at(message, at));
    }
    Ok(())
}
pub(super) fn skip_ws(&mut self) {
self.skip_ws_and_comments()
}
/// Skips any run of ASCII whitespace, `// ...` line comments and `/* ... */`
/// block comments, stopping at the first byte that starts none of them.
///
/// A line comment is consumed through its terminating `\n` (or to the end of
/// the source). An unterminated block comment silently consumes the rest of
/// the source.
pub(super) fn skip_ws_and_comments(&mut self) {
    loop {
        self.skip_plain_ws();
        if self.consume_ascii("//") {
            // Jump past the newline if there is one, otherwise to the end.
            let rest = self.bytes().get(self.i..).unwrap_or_default();
            let skipped = rest
                .iter()
                .position(|&byte| byte == b'\n')
                .map_or(rest.len(), |newline| newline + 1);
            self.i += skipped;
        } else if self.consume_ascii("/*") {
            // Step byte by byte until the terminator is consumed or input ends.
            while !self.eof() && !self.consume_ascii("*/") {
                self.i += 1;
            }
        } else {
            return;
        }
    }
}
/// Skips consecutive ASCII whitespace bytes (as defined by
/// `u8::is_ascii_whitespace`) without treating comments specially.
pub(super) fn skip_plain_ws(&mut self) {
    while self.peek().map_or(false, |byte| byte.is_ascii_whitespace()) {
        self.i += 1;
    }
}
fn parse_error_at(&self, message: String, pos: usize) -> Error {
let clamped = pos.min(self.src.len());
let (line, column) = Self::line_column_at(self.src, clamped);
let snippet = Self::snippet_around(self.src, clamped);
Error::ScriptParse(format!(
"{message} (line {line}, column {column}, near `{snippet}`)"
))
}
/// Converts a byte offset into a 1-based `(line, column)` pair.
///
/// Lines are separated by `\n`; the column counts characters (not bytes) from
/// the start of the line. Offsets past the end are clamped to `src.len()`, and
/// an offset that falls inside a multi-byte character is snapped back to the
/// start of that character, so this never panics on a non-boundary offset.
fn line_column_at(src: &str, pos: usize) -> (usize, usize) {
    let mut end = pos.min(src.len());
    // Offset 0 is always a char boundary, so this cannot underflow.
    while !src.is_char_boundary(end) {
        end -= 1;
    }
    let before = &src[..end];
    let line = before.bytes().filter(|&byte| byte == b'\n').count() + 1;
    let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);
    let column = before[line_start..].chars().count() + 1;
    (line, column)
}
/// Returns the largest char boundary of `src` that is `<= idx`
/// (after first clamping `idx` to `src.len()`).
fn clamp_left_boundary(src: &str, idx: usize) -> usize {
    let upper = idx.min(src.len());
    // Offset 0 is always a boundary, so the search always succeeds.
    (0..=upper)
        .rev()
        .find(|&candidate| src.is_char_boundary(candidate))
        .unwrap_or(0)
}
/// Returns the smallest char boundary of `src` that is `>= idx`
/// (after first clamping `idx` to `src.len()`).
fn clamp_right_boundary(src: &str, idx: usize) -> usize {
    let total = src.len();
    let lower = idx.min(total);
    // `src.len()` is always a boundary, so the search always succeeds.
    (lower..=total)
        .find(|&candidate| src.is_char_boundary(candidate))
        .unwrap_or(total)
}
/// Produces a short, single-line excerpt of `src` centred on byte offset `pos`.
///
/// Up to 24 bytes on each side are included, widened outward to the nearest
/// char boundaries. Newlines, carriage returns and tabs are rendered as the
/// two-character escapes `\n`, `\r`, `\t`, and `...` marks each side that was
/// truncated.
fn snippet_around(src: &str, pos: usize) -> String {
    let center = pos.min(src.len());
    let start = Self::clamp_left_boundary(src, center.saturating_sub(24));
    let end = Self::clamp_right_boundary(src, center.saturating_add(24));
    let window = src.get(start..end).unwrap_or_default();

    let mut snippet = String::with_capacity(window.len() + 6);
    if start > 0 {
        snippet.push_str("...");
    }
    for ch in window.chars() {
        match ch {
            '\n' => snippet.push_str("\\n"),
            '\r' => snippet.push_str("\\r"),
            '\t' => snippet.push_str("\\t"),
            other => snippet.push(other),
        }
    }
    if end < src.len() {
        snippet.push_str("...");
    }
    snippet
}
/// Parses an ASCII identifier at the current position.
///
/// The first byte must be `_`, `$` or an ASCII letter; following bytes may
/// also be ASCII digits. Returns `None` without moving the cursor when no
/// identifier starts here; otherwise consumes it and returns it as an owned
/// string.
pub(super) fn parse_identifier(&mut self) -> Option<String> {
    let is_start = |byte: u8| byte == b'_' || byte == b'$' || byte.is_ascii_alphabetic();
    let is_continue = |byte: u8| byte == b'_' || byte == b'$' || byte.is_ascii_alphanumeric();

    let start = self.i;
    let rest = self.bytes().get(start..)?;
    let (&head, tail) = rest.split_first()?;
    if !is_start(head) {
        return None;
    }
    let tail_len = tail.iter().take_while(|&&byte| is_continue(byte)).count();
    self.i = start + 1 + tail_len;
    self.src.get(start..self.i).map(String::from)
}
/// Parses a single- or double-quoted string literal at the current position.
///
/// The raw text between the quotes is passed through `unescape_string` before
/// being returned (the exact escape set is defined by that helper). While
/// scanning, a backslash skips the byte that follows it, so an escaped quote
/// does not end the literal.
///
/// Special case: when the two bytes right after the opening quote are both the
/// same quote byte (three quotes in a row), all three are consumed and a
/// one-character string holding the quote is returned.
/// NOTE(review): presumably a deliberate compatibility shortcut; confirm with callers.
///
/// # Errors
/// Returns a located parse error when the next byte is not a quote, when the
/// input ends before the closing quote, or when the body cannot be sliced out
/// of the source. The cursor is not rewound on failure.
pub(super) fn parse_string_literal(&mut self) -> Result<String> {
    let quote = match self.peek() {
        Some(byte) => byte,
        None => return Err(self.parse_error_at("expected string literal".into(), self.i)),
    };
    if !matches!(quote, b'\'' | b'"') {
        let at = self.i;
        return Err(self.parse_error_at(format!("expected string literal at {}", at), at));
    }

    // Step over the opening quote; `start` marks the first byte of the body.
    self.i += 1;
    let start = self.i;
    let bytes = self.bytes();

    // Three identical quotes in a row: yield the quote character itself.
    if bytes.get(start..start + 2) == Some(&[quote, quote][..]) {
        self.i += 2;
        return Ok(char::from(quote).to_string());
    }

    while let Some(&byte) = bytes.get(self.i) {
        if byte == b'\\' {
            // Skip the backslash together with the byte it escapes.
            self.i += 2;
        } else if byte == quote {
            let raw = match self.src.get(start..self.i) {
                Some(raw) => raw,
                None => {
                    return Err(self.parse_error_at("invalid string literal".into(), self.i));
                }
            };
            self.i += 1;
            return Ok(unescape_string(raw));
        } else {
            self.i += 1;
        }
    }
    Err(self.parse_error_at("unclosed string literal".into(), start))
}
pub(super) fn read_until_byte(&mut self, b: u8) -> Result<String> {
let start = self.i;
while let Some(current) = self.peek() {
if current == b {
return self
.src
.get(start..self.i)
.map(|s| s.to_string())
.ok_or_else(|| Error::ScriptParse("invalid substring".into()));
}
self.i += 1;
}
Err(Error::ScriptParse(format!(
"expected '{}' before EOF",
b as char
)))
}
pub(super) fn read_balanced_block(&mut self, open: u8, close: u8) -> Result<String> {
self.expect_byte(open)?;
let start = self.i;
let bytes = self.bytes();
if !matches!((open, close), (b'(', b')') | (b'[', b']') | (b'{', b'}')) {
return Err(self.parse_error_at(
format!(
"unsupported balanced block delimiter pair: '{}{}'",
open as char, close as char
),
start.saturating_sub(1),
));
}
let mut idx = self.i;
let mut scanner = JsLexScanner::new();
scanner.consume_significant_bytes(&[open]);
let is_closed = |scanner: &JsLexScanner| match (open, close) {
(b'(', b')') => scanner.in_normal() && scanner.paren == 0,
(b'[', b']') => scanner.in_normal() && scanner.bracket == 0,
(b'{', b'}') => scanner.in_normal() && scanner.brace == 0,
_ => false,
};
while idx < bytes.len() {
idx = scanner.advance(bytes, idx);
if is_closed(&scanner) {
let body = self
.src
.get(start..idx - 1)
.ok_or_else(|| Error::ScriptParse("invalid block".into()))?
.to_string();
self.i = idx;
return Ok(body);
}
}
Err(self.parse_error_at(
format!(
"unclosed block (open='{}', close='{}', mode={:?}, paren={}, bracket={}, brace={}, scanned={})",
open as char,
close as char,
scanner.mode,
scanner.paren,
scanner.bracket,
scanner.brace,
idx
),
idx,
))
}
}