use pest::{Parser, RuleType, Token};
use std::cell::{Cell, OnceCell};
use std::sync::{Arc, Weak};
use crate::boo::Boo;
/// Character sequences that `SourceInfo` treats as a line break when it scans its text:
/// line feed, carriage return + line feed, carriage return, and form feed.
const NEWLINE_DEFINITIONS: [&str; 4] = ["\n", "\r\n", "\r", "\x0C"];
/// Space and tab, followed by the same four sequences as `NEWLINE_DEFINITIONS`.
// Not referenced in the visible part of this file.
const WHITESPACE_DEFINITIONS: [&str; 6] = [" ", "\t", "\n", "\r\n", "\r", "\x0C"];
/// A source text plus the bookkeeping needed to turn byte offsets into line/column
/// positions.
///
/// The constructors hand instances out as `Arc<SourceInfo>` and store a weak reference
/// to that `Arc` in `handle`, so `get_handle` can return another owning handle.
// NOTE(review): the derived `Clone` copies `handle` as-is, so a cloned value still
// refers to the allocation it was cloned from — confirm this is intended.
#[derive(Clone)]
pub struct SourceInfo {
/// Path the text was read from; `None` when the value was built by `new`.
filename: Option<Arc<str>>,
/// The complete source text.
source: Arc<str>,
/// Ascending byte offsets used as line anchors: a leading `0` followed by one entry
/// per line break found in `source`.
newline_indices: Box<[usize]>,
/// Weak self-reference, set once by the constructors and read by `get_handle`.
handle: OnceCell<Weak<Self>>,
}
/// A single position inside a `SourceInfo`, as produced by
/// `SourceInfo::location_from_idx`.
#[derive(Debug, Clone)]
pub struct SourceLocation {
/// The source this position belongs to.
source_info: Arc<SourceInfo>,
/// Byte offset into the source text (used directly to slice the text).
pub idx: usize,
/// 1-based line number.
pub line: usize,
/// Column, computed as the distance from the line's anchor offset plus one.
pub column: usize,
}
/// A span of source text delimited by two `SourceLocation`s.
///
/// The text is the half-open byte range `start.idx..end.idx` of the source.
#[derive(Debug, Clone)]
pub struct SourceSlice {
/// The source both endpoints belong to.
pub(crate) source_info: Arc<SourceInfo>,
/// Inclusive start of the span.
pub(crate) start: SourceLocation,
/// Exclusive end of the span.
pub(crate) end: SourceLocation,
}
/// Intermediate tree node assembled by `parse_source` while it walks the parser's
/// start/end token stream; converted into a `ParserToken` once complete.
#[derive(Debug, Clone)]
pub(crate) struct StackInfo<T> {
/// The source being parsed.
pub source_info: Arc<SourceInfo>,
/// Grammar rule that opened this node.
pub rule: T,
/// `(start, end)` positions; `end` stays `None` until the matching end token is seen.
pub positions: (SourceLocation, Option<SourceLocation>),
/// Nodes that were opened and closed while this node was open.
pub children: Vec<StackInfo<T>>,
}
/// A node of the parsed token tree: the rule that matched, the span it covers, and
/// its child tokens (if any).
#[derive(Debug, Clone)]
pub struct ParserToken<R: RuleType> {
/// `Leaf` for a token without children, otherwise the child tokens.
value: ParserTokenValue<Self>,
/// Grammar rule that produced this token.
rule: R,
/// Position where the token starts.
start: SourceLocation,
/// Position where the token ends.
end: SourceLocation,
}
/// Payload of a tree node: either nothing or a list of child nodes of type `T`.
#[derive(Debug, Clone)]
pub enum ParserTokenValue<T> {
/// The node has no children.
Leaf,
/// The node's children.
Internal(Vec<T>),
}
/// Umbrella error type wrapping the crate's syntax and selector expectation errors,
/// plus a free-form message.
///
/// `derive_more::From` with `#[from]` on each variant generates a `From` conversion
/// from the wrapped type.
#[derive(Debug, Clone, derive_more::From)]
pub enum ExpectError {
/// Wraps an error from `crate::syntax`.
#[from]
SyntaxError(crate::syntax::CssExpectError),
/// Wraps an error from `crate::selector`.
#[from]
SelectorError(crate::selector::SelectorExpectError),
/// A plain text message.
#[from]
Generic(String),
}
/// A cursor over a list of `ParserToken`s.
///
/// The position lives in a `Cell` so the cursor can be advanced through a shared
/// reference (the consuming methods take `&self`).
#[derive(Debug, Clone)]
pub struct TokenTracker<'a, R: RuleType> {
/// The token list being walked, held through the crate's `Boo` wrapper.
pub(crate) boo: crate::boo::Boo<'a, Vec<ParserToken<R>>>,
/// Index of the next token to be returned.
pub(crate) idx: Cell<usize>,
}
/// Returns the index of the greatest element of `arr` that is `<= item`.
///
/// `arr` must be sorted in ascending order. When several elements equal the floor
/// value, the index of the last of them is returned.
///
/// Returns `None` when `arr` is empty, when `item` is smaller than the first element,
/// or when `item` is larger than the last element (callers treat "past the end" as a
/// separate case).
fn floored_binary_index(arr: &[usize], item: usize) -> Option<usize> {
    let (&first, &last) = (arr.first()?, arr.last()?);
    if item < first || item > last {
        return None;
    }
    // `partition_point` yields how many leading elements are `<= item`; the guard above
    // guarantees at least one, so the subtraction cannot underflow.
    Some(arr.partition_point(|&value| value <= item) - 1)
}
impl SourceInfo {
    /// Wraps an in-memory `source` text. The resulting value has no filename.
    pub fn new(source: Arc<str>) -> Arc<Self> {
        Self::from_parts(None, source)
    }

    /// Reads `filepath` as UTF-8 text and wraps it, remembering the path as the filename.
    ///
    /// # Panics
    /// Panics if the file cannot be read as a UTF-8 string.
    pub fn from_file(filepath: &std::path::Path) -> Arc<Self> {
        // `unwrap_or_else` keeps the message lazy; the text matches what
        // `expect(&format!(..))` printed before.
        let source_raw = std::fs::read_to_string(filepath)
            .unwrap_or_else(|err| panic!("Failed to read file {filepath:?}: {err:?}"));
        let filename: Arc<str> = Arc::from(filepath.to_string_lossy().into_owned());
        Self::from_parts(Some(filename), Arc::from(source_raw))
    }

    /// Shared constructor: computes the line anchors and wires up the weak self-handle.
    fn from_parts(filename: Option<Arc<str>>, source: Arc<str>) -> Arc<Self> {
        let newline_indices = Self::line_anchors(&source);
        // `new_cyclic` hands us the weak pointer before the value exists, so the handle
        // is populated at construction time and cannot be observed unset.
        Arc::new_cyclic(|weak| Self {
            filename,
            source,
            newline_indices,
            handle: OnceCell::from(weak.clone()),
        })
    }

    /// Builds the line-anchor table for `source`: a leading `0`, then the byte offset of
    /// the last byte of every line break listed in `NEWLINE_DEFINITIONS`.
    ///
    /// The scan works on bytes, so it never slices inside a multi-byte character and
    /// copes with empty input. At each position the longest matching definition wins,
    /// which makes `"\r\n"` a single line break (anchored at its `'\n'`).
    fn line_anchors(source: &str) -> Box<[usize]> {
        let bytes = source.as_bytes();
        let mut anchors = vec![0];
        let mut pos = 0;
        while pos < bytes.len() {
            let rest = &bytes[pos..];
            let longest_match = NEWLINE_DEFINITIONS
                .iter()
                .filter(|def| rest.starts_with(def.as_bytes()))
                .map(|def| def.len())
                .max();
            match longest_match {
                Some(len) => {
                    anchors.push(pos + len - 1);
                    pos += len;
                }
                None => pos += 1,
            }
        }
        anchors.into_boxed_slice()
    }

    /// Returns an owning handle to the `Arc` this value lives in.
    ///
    /// # Panics
    /// Panics if the handle is unset or the original `Arc` has already been dropped,
    /// which can only happen for a value that is not owned by the `Arc` its constructor
    /// returned.
    #[inline]
    pub fn get_handle(&self) -> Arc<Self> {
        self.handle
            .get()
            .and_then(|weak| weak.upgrade())
            .expect("SourceInfo must live inside the Arc created by its constructor")
    }

    /// Converts the byte offset `idx` into a `SourceLocation` with a 1-based line number.
    ///
    /// The column is `idx` minus the line's anchor, plus one. On the first line the
    /// anchor is offset 0; on later lines it is the line-break byte itself.
    // NOTE(review): as a consequence the first character after a line break is reported
    // as column 2, while the first character of the source is column 1 — confirm whether
    // callers rely on this.
    pub fn location_from_idx(&self, idx: usize) -> SourceLocation {
        // The table always starts with 0, so the lookup only fails when `idx` lies past
        // the last anchor, i.e. on the final line.
        let line = floored_binary_index(&self.newline_indices, idx)
            .map_or_else(|| self.newline_indices.len(), |anchor_pos| anchor_pos + 1);
        let anchor = self.newline_indices[line - 1];
        // Sanity check on the lookup: the chosen anchor must not lie after `idx`.
        assert!(
            anchor <= idx,
            "{} > {idx}\n{:?}",
            anchor,
            self.newline_indices
        );
        SourceLocation {
            source_info: self.get_handle(),
            idx,
            line,
            column: idx - anchor + 1,
        }
    }
}
impl SourceLocation {
    /// Builds the `SourceSlice` that starts at `self` and ends at `other`.
    ///
    /// # Panics
    /// Panics if the two locations do not point into the same `SourceInfo` allocation.
    pub fn slice(&self, other: &Self) -> SourceSlice {
        let same_source = Arc::ptr_eq(&self.source_info, &other.source_info);
        assert!(same_source, "Cannot slice between 2 different sources!");

        let (start, end) = (self.clone(), other.clone());
        SourceSlice {
            source_info: Arc::clone(&self.source_info),
            start,
            end,
        }
    }
}
impl SourceSlice {
    /// Returns the text covered by this slice: bytes `start.idx..end.idx` of the source.
    #[inline]
    pub fn get(&self) -> &str {
        let byte_range = self.start.idx..self.end.idx;
        &self.source_info.source[byte_range]
    }
}
impl std::fmt::Debug for SourceInfo {
    /// Prints a fixed placeholder instead of the fields.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("SourceInfo { ... }")
    }
}
impl std::fmt::Display for SourceSlice {
    /// Writes the sliced text verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.get();
        f.write_str(text)
    }
}
impl std::ops::Deref for SourceSlice {
type Target = str;
fn deref(&self) -> &Self::Target {
self.get()
}
}
impl std::cmp::PartialEq for SourceSlice {
    /// Two slices are equal when they point into the same `SourceInfo` allocation and
    /// cover the same byte range; the text itself is not compared.
    fn eq(&self, other: &Self) -> bool {
        if !Arc::ptr_eq(&self.source_info, &other.source_info) {
            return false;
        }
        let own_range = (self.start.idx, self.end.idx);
        let other_range = (other.start.idx, other.end.idx);
        own_range == other_range
    }
}
// Marker impl: the `PartialEq` comparison (pointer identity plus two integer offsets)
// is a full equivalence relation.
impl std::cmp::Eq for SourceSlice {}
impl std::hash::Hash for SourceSlice {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
Arc::as_ptr(&self.source_info.source).hash(state);
self.start.idx.hash(state);
self.end.idx.hash(state);
}
}
impl<R: RuleType> StackInfo<R> {
    /// Creates an open node for `rule` starting at `pos`: no end position yet and no
    /// children.
    fn new(source_info: Arc<SourceInfo>, rule: R, pos: SourceLocation) -> Self {
        let positions = (pos, None);
        let children = Vec::new();
        Self {
            source_info,
            rule,
            positions,
            children,
        }
    }
}
impl<R: RuleType> ParserToken<R> {
fn new(value: StackInfo<R>) -> Self {
let token_val = if !value.children.is_empty() {
let mut children: Vec<Self> = value
.children
.into_iter()
.map(|x| Self::new(x))
.collect::<Vec<_>>();
ParserTokenValue::Internal(children)
} else {
ParserTokenValue::Leaf
};
Self {
value: token_val,
rule: value.rule,
start: value.positions.0,
end: value.positions.1.unwrap(),
}
}
pub fn is_leaf(&self) -> bool {
matches!(self.value, ParserTokenValue::Leaf)
}
pub fn get_source(&self) -> SourceSlice {
self.start.slice(&self.end)
}
pub fn get_indices(&self) -> (SourceLocation, SourceLocation) {
(self.start.clone(), self.end.clone())
}
pub fn get_rule(&self) -> R {
self.rule
}
pub fn get_children(&self) -> Option<&Vec<ParserToken<R>>> {
match &self.value {
ParserTokenValue::Leaf => None,
ParserTokenValue::Internal(vec) => Some(vec),
}
}
}
impl<'a, R: RuleType> TokenTracker<'a, R> {
    /// Creates a tracker positioned at the first token of `tokens_boo`.
    pub fn new(tokens_boo: Boo<'a, Vec<ParserToken<R>>>) -> Self {
        Self {
            boo: tokens_boo,
            idx: Cell::new(0),
        }
    }

    /// Returns the token at the cursor without advancing, or `None` at the end.
    #[inline]
    pub fn peek(&self) -> Option<&ParserToken<R>> {
        self.boo.get(self.idx.get())
    }

    /// Steps the cursor back by one token (staying at 0 if already there) and returns
    /// `Err(error)`.
    #[inline]
    pub(crate) fn fail_because<O, E>(&self, error: E) -> Result<O, E> {
        // `saturating_sub` avoids the `usize` underflow a plain `- 1` hits at index 0.
        self.idx.set(self.idx.get().saturating_sub(1));
        Err(error)
    }

    /// Returns the source span of the token at the cursor, falling back to the previous
    /// token when the cursor is past the end. `None` if neither exists.
    #[inline]
    pub(crate) fn get_location(&self) -> Option<SourceSlice> {
        let tokens = self.boo.get_ref();
        let cursor = self.idx.get();
        tokens
            .get(cursor)
            // `checked_sub` keeps an empty list with the cursor at 0 from underflowing.
            .or_else(|| cursor.checked_sub(1).and_then(|prev| tokens.get(prev)))
            .map(|token| token.get_source())
    }

    /// Returns the token at the cursor and advances past it; `None` (cursor unchanged)
    /// at the end.
    pub(crate) fn pop_front(&'a self) -> Option<&'a ParserToken<R>> {
        let cursor = self.idx.get();
        let token = self.boo.get(cursor)?;
        self.idx.set(cursor + 1);
        Some(token)
    }

    /// Returns the next `count` tokens and advances past them. If fewer than `count`
    /// tokens remain, returns `None` and leaves the cursor unchanged.
    pub(crate) fn pop_front_count(&'a self, count: usize) -> Option<Box<[&'a ParserToken<R>]>> {
        let begin = self.idx.get();
        let end = begin.checked_add(count)?;
        if self.boo.len() < end {
            return None;
        }
        let tokens = (begin..end)
            .map(|i| self.boo.get(i))
            .collect::<Option<Box<[_]>>>()?;
        self.idx.set(end);
        Some(tokens)
    }

    /// Number of tokens left from the cursor to the end.
    #[inline]
    pub fn len(&self) -> usize {
        self.boo.len().saturating_sub(self.idx.get())
    }

    /// Returns `true` when no tokens are left.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Parses the text of `source_info` with parser `P`, starting at `rule`, and returns
/// the result as a `ParserToken` tree rooted at the first pair the parser produced.
///
/// The parser's flat start/end token stream is folded into a tree with an explicit
/// stack: a start token opens a node, an end token closes the innermost open node and
/// attaches it to its parent (or to the root when nothing else is open). An end token
/// seen with no open node closes the root itself.
///
/// # Errors
/// Returns the pest error when the input does not match `rule`.
///
/// # Panics
/// Panics if the token stream is empty or does not begin with a start token, or if the
/// root is never closed by an end token.
pub fn parse_source<R: RuleType, P: Parser<R>>(
    source_info: Arc<SourceInfo>,
    rule: R,
) -> Result<ParserToken<R>, pest::error::Error<R>> {
    let pairs = P::parse(rule, &source_info.source)?;
    let mut tokens = pairs.tokens();

    let Some(Token::Start {
        rule: root_rule,
        pos: root_pos,
    }) = tokens.next()
    else {
        panic!("parser token stream must begin with a start token");
    };
    let mut root = StackInfo::new(
        Arc::clone(&source_info),
        root_rule,
        source_info.location_from_idx(root_pos.pos()),
    );

    // Nodes that have been opened but not yet closed, innermost last.
    let mut open_nodes: Vec<StackInfo<R>> = Vec::new();
    for token in tokens {
        match token {
            Token::Start { rule, pos } => {
                let start = source_info.location_from_idx(pos.pos());
                open_nodes.push(StackInfo::new(Arc::clone(&source_info), rule, start));
            }
            Token::End { pos, .. } => {
                let end = source_info.location_from_idx(pos.pos());
                match open_nodes.pop() {
                    Some(mut finished) => {
                        finished.positions.1 = Some(end);
                        match open_nodes.last_mut() {
                            Some(parent) => parent.children.push(finished),
                            None => root.children.push(finished),
                        }
                    }
                    // Nothing else is open, so this end token belongs to the root.
                    None => root.positions.1 = Some(end),
                }
            }
        }
    }
    Ok(ParserToken::new(root))
}