1use pest::{Parser, RuleType, Token};
2
3use std::cell::{Cell, OnceCell};
4use std::sync::{Arc, Weak};
5
6use crate::boo::Boo;
7
8const NEWLINE_DEFINITIONS: [&str; 4] = ["\n", "\r\n", "\r", "\x0C"];
9const WHITESPACE_DEFINITIONS: [&str; 6] = [" ", "\t", "\n", "\r\n", "\r", "\x0C"];
10
11#[derive(Clone)]
12pub struct SourceInfo {
13 filename: Option<Arc<str>>,
14 source: Arc<str>,
15 newline_indices: Box<[usize]>,
16 handle: OnceCell<Weak<Self>>,
17}
18
19#[derive(Debug, Clone)]
20pub struct SourceLocation {
21 source_info: Arc<SourceInfo>,
22 pub idx: usize,
23 pub line: usize,
25 pub column: usize,
27}
28
29#[derive(Debug, Clone)]
30pub struct SourceSlice {
31 pub(crate) source_info: Arc<SourceInfo>,
32 pub(crate) start: SourceLocation,
33 pub(crate) end: SourceLocation,
34}
35
36#[derive(Debug, Clone)]
37pub(crate) struct StackInfo<T> {
38 pub source_info: Arc<SourceInfo>,
39 pub rule: T,
40 pub positions: (SourceLocation, Option<SourceLocation>),
41 pub children: Vec<StackInfo<T>>,
42}
43
44#[derive(Debug, Clone)]
45pub struct ParserToken<R: RuleType> {
46 value: ParserTokenValue<Self>,
47 rule: R,
48 start: SourceLocation,
49 end: SourceLocation,
50}
51
52#[derive(Debug, Clone)]
53pub enum ParserTokenValue<T> {
54 Leaf,
55 Internal(Vec<T>),
56}
57
58#[derive(Debug, Clone, derive_more::From)]
59pub enum ExpectError {
60 #[from]
61 SyntaxError(crate::syntax::CssExpectError),
62 #[from]
63 SelectorError(crate::selector::SelectorExpectError),
64
65 #[from]
66 Generic(String),
67}
68
69#[derive(Debug, Clone)]
91pub struct TokenTracker<'a, R: RuleType> {
92 pub(crate) boo: crate::boo::Boo<'a, Vec<ParserToken<R>>>,
109
110 pub(crate) idx: Cell<usize>,
111}
112
113fn floored_binary_index(arr: &[usize], item: usize) -> Option<usize> {
170 let length = arr.len();
171
172 if item < arr[0] || item > arr[length - 1] {
173 return None;
174 }
175
176 let mut left = 0;
177 let mut right = length - 1;
178 let mut middle = length / 2;
179
180 let mut current = arr[middle];
181 while left <= right {
182 match current.cmp(&item) {
183 std::cmp::Ordering::Less => left = middle + 1,
184 std::cmp::Ordering::Equal => return Some(middle),
185 std::cmp::Ordering::Greater => right = middle - 1,
186 }
187 middle = (right + left) / 2;
188 current = arr[middle];
189 }
190
191 Some(middle)
192}
193
194impl SourceInfo {
195 pub fn new(source: Arc<str>) -> Arc<Self> {
196 let mut newline_indices = vec![0];
197
198 for i in 0..(source.len() - 1) {
199 if NEWLINE_DEFINITIONS
200 .iter()
201 .any(|&x| source[i..i + 2].starts_with(x))
202 {
203 newline_indices.push(i);
204 }
205 }
206
207 let new = Self {
208 filename: None,
209 source: source.into(),
210 newline_indices: newline_indices.into_boxed_slice(),
211 handle: OnceCell::new(),
212 };
213
214 let arc: Arc<Self> = Arc::new(new);
215 arc.handle
216 .set(Arc::downgrade(&arc))
217 .expect("OnceCell should only be initialized once");
218 arc
219 }
220
221 pub fn from_file(filepath: &std::path::Path) -> Arc<Self> {
222 let mut newline_indices = vec![0];
223
224 let source_raw: String =
225 std::fs::read_to_string(filepath).expect(&format!("Failed to read file {filepath:?}"));
226 let source: Arc<str> = source_raw.as_str().into();
227
228 let max_length = NEWLINE_DEFINITIONS.iter().map(|x| x.len()).max().unwrap();
229 for i in 0..(source.len() - 1) {
230 if NEWLINE_DEFINITIONS
231 .iter()
232 .any(|&x| source[i..i + max_length].starts_with(x))
233 {
234 newline_indices.push(i);
235 }
236 }
237
238 let new = Self {
239 filename: Some(Arc::from(filepath.to_string_lossy().into_owned())),
240 source,
241 newline_indices: newline_indices.into_boxed_slice(),
242 handle: OnceCell::new(),
243 };
244
245 let arc: Arc<Self> = Arc::new(new);
246 arc.handle
247 .set(Arc::downgrade(&arc))
248 .expect("OnceCell should only be initialized once");
249 arc
250 }
251
252 #[inline]
253 pub fn get_handle(&self) -> Arc<Self> {
254 self.handle.get().unwrap().upgrade().unwrap()
255 }
256
257 pub fn location_from_idx(&self, idx: usize) -> SourceLocation {
258 let mut line = floored_binary_index(&self.newline_indices, idx);
259 let line = line
260 .map(|x| x + 1)
261 .or_else(|| {
262 if idx < self.newline_indices[0] {
263 Some(1)
264 } else {
265 Some(self.newline_indices.len())
266 }
267 })
268 .unwrap();
269
270 if self.newline_indices[line - 1] > idx {
271 panic!(
272 "{} > {idx}\n{:?}",
273 self.newline_indices[line - 1],
274 self.newline_indices
275 );
276 }
277
278 SourceLocation {
279 source_info: self.get_handle(),
280 idx,
281 line,
282 column: idx - self.newline_indices[line - 1] + 1,
283 }
284 }
285}
286
287impl SourceLocation {
288 pub fn slice(&self, other: &Self) -> SourceSlice {
289 assert!(
290 Arc::ptr_eq(&self.source_info, &other.source_info),
291 "Cannot slice between 2 different sources!"
292 );
293 SourceSlice {
294 source_info: self.source_info.clone(),
295 start: self.clone(),
296 end: other.clone(),
297 }
298 }
299}
300
301impl SourceSlice {
302 #[inline]
303 pub fn get(&self) -> &str {
304 &self.source_info.source[self.start.idx..self.end.idx]
305 }
306}
307
308impl std::fmt::Debug for SourceInfo {
309 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
310 write!(f, "SourceInfo {{ ... }}")
311 }
312}
313
314impl std::fmt::Display for SourceSlice {
315 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
316 write!(f, "{}", self.get())
317 }
318}
319
320impl std::ops::Deref for SourceSlice {
321 type Target = str;
322
323 fn deref(&self) -> &Self::Target {
324 self.get()
325 }
326}
327
328impl std::cmp::PartialEq for SourceSlice {
329 fn eq(&self, other: &Self) -> bool {
330 Arc::ptr_eq(&self.source_info, &other.source_info)
331 && self.start.idx == other.start.idx
332 && self.end.idx == other.end.idx
333 }
334}
335
336impl std::cmp::Eq for SourceSlice {}
337
338impl std::hash::Hash for SourceSlice {
339 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
340 Arc::as_ptr(&self.source_info.source).hash(state);
341 self.start.idx.hash(state);
342 self.end.idx.hash(state);
343 }
344}
345
346impl<R: RuleType> StackInfo<R> {
347 fn new(source_info: Arc<SourceInfo>, rule: R, pos: SourceLocation) -> Self {
348 Self {
349 source_info,
350 rule,
351 positions: (pos, None),
352 children: Vec::new(),
353 }
354 }
355}
356
357impl<R: RuleType> ParserToken<R> {
358 fn new(value: StackInfo<R>) -> Self {
359 let token_val = if !value.children.is_empty() {
360 let mut children: Vec<Self> = value
361 .children
362 .into_iter()
363 .map(|x| Self::new(x))
364 .collect::<Vec<_>>();
365 ParserTokenValue::Internal(children)
366 } else {
367 ParserTokenValue::Leaf
368 };
369
370 Self {
371 value: token_val,
372 rule: value.rule,
373 start: value.positions.0,
374 end: value.positions.1.unwrap(),
375 }
376 }
377
378 pub fn is_leaf(&self) -> bool {
379 matches!(self.value, ParserTokenValue::Leaf)
380 }
381
382 pub fn get_source(&self) -> SourceSlice {
383 self.start.slice(&self.end)
384 }
385
386 pub fn get_indices(&self) -> (SourceLocation, SourceLocation) {
387 (self.start.clone(), self.end.clone())
388 }
389
390 pub fn get_rule(&self) -> R {
391 self.rule
392 }
393
394 pub fn get_children(&self) -> Option<&Vec<ParserToken<R>>> {
395 match &self.value {
396 ParserTokenValue::Leaf => None,
397 ParserTokenValue::Internal(vec) => Some(vec),
398 }
399 }
400}
401
402impl<'a, R: RuleType> TokenTracker<'a, R> {
403 pub fn new(tokens_boo: Boo<'a, Vec<ParserToken<R>>>) -> Self {
405 Self {
406 boo: tokens_boo,
407 idx: Cell::new(0),
408 }
409 }
410
411 #[inline]
416 pub fn peek(&self) -> Option<&ParserToken<R>> {
417 self.boo.get(self.idx.get())
418 }
419
420 #[inline]
421 pub(crate) fn fail_because<O, E>(&self, error: E) -> Result<O, E> {
422 self.idx.set(0.max(self.idx.get() - 1));
423 Err(error)
424 }
425
426 #[inline]
427 pub(crate) fn get_location(&self) -> Option<SourceSlice> {
428 self.boo
429 .get_ref()
430 .get(self.idx.get())
431 .or_else(|| self.boo.get_ref().get(self.idx.get() - 1))
432 .map(|x| x.get_source())
433 }
434
435 pub(crate) fn pop_front(&'a self) -> Option<&'a ParserToken<R>> {
439 let result = self.boo.get(self.idx.get());
440 if result.is_some() {
441 self.idx.set(self.idx.get() + 1);
442 }
443 result
444 }
445
446 pub(crate) fn pop_front_count(&'a self, count: usize) -> Option<Box<[&'a ParserToken<R>]>> {
451 if self.boo.len() < self.idx.get() + count {
452 return None;
453 }
454
455 let mut tokens = Vec::new();
456 tokens.reserve(count);
457
458 for i in 0..count {
459 tokens.push(self.boo.get(self.idx.get() + i).unwrap());
460 }
461
462 self.idx.set(self.idx.get() + count);
463 Some(tokens.into_boxed_slice())
464 }
465
466 #[inline]
468 pub fn len(&self) -> usize {
469 self.boo.len() - self.idx.get()
470 }
471
472 #[inline]
474 pub fn is_empty(&self) -> bool {
475 self.boo.is_empty() || self.len() == 0
476 }
477}
478
479pub fn parse_source<R: RuleType, P: Parser<R>>(
480 source_info: Arc<SourceInfo>,
481 rule: R,
482) -> Result<ParserToken<R>, pest::error::Error<R>> {
483 let pairs = P::parse(rule, &source_info.source)?;
484
485 let mut tokens = pairs.tokens();
486
487 let mut stack: Vec<StackInfo<R>> = Vec::new();
488 let mut root: StackInfo<R> = {
489 match tokens.next().unwrap() {
490 Token::Start { rule, pos } => {
491 let pos = source_info.location_from_idx(pos.pos());
492 StackInfo::new(source_info.clone(), rule.clone(), pos)
493 }
494 _ => unreachable!(),
495 }
496 };
497
498 for token in tokens {
499 match token {
500 Token::Start { rule, pos } => {
501 let pos = source_info.location_from_idx(pos.pos());
502 stack.push(StackInfo::new(source_info.clone(), rule.clone(), pos));
503 }
504 Token::End { rule, pos } => {
505 if let Some(mut css_token) = stack.pop() {
506 css_token.positions.1 = Some(source_info.location_from_idx(pos.pos()));
507
508 if let Some(parent) = stack.last_mut() {
509 parent.children.push(css_token);
510 } else {
511 root.children.push(css_token);
512 }
513 } else {
514 root.positions.1 = Some(source_info.location_from_idx(pos.pos()));
515 }
516 }
517 }
518 }
519
520 Ok(ParserToken::new(root))
521}