use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use alloc::rc::Rc;
use alloc::vec;
use alloc::vec::Vec;
use core::num::NonZeroUsize;
use core::ops::Range;
use core::sync::atomic::{AtomicUsize, Ordering};
use crate::error::{Error, ErrorVariant};
use crate::iterators::{pairs, QueueableToken};
use crate::position::{self, Position};
use crate::span::Span;
use crate::stack::Stack;
use crate::RuleType;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Lookahead {
Positive,
Negative,
None,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Atomicity {
Atomic,
CompoundAtomic,
NonAtomic,
}
pub type ParseResult<S> = Result<S, S>;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MatchDir {
BottomToTop,
TopToBottom,
}
static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
pub fn set_call_limit(limit: Option<NonZeroUsize>) {
CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
}
#[derive(Debug)]
struct CallLimitTracker {
current_call_limit: Option<(usize, usize)>,
}
impl Default for CallLimitTracker {
fn default() -> Self {
let limit = CALL_LIMIT.load(Ordering::Relaxed);
let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
Self { current_call_limit }
}
}
impl CallLimitTracker {
fn limit_reached(&self) -> bool {
self.current_call_limit
.map_or(false, |(current, limit)| current >= limit)
}
fn increment_depth(&mut self) {
if let Some((current, _)) = &mut self.current_call_limit {
*current += 1;
}
}
}
#[derive(Debug)]
pub struct ParserState<'i, R: RuleType> {
position: Position<'i>,
queue: Vec<QueueableToken<R>>,
lookahead: Lookahead,
pos_attempts: Vec<R>,
neg_attempts: Vec<R>,
attempt_pos: usize,
atomicity: Atomicity,
stack: Stack<Span<'i>>,
call_tracker: CallLimitTracker,
}
pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
where
F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
{
let state = ParserState::new(input);
match f(state) {
Ok(state) => {
let len = state.queue.len();
Ok(pairs::new(Rc::new(state.queue), input, 0, len))
}
Err(mut state) => {
let variant = if state.reached_call_limit() {
ErrorVariant::CustomError {
message: "call limit reached".to_owned(),
}
} else {
state.pos_attempts.sort();
state.pos_attempts.dedup();
state.neg_attempts.sort();
state.neg_attempts.dedup();
ErrorVariant::ParsingError {
positives: state.pos_attempts.clone(),
negatives: state.neg_attempts.clone(),
}
};
Err(Error::new_from_pos(
variant,
position::Position::new(input, state.attempt_pos).unwrap(),
))
}
}
}
impl<'i, R: RuleType> ParserState<'i, R> {
pub fn new(input: &'i str) -> Box<Self> {
Box::new(ParserState {
position: Position::from_start(input),
queue: vec![],
lookahead: Lookahead::None,
pos_attempts: vec![],
neg_attempts: vec![],
attempt_pos: 0,
atomicity: Atomicity::NonAtomic,
stack: Stack::new(),
call_tracker: Default::default(),
})
}
pub fn position(&self) -> &Position<'i> {
&self.position
}
pub fn atomicity(&self) -> Atomicity {
self.atomicity
}
#[inline]
fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
if self.call_tracker.limit_reached() {
return Err(self);
}
self.call_tracker.increment_depth();
Ok(self)
}
#[inline]
fn reached_call_limit(&self) -> bool {
self.call_tracker.limit_reached()
}
#[inline]
pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let actual_pos = self.position.pos();
let index = self.queue.len();
let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
(self.pos_attempts.len(), self.neg_attempts.len())
} else {
(0, 0)
};
if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
self.queue.push(QueueableToken::Start {
end_token_index: 0,
input_pos: actual_pos,
});
}
let attempts = self.attempts_at(actual_pos);
let result = f(self);
match result {
Ok(mut new_state) => {
if new_state.lookahead == Lookahead::Negative {
new_state.track(
rule,
actual_pos,
pos_attempts_index,
neg_attempts_index,
attempts,
);
}
if new_state.lookahead == Lookahead::None
&& new_state.atomicity != Atomicity::Atomic
{
let new_index = new_state.queue.len();
match new_state.queue[index] {
QueueableToken::Start {
ref mut end_token_index,
..
} => *end_token_index = new_index,
_ => unreachable!(),
};
let new_pos = new_state.position.pos();
new_state.queue.push(QueueableToken::End {
start_token_index: index,
rule,
input_pos: new_pos,
});
}
Ok(new_state)
}
Err(mut new_state) => {
if new_state.lookahead != Lookahead::Negative {
new_state.track(
rule,
actual_pos,
pos_attempts_index,
neg_attempts_index,
attempts,
);
}
if new_state.lookahead == Lookahead::None
&& new_state.atomicity != Atomicity::Atomic
{
new_state.queue.truncate(index);
}
Err(new_state)
}
}
}
fn attempts_at(&self, pos: usize) -> usize {
if self.attempt_pos == pos {
self.pos_attempts.len() + self.neg_attempts.len()
} else {
0
}
}
fn track(
&mut self,
rule: R,
pos: usize,
pos_attempts_index: usize,
neg_attempts_index: usize,
prev_attempts: usize,
) {
if self.atomicity == Atomicity::Atomic {
return;
}
let curr_attempts = self.attempts_at(pos);
if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
return;
}
if pos == self.attempt_pos {
self.pos_attempts.truncate(pos_attempts_index);
self.neg_attempts.truncate(neg_attempts_index);
}
if pos > self.attempt_pos {
self.pos_attempts.clear();
self.neg_attempts.clear();
self.attempt_pos = pos;
}
let attempts = if self.lookahead != Lookahead::Negative {
&mut self.pos_attempts
} else {
&mut self.neg_attempts
};
if pos == self.attempt_pos {
attempts.push(rule);
}
}
#[inline]
pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let token_index = self.queue.len();
let initial_pos = self.position;
let result = f(self);
match result {
Ok(new_state) => Ok(new_state),
Err(mut new_state) => {
new_state.position = initial_pos;
new_state.queue.truncate(token_index);
Err(new_state)
}
}
}
#[inline]
pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
where
F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let mut result = f(self);
loop {
match result {
Ok(state) => result = f(state),
Err(state) => return Ok(state),
};
}
}
#[inline]
pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
match f(self) {
Ok(state) | Err(state) => Ok(state),
}
}
#[inline]
pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(char) -> bool,
{
if self.position.match_char_by(f) {
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
if self.position.match_string(string) {
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
if self.position.match_insensitive(string) {
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
if self.position.match_range(range) {
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
if self.position.skip(n) {
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
self.position.skip_until(strings);
Ok(self)
}
#[inline]
pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
if self.position.at_start() {
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
if self.position.at_end() {
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let initial_lookahead = self.lookahead;
self.lookahead = if is_positive {
match initial_lookahead {
Lookahead::None | Lookahead::Positive => Lookahead::Positive,
Lookahead::Negative => Lookahead::Negative,
}
} else {
match initial_lookahead {
Lookahead::None | Lookahead::Positive => Lookahead::Negative,
Lookahead::Negative => Lookahead::Positive,
}
};
let initial_pos = self.position;
let result = f(self.checkpoint());
let result_state = match result {
Ok(mut new_state) => {
new_state.position = initial_pos;
new_state.lookahead = initial_lookahead;
Ok(new_state.restore())
}
Err(mut new_state) => {
new_state.position = initial_pos;
new_state.lookahead = initial_lookahead;
Err(new_state.restore())
}
};
if is_positive {
result_state
} else {
match result_state {
Ok(state) => Err(state),
Err(state) => Ok(state),
}
}
}
#[inline]
pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let initial_atomicity = self.atomicity;
let should_toggle = self.atomicity != atomicity;
if should_toggle {
self.atomicity = atomicity;
}
let result = f(self);
match result {
Ok(mut new_state) => {
if should_toggle {
new_state.atomicity = initial_atomicity;
}
Ok(new_state)
}
Err(mut new_state) => {
if should_toggle {
new_state.atomicity = initial_atomicity;
}
Err(new_state)
}
}
}
#[inline]
pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let start = self.position;
let result = f(self);
match result {
Ok(mut state) => {
let end = state.position;
state.stack.push(start.span(&end));
Ok(state)
}
Err(state) => Err(state),
}
}
#[inline]
pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
let string = self
.stack
.peek()
.expect("peek was called on empty stack")
.as_str();
self.match_string(string)
}
#[inline]
pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
let string = self
.stack
.pop()
.expect("pop was called on empty stack")
.as_str();
self.match_string(string)
}
#[inline]
pub fn stack_match_peek_slice(
mut self: Box<Self>,
start: i32,
end: Option<i32>,
match_dir: MatchDir,
) -> ParseResult<Box<Self>> {
let range = match constrain_idxs(start, end, self.stack.len()) {
Some(r) => r,
None => return Err(self),
};
if range.end <= range.start {
return Ok(self);
}
let mut position = self.position;
let result = {
let mut iter_b2t = self.stack[range].iter();
let matcher = |span: &Span| position.match_string(span.as_str());
match match_dir {
MatchDir::BottomToTop => iter_b2t.all(matcher),
MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
}
};
if result {
self.position = position;
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
}
#[inline]
pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
let mut position = self.position;
let mut result = true;
while let Some(span) = self.stack.pop() {
result = position.match_string(span.as_str());
if !result {
break;
}
}
if result {
self.position = position;
Ok(self)
} else {
Err(self)
}
}
#[inline]
pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
match self.stack.pop() {
Some(_) => Ok(self),
None => Err(self),
}
}
#[inline]
pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
match f(self.checkpoint()) {
Ok(state) => Ok(state.checkpoint_ok()),
Err(state) => Err(state.restore()),
}
}
#[inline]
pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
self.stack.snapshot();
self
}
#[inline]
pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
self.stack.clear_snapshot();
self
}
#[inline]
pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
self.stack.restore();
self
}
}
fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
let start_norm = normalize_index(start, len)?;
let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
Some(start_norm..end_norm)
}
fn normalize_index(i: i32, len: usize) -> Option<usize> {
if i > len as i32 {
None
} else if i >= 0 {
Some(i as usize)
} else {
let real_i = len as i32 + i;
if real_i >= 0 {
Some(real_i as usize)
} else {
None
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn normalize_index_pos() {
assert_eq!(normalize_index(4, 6), Some(4));
assert_eq!(normalize_index(5, 5), Some(5));
assert_eq!(normalize_index(6, 3), None);
}
#[test]
fn normalize_index_neg() {
assert_eq!(normalize_index(-4, 6), Some(2));
assert_eq!(normalize_index(-5, 5), Some(0));
assert_eq!(normalize_index(-6, 3), None);
}
}