#![allow(dead_code)]
#![cfg_attr(not(feature = "std"), no_std)]
use crate::termcolors::*;
use core::cmp::{min,max};
use core::fmt::Write;
// `no_std` support: this module is only compiled when the `std` feature is
// disabled. It re-exports the heap-backed items from the `alloc` crate
// (`format!`, `vec!`, `Vec`, `String`, `Box`, `ToString`) so the rest of the
// file can glob-import them from a single place.
#[cfg(not(feature = "std"))]
pub mod mystd {
extern crate alloc;
pub use alloc::format;
pub use alloc::vec;
pub use alloc::vec::Vec;
pub use alloc::string::String;
pub use alloc::boxed::Box;
pub use alloc::string::ToString;
}
#[cfg(not(feature = "std"))]
use mystd::*;
/// State of a backtracking parser with a single token of lookahead.
///
/// `T` is the token type, `E` the error type and `Context` a user value that is
/// passed to the tokenizer on every call.
pub struct ParserState<'b, T, E, Context = ()>
where
    T: PartialEq + core::fmt::Debug,
{
    /// Lookahead token that has been read from `reader` but not consumed yet.
    token: Option<T>,
    /// Number of tokens consumed so far.
    token_count: usize,
    /// Position reported by the tokenizer for the current token (or for the start
    /// of a tokenizer error). The parser asserts it is at least `reader.len()`,
    /// i.e. it is a distance to the end of the input.
    token_pos: usize,
    /// Tokenizer callback. It advances `reader` and yields either
    /// `(token, position)` or `(error, error_start, error_end)`.
    tokenize: fn(reader: &mut &'b str, context: &'_ mut Context) -> Result<(T, usize), (E, usize, usize)>,
    /// Input that has not been tokenized yet.
    pub reader: &'b str,
    /// Token count at which a mismatch is reported as a lookahead backtrack
    /// instead of a hard error.
    generate_backtrack_token_count: usize,
    /// User context handed to the tokenizer.
    pub context: Context,
    /// Remaining nesting budget; decremented while a nested construct is running.
    depth_remaining: usize,
}
/// Compact debug rendering: `[<token_count>=<token>@<token_pos> err_on=<backtrack mark>]`.
impl<'b, T, E, Context> core::fmt::Debug for ParserState<'b, T, E, Context>
where
    T: core::fmt::Debug + PartialEq,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Only the bookkeeping fields are printed; reader, tokenizer and context are skipped.
        let Self {
            token,
            token_count,
            token_pos,
            generate_backtrack_token_count,
            ..
        } = self;
        f.write_fmt(format_args!(
            "[{}={:?}@{:?} err_on={}]",
            token_count, token, token_pos, generate_backtrack_token_count
        ))
    }
}
/// `(start, end)` location attached to a hard error. The parser in this file
/// fills it with `(token_pos, reader.len())` or with the pair reported by the
/// tokenizer.
type TokenPos = (usize,usize);
/// Ways in which a parser path can fail.
#[derive(Debug)]
pub enum ParserStatus<E> {
/// The path did not match at the current backtrack mark; the enclosing
/// `opt`/`repeat`/`choose` treats this as "no match" rather than an error.
LookaheadBacktrack(),
/// The nesting budget of the parser ran out.
DepthLimitReached(),
/// Hard error `E` together with the location it refers to.
Error(E,TokenPos),
}
/// Result type returned by every parser path.
pub type ParserResult<T,E> = Result<T, ParserStatus<E>>;
/// Location of one token, stored as distances from the token's start and end
/// to the END of the input, plus the token count at which it was read.
#[derive(Copy, Clone)]
pub struct TokenInfo {
    token_count: usize,
    token_start_until_end_of_input: usize,
    token_end_until_end_of_input: usize,
}

impl TokenInfo {
    /// Start offset of the token counted from the beginning of an input of
    /// `length_of_whole_input` bytes.
    pub fn get_start(&self, length_of_whole_input: usize) -> usize {
        let from_end = self.token_start_until_end_of_input;
        length_of_whole_input - from_end
    }

    /// End offset of the token counted from the beginning of an input of
    /// `length_of_whole_input` bytes.
    pub fn get_end(&self, length_of_whole_input: usize) -> usize {
        let from_end = self.token_end_until_end_of_input;
        length_of_whole_input - from_end
    }

    /// Distance from the token start to the end of the input.
    pub fn start_to_end_of_input(&self) -> usize {
        let Self { token_start_until_end_of_input: distance, .. } = *self;
        distance
    }

    /// Distance from the token end to the end of the input.
    pub fn end_to_end_of_input(&self) -> usize {
        let Self { token_end_until_end_of_input: distance, .. } = *self;
        distance
    }

    /// Length of the token: start distance minus end distance.
    pub fn get_length(&self) -> usize {
        self.start_to_end_of_input() - self.end_to_end_of_input()
    }

    /// Returns a copy whose start and end distances are this token's END
    /// distance plus `start` and `end` respectively; the token count is kept.
    #[must_use]
    pub fn bound(&self, start: usize, end: usize) -> TokenInfo {
        let anchor = self.token_end_until_end_of_input;
        Self {
            token_count: self.token_count,
            token_start_until_end_of_input: anchor + start,
            token_end_until_end_of_input: anchor + end,
        }
    }
}
/// Function pointer for one grammar path: receives the parser state and yields
/// a value of type `S` or a `ParserStatus`. Taken by `choose` and `call`.
type ParserPath<'b,T,E,S,Context> = fn (reader: &mut ParserState<'b,T,E,Context>) -> ParserResult<S,E>;
/// Like `ParserPath`, but the path additionally receives caller-provided
/// mutable state. Taken by `choose_with_state`.
type ParserPathWithState<'b,T,E,S,Context,State> = fn (parser: &mut ParserState<'b,T,E,Context>, state: &mut State) -> ParserResult<S,E>;
/// Parser combinators and token access.
///
/// Backtracking model: `generate_backtrack_token_count` is the "backtrack mark".
/// A mismatch that happens while `token_count` equals the mark is reported as
/// `ParserStatus::LookaheadBacktrack` (nothing was consumed since the mark, so
/// the enclosing construct may try something else). A mismatch at any other
/// token count is a hard `ParserStatus::Error`.
impl<'b, T, E, Context> ParserState<'b, T, E, Context>
where
    T: PartialEq + Copy + Clone + core::fmt::Debug,
    E: Copy + Clone + core::fmt::Debug,
{
    /// Runs `path` over and over until it stops making progress.
    ///
    /// The loop ends successfully when `path` returns `Ok(false)`, returns
    /// `Ok(true)` without having consumed a token, or fails with a lookahead
    /// backtrack. The caller's backtrack mark is restored on successful exit.
    ///
    /// # Errors
    /// `DepthLimitReached` when the nesting budget is exhausted, or the first
    /// hard error produced by `path`.
    pub fn repeat<P>(&mut self, mut path: P) -> ParserResult<(), E>
    where
        P: FnMut(&mut Self) -> ParserResult<bool, E>,
    {
        if self.depth_remaining == 0 {
            return Err(ParserStatus::DepthLimitReached());
        }
        self.depth_remaining -= 1;
        let outer_mark = self.generate_backtrack_token_count;
        let outcome = loop {
            // Every iteration is its own choice point.
            self.generate_backtrack_token_count = self.token_count;
            match path(self) {
                // Progress was made: go around again.
                Ok(true) if self.token_count > self.generate_backtrack_token_count => {}
                Ok(_) | Err(ParserStatus::LookaheadBacktrack()) => break Ok(()),
                Err(e) => break Err(e),
            }
        };
        self.depth_remaining += 1;
        if outcome.is_ok() {
            self.generate_backtrack_token_count = outer_mark;
        }
        outcome
    }

    /// Runs `path` once; a lookahead backtrack is turned into `Ok(None)`.
    ///
    /// The caller's backtrack mark is restored on every exit. This matters when
    /// `path` succeeds without consuming a token: leaving the mark at the
    /// current token count would make the next mismatch look like a lookahead
    /// backtrack, although the tokens consumed before this call cannot be
    /// un-consumed, and an enclosing `repeat`/`choose` would silently accept a
    /// half-parsed construct.
    ///
    /// # Errors
    /// Any non-backtrack status produced by `path`.
    pub fn opt<S, P>(&mut self, path: P) -> ParserResult<Option<S>, E>
    where
        P: FnOnce(&mut Self) -> ParserResult<S, E>,
    {
        let outer_mark = self.generate_backtrack_token_count;
        self.generate_backtrack_token_count = self.token_count;
        let outcome = path(self);
        self.generate_backtrack_token_count = outer_mark;
        match outcome {
            Ok(value) => Ok(Some(value)),
            Err(ParserStatus::LookaheadBacktrack()) => Ok(None),
            Err(e) => Err(e),
        }
    }

    /// Shared implementation of `choose` and `choose_with_state`.
    ///
    /// Tries `paths` in order through `run`. A path that backtracks, or that
    /// succeeds without consuming a token, is skipped. The first other outcome
    /// (a success that consumed input, or a hard error) is returned as is.
    /// When every path was skipped, the caller's backtrack mark is restored and
    /// the failure is a lookahead backtrack if the mark equals the current
    /// token count, otherwise a hard error built from `err`.
    fn choose_among<S, P, R>(&mut self, paths: &[P], mut run: R, err: fn() -> E) -> ParserResult<S, E>
    where
        R: FnMut(&mut Self, &P) -> ParserResult<S, E>,
    {
        debug_assert!(!paths.is_empty());
        if self.depth_remaining == 0 {
            return Err(ParserStatus::DepthLimitReached());
        }
        self.depth_remaining -= 1;
        let outer_mark = self.generate_backtrack_token_count;
        self.generate_backtrack_token_count = self.token_count;
        for path in paths {
            match run(self, path) {
                Err(ParserStatus::LookaheadBacktrack()) => {}
                Ok(_) if self.token_count <= self.generate_backtrack_token_count => {}
                decided => {
                    self.depth_remaining += 1;
                    return decided;
                }
            }
        }
        self.depth_remaining += 1;
        self.generate_backtrack_token_count = outer_mark;
        if outer_mark == self.token_count {
            return Err(ParserStatus::LookaheadBacktrack());
        }
        Err(ParserStatus::Error(err(), (self.token_pos, self.reader.len())))
    }

    /// Returns the outcome of the first of `paths` that consumes input or fails hard.
    ///
    /// # Errors
    /// `DepthLimitReached` when the nesting budget is exhausted; the hard error
    /// of a path; or, when no path matched, a lookahead backtrack or
    /// `Error(err(), ..)` as described on `choose_among`.
    pub fn choose<S>(&mut self, paths: &[ParserPath<'b, T, E, S, Context>], err: fn() -> E) -> ParserResult<S, E> {
        self.choose_among(paths, |parser, path| path(parser), err)
    }

    /// Same as `choose`, but every path also receives `state`.
    ///
    /// # Errors
    /// See `choose`.
    pub fn choose_with_state<S, State>(&mut self, paths: &[ParserPathWithState<'b, T, E, S, Context, State>], state: &mut State, err: fn() -> E) -> ParserResult<S, E> {
        self.choose_among(paths, |parser, path| path(parser, state), err)
    }

    /// Calls `path` while holding one unit of the nesting budget.
    ///
    /// # Errors
    /// `DepthLimitReached` when the budget is exhausted, otherwise whatever
    /// `path` returns.
    pub fn call<S>(&mut self, path: ParserPath<'b, T, E, S, Context>) -> ParserResult<S, E> {
        if self.depth_remaining == 0 {
            return Err(ParserStatus::DepthLimitReached());
        }
        self.depth_remaining -= 1;
        let result = path(self);
        self.depth_remaining += 1;
        result
    }

    /// Returns the lookahead token, running the tokenizer if none is buffered.
    ///
    /// On a tokenizer error `token_pos` is set to the reported error start and
    /// the error is returned as a hard `ParserStatus::Error`.
    fn _peek(&mut self) -> Result<T, ParserStatus<E>> {
        if let Some(buffered) = self.token {
            return Ok(buffered);
        }
        match (self.tokenize)(&mut self.reader, &mut self.context) {
            Ok((token, token_until_end)) => {
                self.token = Some(token);
                self.token_pos = token_until_end;
                debug_assert!(self.token_pos >= self.reader.len());
                Ok(token)
            }
            Err((error, error_start, error_end)) => {
                self.token_pos = error_start;
                debug_assert!(self.token_pos >= self.reader.len());
                Err(ParserStatus::Error(error, (error_start, error_end)))
            }
        }
    }

    /// Describes the current token: it starts at `token_pos` and ends where the
    /// unread input begins (both as distances to the end of the input).
    fn _token_info(&self) -> TokenInfo {
        TokenInfo {
            token_count: self.token_count,
            token_start_until_end_of_input: self.token_pos,
            token_end_until_end_of_input: self.reader.len(),
        }
    }

    /// Consumes the buffered token and returns its location.
    fn _next(&mut self) -> TokenInfo {
        let consumed = self._token_info();
        self.token = None;
        self.token_count += 1;
        consumed
    }

    /// Consumes the next token if it equals `expected`.
    ///
    /// # Errors
    /// A tokenizer error; a lookahead backtrack when the mismatch happens at
    /// the backtrack mark; otherwise `Error(err(), ..)` located at `info` when
    /// given, else at the current token.
    pub fn accept<F>(&mut self, expected: T, info: Option<&TokenInfo>, err: F) -> ParserResult<TokenInfo, E>
    where
        F: Fn() -> E,
    {
        let found = self._peek()?;
        if expected == found {
            return Ok(self._next());
        }
        if self.generate_backtrack_token_count == self.token_count {
            return Err(ParserStatus::LookaheadBacktrack());
        }
        let location = match info {
            Some(at) => (at.token_start_until_end_of_input, at.token_end_until_end_of_input),
            None => (self.token_pos, self.reader.len()),
        };
        Err(ParserStatus::Error(err(), location))
    }

    /// Consumes the next token, whatever it is, and returns it with its location.
    ///
    /// # Errors
    /// A tokenizer error.
    pub fn get(&mut self) -> ParserResult<(T, TokenInfo), E> {
        let token = self._peek()?;
        Ok((token, self._next()))
    }

    /// Puts a token obtained from `get` back as the lookahead token and resets
    /// the token count to the one recorded in `info`. No lookahead token may be
    /// buffered at this point (checked in debug builds).
    pub fn undo_get(&mut self, token: T, info: TokenInfo) {
        debug_assert!(self.token.is_none());
        self.token = Some(token);
        self.token_count = info.token_count;
    }

    /// Builds the status for rejecting an already consumed `token`.
    ///
    /// If the token was read at the backtrack mark it is put back and a
    /// lookahead backtrack is returned; otherwise `err` is evaluated and a hard
    /// error located at `info` is returned.
    pub fn error_token<F>(&mut self, token: T, info: TokenInfo, err: F) -> ParserStatus<E>
    where
        F: FnOnce(&mut Self) -> E,
    {
        if self.generate_backtrack_token_count == info.token_count {
            self.token = Some(token);
            self.token_count = info.token_count;
            ParserStatus::LookaheadBacktrack()
        } else {
            let location = (info.token_start_until_end_of_input, info.token_end_until_end_of_input);
            ParserStatus::Error(err(self), location)
        }
    }

    /// Wraps `err` in a hard error located at `info`.
    pub fn error_other(&mut self, info: &TokenInfo, err: E) -> ParserStatus<E> {
        let location = (info.token_start_until_end_of_input, info.token_end_until_end_of_input);
        ParserStatus::Error(err, location)
    }

    /// Location of the next token without consuming it.
    ///
    /// # Errors
    /// A tokenizer error.
    pub fn token_info(&mut self) -> ParserResult<TokenInfo, E> {
        self._peek()?;
        Ok(self._token_info())
    }

    /// Mutable access to the user context.
    pub fn context(&mut self) -> &mut Context {
        &mut self.context
    }

    /// Drops the parser and hands back the user context.
    pub fn consume(self) -> Context {
        self.context
    }

    /// Creates a parser over `reader` using `tokenize` and `context`.
    ///
    /// The nesting budget starts at 128. The backtrack mark starts at
    /// `usize::MAX`, so outside of `opt`/`repeat`/`choose` a mismatch is a hard
    /// error.
    pub fn new_with(reader: &'b str, tokenize: fn(reader: &mut &'b str, context: &'_ mut Context) -> Result<(T, usize), (E, usize, usize)>, context: Context) -> Self {
        const MAX_DEPTH: usize = 128;
        Self {
            token: None,
            token_count: 0,
            token_pos: 0,
            tokenize,
            reader,
            generate_backtrack_token_count: usize::MAX,
            context,
            depth_remaining: MAX_DEPTH,
        }
    }

    /// Runs the top-level path `f` and flattens its failure into a tuple
    /// `(error, start, end, extra)`.
    ///
    /// A lookahead backtrack becomes `unexpected_token` and a depth overflow
    /// becomes `depth_limit_reached`, both located at the current token. A hard
    /// error keeps its own location; `extra` then carries the current token
    /// location when that differs from the error start.
    ///
    /// # Errors
    /// The tuple described above whenever `f` fails.
    pub fn parse<R>(&mut self, f: fn(&mut Self) -> ParserResult<R, E>, unexpected_token: E, depth_limit_reached: E) -> Result<R, (E, usize, usize, Option<(usize, usize)>)> {
        let status = match f(self) {
            Ok(value) => return Ok(value),
            Err(status) => status,
        };
        let (current_start, current_end) = (self.token_pos, self.reader.len());
        Err(match status {
            ParserStatus::LookaheadBacktrack() => (unexpected_token, current_start, current_end, None),
            ParserStatus::Error(err, (start, end)) => {
                let extra = if current_start != start {
                    Some((current_start, current_end))
                } else {
                    None
                };
                (err, start, end, extra)
            }
            ParserStatus::DepthLimitReached() => (depth_limit_reached, current_start, current_end, None),
        })
    }
}
/// Character-by-character scanner that cuts a prefix off an input slice.
pub trait Spanner {
    /// Feeds the next character; returns `false` at the first character that
    /// does not belong to the span.
    fn next(&mut self, v: char) -> bool;

    /// Final check on the candidate span of `_len` bytes. Accepts everything by default.
    fn valid(&mut self, _len: usize) -> bool {
        true
    }

    /// Feeds characters of `reader` to `next` until it declines or the input
    /// ends. If `valid` accepts the resulting length, the prefix is returned
    /// and `reader` is advanced past it; otherwise `reader` is left untouched
    /// and `None` is returned.
    fn span<'b>(&mut self, reader: &mut &'b str) -> Option<&'b str> {
        // Byte offset of the first rejected character, or the whole input.
        let mut index = reader.len();
        for (pos, c) in reader.char_indices() {
            if !self.next(c) {
                index = pos;
                break;
            }
        }
        if !self.valid(index) {
            return None;
        }
        let (value, rest) = reader.split_at(index);
        *reader = rest;
        Some(value)
    }
}
/// Spans a string literal enclosed in `delim`, including both delimiters.
/// A backslash escapes the character that follows it.
pub struct StringLiteralSpanner {
    /// No character has been seen yet.
    first: bool,
    /// The closing delimiter has been consumed.
    end: bool,
    /// Previous character, or NUL right after a completed escape sequence.
    prev: char,
    /// Set once the literal contains at least one escape sequence.
    pub unescape_needed: bool,
    delim: char,
}

impl Spanner for StringLiteralSpanner {
    fn next(&mut self, b: char) -> bool {
        if self.end {
            return false;
        }
        if self.first {
            // The literal has to open with the delimiter.
            self.first = false;
            return b == self.delim;
        }
        let escaped = self.prev == '\\';
        if b == self.delim && !escaped {
            self.end = true;
            return true;
        }
        if escaped {
            // Forget the backslash so an escaped backslash cannot escape the next character.
            self.prev = '\0';
            self.unescape_needed = true;
        } else {
            self.prev = b;
        }
        true
    }

    /// Only a terminated literal is a valid span.
    fn valid(&mut self, _len: usize) -> bool {
        self.end
    }
}

impl StringLiteralSpanner {
    /// Creates a spanner for literals delimited by `delim`.
    pub fn new(delim: char) -> Self {
        Self {
            delim,
            first: true,
            end: false,
            prev: '\0',
            unescape_needed: false,
        }
    }
}
/// Spans a number-like prefix: an optional leading `-`, then digits, `.`,
/// `e`/`E`, and a sign directly after an exponent marker. The number of dots
/// or exponent markers is not checked here.
pub struct NumberSpanner {
    /// No character has been accepted yet.
    first: bool,
    /// Most recently accepted character.
    prev: char,
    /// Set once a `.`, `e` or `E` has been accepted.
    pub float: bool,
    /// Set once at least one digit has been accepted.
    has_digits: bool,
}

impl Spanner for NumberSpanner {
    fn next(&mut self, b: char) -> bool {
        let digit = b.is_ascii_digit();
        let exponent_marker = matches!(b, 'e' | 'E');
        let accepted = if self.first {
            digit || b == '-'
        } else {
            let sign_after_exponent = matches!(self.prev, 'e' | 'E') && matches!(b, '+' | '-');
            digit || b == '.' || exponent_marker || sign_after_exponent
        };
        if !accepted {
            return false;
        }
        self.first = false;
        self.float |= b == '.' || exponent_marker;
        self.has_digits |= digit;
        self.prev = b;
        true
    }

    /// A span without any digit (for example a lone `-`) is rejected.
    fn valid(&mut self, _len: usize) -> bool {
        self.has_digits
    }
}

impl NumberSpanner {
    /// Creates a spanner that has not seen any input yet.
    pub fn new() -> Self {
        Self {
            first: true,
            prev: '\0',
            float: false,
            has_digits: false,
        }
    }
}

impl Default for NumberSpanner {
    fn default() -> Self {
        Self::new()
    }
}
/// Spans brace-delimited tag content, honouring nested `{}` pairs, `"` quoted
/// strings and backslash escapes.
///
/// With `INCLUDE_OUTER_TAGS == false` the span ends before the first `}` that
/// has no matching `{`. With `INCLUDE_OUTER_TAGS == true` the span ends as soon
/// as, after at least one character, the nesting level is back at zero outside
/// of a string.
pub struct HairyTemplateTagContentSpanner<const INCLUDE_OUTER_TAGS: bool> {
    /// The previous character was an unescaped backslash.
    prev_was_escape: bool,
    /// Currently inside a `"` quoted string.
    string: bool,
    /// Number of `{` that are still open.
    nested: u32,
    /// Number of characters accepted so far.
    len: usize,
}

impl<const INCLUDE_OUTER_TAGS: bool> Spanner for HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
    fn next(&mut self, b: char) -> bool {
        if INCLUDE_OUTER_TAGS && self.nested == 0 && !self.string && self.len > 0 {
            return false;
        }
        let escaped = self.prev_was_escape;
        // Braces only count outside of strings and when not escaped.
        if !escaped && !self.string {
            match b {
                '{' => self.nested += 1,
                '}' => {
                    if self.nested == 0 {
                        return false;
                    }
                    self.nested -= 1;
                }
                _ => {}
            }
        }
        if !escaped && b == '"' {
            self.string = !self.string;
        }
        self.prev_was_escape = !escaped && b == '\\';
        self.len += 1;
        true
    }

    /// Valid only when all braces and strings are closed and the span does not
    /// end on a dangling backslash.
    fn valid(&mut self, _len: usize) -> bool {
        !(self.nested != 0 || self.string || self.prev_was_escape)
    }
}

impl<const INCLUDE_OUTER_TAGS: bool> HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
    /// Creates a spanner that has not seen any input yet.
    pub fn new() -> Self {
        Self {
            prev_was_escape: false,
            string: false,
            nested: 0,
            len: 0,
        }
    }
}

impl<const INCLUDE_OUTER_TAGS: bool> Default for HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
    fn default() -> Self {
        Self::new()
    }
}
/// Consuming helpers for a cursor over the remaining input. This file
/// implements the trait for `&mut &'a str`; the notes below describe that
/// implementation.
pub trait Acceptor<'a> {
/// Returns `true` and advances past `expected` when the input starts with
/// it; otherwise returns `false` and leaves the input unchanged.
fn accept(self, expected: &str) -> bool;
/// Cuts off and returns the longest prefix whose characters all satisfy
/// `matcher`; returns `None` (input unchanged) when that prefix is empty.
fn span_fn<M>(self, matcher: &mut M) -> Option<&'a str>
where
M: FnMut(char) -> bool;
/// Cuts off and returns the prefix selected by `spanner` (see `Spanner::span`).
fn span<P: Spanner>(self, spanner: &mut P) -> Option<&'a str>;
}
/// `Acceptor` for a mutable string cursor: every successful call advances the
/// underlying `&str` past the part that was consumed.
impl<'a> Acceptor<'a> for &mut &'a str {
    /// Consumes `expected` if the input starts with it.
    fn accept(self, expected: &str) -> bool {
        match self.strip_prefix(expected) {
            Some(remaining) => {
                *self = remaining;
                true
            }
            None => false,
        }
    }

    /// Consumes the longest prefix whose characters satisfy `matcher`.
    /// An empty prefix yields `None` and consumes nothing.
    fn span_fn<M>(self, matcher: &mut M) -> Option<&'a str>
    where
        M: FnMut(char) -> bool,
    {
        // Byte offset of the first rejected character, or the whole input.
        let mut index = self.len();
        for (pos, c) in self.char_indices() {
            if !matcher(c) {
                index = pos;
                break;
            }
        }
        if index == 0 {
            return None;
        }
        let (value, rest) = self.split_at(index);
        *self = rest;
        Some(value)
    }

    /// Delegates to `Spanner::span`.
    fn span<P: Spanner>(self, spanner: &mut P) -> Option<&'a str> {
        spanner.span(self)
    }
}
/// Line index of an input text, used to turn byte positions into line
/// information and to render the offending line of an error.
#[derive(Debug, Clone)]
pub struct LineContext {
    /// Byte offset of the start of every line, followed by one sentinel entry
    /// `input length + 1`. Empty for `LineContext::empty()`.
    offsets: Vec<u32>,
}

impl LineContext {
    /// Creates an index without any lines; every lookup yields `(0, 0, 0, 0)`.
    pub fn empty() -> Self {
        Self { offsets: Vec::new() }
    }

    /// Indexes the line starts of `reader`.
    ///
    /// Offsets are stored as `u32`.
    /// NOTE(review): inputs of 4 GiB or more would be truncated by the casts
    /// below; confirm that such inputs cannot occur.
    pub fn new(reader: &str) -> Self {
        let mut offsets: Vec<u32> = Vec::new();
        offsets.push(0);
        offsets.extend(
            reader
                .char_indices()
                .filter(|&(_, c)| c == '\n')
                .map(|(i, _)| i as u32 + 1),
        );
        offsets.push(reader.len() as u32 + 1);
        Self { offsets }
    }

    /// Resolves a span given as distances to the end of `reader` into
    /// `(line number, line text, start in line, end in line)`.
    ///
    /// The end is clamped to the line and the span is at least one byte wide,
    /// so it may reach one position past the line (error at end of line).
    ///
    /// # Panics
    /// If `start` or `end` exceed `reader.len()`, or if this index was not
    /// built from `reader`.
    fn locate_span<'r>(&self, reader: &'r str, start: usize, end: usize) -> (u32, &'r str, usize, usize) {
        let (start, end) = (reader.len() - start, reader.len() - end);
        let (line_no, _, line_start, line_end) = self.position_to_line_info(start as u32);
        let (line_start, line_end) = (line_start as usize, line_end as usize);
        let line = &reader[line_start..line_end];
        let start_in_line = start - line_start;
        let end_in_line = max(start + 1, min(end, line_end)) - line_start;
        (line_no, line, start_in_line, end_in_line)
    }

    /// Appends the marker row for `line` to `out`: padding up to
    /// `start_in_line` (tabs are kept so the row lines up with the text), then
    /// one `marker` per highlighted character, then one `marker` for every
    /// position of the span that lies past the end of the line.
    fn write_marker_row(out: &mut String, line: &str, start_in_line: usize, end_in_line: usize, marker: &str) {
        let mut pos = 0;
        for (i, c) in line.char_indices() {
            // Advance by the encoded width of the character, so that a
            // multi-byte character is not counted again by the tail loop.
            pos = i + c.len_utf8();
            if i >= end_in_line {
                break;
            } else if i >= start_in_line {
                out.push_str(marker);
            } else if c == '\t' {
                out.push('\t');
            } else {
                out.push(' ');
            }
        }
        for _ in pos..end_in_line {
            out.push_str(marker);
        }
    }

    /// Renders the line containing the span, followed by a row of `↑` markers
    /// under the span. `start` and `end` are distances to the end of `reader`.
    ///
    /// # Errors
    /// Propagates formatting errors of the underlying writer.
    ///
    /// # Panics
    /// See `locate_span`.
    pub fn format_error_context_short(&self, reader: &str, start: usize, end: usize) -> Result<String, core::fmt::Error> {
        const UNDERLINE: &str = "↑";
        let (_, line, start_in_line, end_in_line) = self.locate_span(reader, start, end);
        let mut retval = String::new();
        // NOTE(review): both highlight markers below are empty strings, so this
        // loop currently copies the line verbatim; confirm whether markers
        // were intended here.
        let mut highlight = false;
        for (i, c) in line.char_indices() {
            if i >= end_in_line {
                if highlight {
                    write!(retval, "")?;
                    highlight = false;
                }
            } else if i >= start_in_line && !highlight {
                write!(retval, "")?;
                highlight = true;
            }
            write!(retval, "{}", c)?;
        }
        writeln!(retval)?;
        Self::write_marker_row(&mut retval, line, start_in_line, end_in_line, UNDERLINE);
        Ok(retval)
    }

    /// Renders the line containing the span with terminal colours: a line
    /// number gutter, the line with the span highlighted, and a row of `▔`
    /// markers under the span. `start` and `end` are distances to the end of
    /// `reader`; `extra_line_no` is added to the reported line number.
    ///
    /// Returns `(line number, gutter prefix for continuation lines, rendered text)`.
    ///
    /// # Errors
    /// Propagates formatting errors of the underlying writer.
    ///
    /// # Panics
    /// See `locate_span`.
    pub fn format_error_context(&self, reader: &str, start: usize, end: usize, extra_line_no: u32) -> Result<(u32, String, String), core::fmt::Error> {
        const DELIM: &str = " │ ";
        const DELIM_ALT: &str = " ┿ ";
        const UNDERLINE: &str = "▔";
        let (line_no, line, start_in_line, end_in_line) = self.locate_span(reader, start, end);
        let line_no_text = format!("{:>3}", line_no + extra_line_no);
        // Blank gutter of the same width as the line number.
        let prefix = format!("{}{TERM_BRIGHT_BLACK}{}{TERM_RESET}", " ".repeat(line_no_text.len()), DELIM);
        let mut retval = String::new();
        write!(retval, "{TERM_BRIGHT_BLACK}{}{}{TERM_RESET}", line_no_text, DELIM_ALT)?;
        let mut highlight = false;
        for (i, c) in line.char_indices() {
            if i >= end_in_line {
                if highlight {
                    write!(retval, "{TERM_RESET}{TERM_DIM_DEFAULT}")?;
                    highlight = false;
                }
            } else if i >= start_in_line && !highlight {
                write!(retval, "{TERM_BRIGHT_YELLOW}")?;
                highlight = true;
            }
            write!(retval, "{}", c)?;
        }
        writeln!(retval, "{TERM_RESET}")?;
        write!(retval, "{}{TERM_BRIGHT_RED}", prefix)?;
        Self::write_marker_row(&mut retval, line, start_in_line, end_in_line, UNDERLINE);
        writeln!(retval, "{TERM_RESET}")?;
        Ok((line_no + extra_line_no, prefix, retval))
    }

    /// Like `position_to_line_info`, but the position is given as the number
    /// of bytes remaining until the end of the input.
    ///
    /// Returns `(0, 0, 0, 0)` for an empty index and when `remaining` is
    /// larger than the indexed input (instead of underflowing).
    pub fn remaining_to_line_info(&self, remaining: u32) -> (u32, u32, u32, u32) {
        self.offsets
            .last()
            // The last entry is the sentinel `input length + 1`.
            .and_then(|sentinel| sentinel.checked_sub(1))
            .and_then(|input_len| input_len.checked_sub(remaining))
            .map_or((0, 0, 0, 0), |pos| self.position_to_line_info(pos))
    }

    /// Looks up the line containing byte position `pos`.
    ///
    /// Returns `(1-based line number, offset of pos within the line, line start, line end)`
    /// where the line end excludes the line terminator, or `(0, 0, 0, 0)` when
    /// `pos` is outside the indexed input.
    pub fn position_to_line_info(&self, pos: u32) -> (u32, u32, u32, u32) {
        // Number of line starts at or before `pos`.
        let i = self.offsets.partition_point(|x| *x <= pos);
        if i == 0 || i >= self.offsets.len() {
            return (0, 0, 0, 0);
        }
        let start_of_this_line = self.offsets[i - 1];
        let start_of_next_line = self.offsets[i];
        (i as u32, pos - start_of_this_line, start_of_this_line, start_of_next_line - 1)
    }
}
// Unit tests for the lexing helpers in this file.
#[cfg(test)]
mod tests {
use crate::*;
// Smoke test: a decimal literal is accepted by `NumberSpanner` and flagged as
// a float because it contains a `.`.
#[test]
fn number_spanner() {
let mut spanner = NumberSpanner::new();
let mut reader = "3.14";
assert!(spanner.span(&mut reader).is_some());
assert!(spanner.float);
}
}