//! expry 0.4.0
//!
//! Execute an expression on an encoded (binary) value, yielding another binary value (in either decoded or encoded form). Supports custom functions, parsing an expression, and converting an expression to bytecode.
//!
//! Documentation
#![allow(dead_code)]
#![cfg_attr(not(feature = "std"), no_std)]

use crate::termcolors::*;

use core::cmp::{min,max};
use core::fmt::Write;

// Only compiled when the `std` feature is disabled: re-exports the `alloc`
// items used in this file (`format!`, `vec`, `Vec`, `String`, `Box`,
// `ToString`) so that a single `use mystd::*;` brings them into scope.
#[cfg(not(feature = "std"))]
pub mod mystd {
    extern crate alloc;
    pub use alloc::format;
    pub use alloc::vec;
    pub use alloc::vec::Vec;
    pub use alloc::string::String;
    pub use alloc::boxed::Box;
    pub use alloc::string::ToString;
}

#[cfg(not(feature = "std"))]
use mystd::*;

/// Mutable state of a backtracking parser that pulls tokens of type `T` out of a string slice.
///
/// `E` is the error payload type. `Context` is caller-defined data that is handed to the
/// tokenizer on every call and can be retrieved again with `context()` / `consume()`.
pub struct ParserState<'b,T,E,Context=()>
where
T: PartialEq + core::fmt::Debug,
{
    /// Lookahead token that has been tokenized but not consumed yet; `None` means the next
    /// token still has to be read from `reader`.
    token: Option<T>,
    /// Number of tokens consumed so far.
    token_count: usize, // token count
    /// Position of the current token, expressed as the distance from the token start to the
    /// end of the input (the value reported by `tokenize`).
    token_pos: usize, // pos in input of current token
    /// Tokenizer callback. On success it returns the token plus the offset of the token
    /// measured TO THE END of the input (basically `reader.len()` after whitespace
    /// processing), so better error messages can be generated. On failure it returns the
    /// error payload plus the start and end of the offending span, measured the same way.
    #[allow(clippy::type_complexity)]
    tokenize: fn(reader: &mut &'b str, context: &'_ mut Context) -> Result<(T,usize), (E,usize,usize)>, // function returning a token and an offset of the token TO THE END (basically reader.len()), after whitespace processing, so better error messages can be generated). Same for errors
    /// Part of the input that has not been tokenized yet.
    pub reader: &'b str, // remaining string
    /// Backtrack mark: a token mismatch that happens while `token_count` equals this value is
    /// reported as `ParserStatus::LookaheadBacktrack` instead of a hard error.
    generate_backtrack_token_count: usize, // generate error if larger as this token count
    /// Caller-defined data passed to the tokenizer.
    pub context: Context,
    /// Remaining nesting budget; `repeat`, `choose`, `choose_with_state` and `call` refuse to
    /// run once it reaches zero.
    depth_remaining: usize, // to avoid stack overflows
}
impl<'b,T,E,Context> core::fmt::Debug for ParserState<'b,T,E,Context>
where
T: core::fmt::Debug + PartialEq,
{
    /// Compact one-line dump of the parser position:
    /// `[<tokens consumed>=<lookahead token>@<token position> err_on=<backtrack mark>]`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let consumed = self.token_count;
        let lookahead = &self.token;
        let position = self.token_pos;
        let backtrack_mark = self.generate_backtrack_token_count;
        f.write_fmt(format_args!("[{}={:?}@{:?} err_on={}]", consumed, lookahead, position, backtrack_mark))
    }
}

/// Span attached to an error as `(from, to)`. The parser methods in this file fill it with
/// distances to the end of the input (token start first, token end second).
type TokenPos = (usize,usize); // from - to

/// Reason why a parser path did not produce a value.
#[derive(Debug)]
pub enum ParserStatus<E> {
    /// A token did not match while no token had been consumed since the backtrack mark;
    /// `opt`, `repeat` and `choose` react to this by trying something else.
    LookaheadBacktrack(),
    /// The nesting budget of the parser was exhausted.
    DepthLimitReached(),
    /// Hard error: the payload plus the span it refers to.
    Error(E,TokenPos),
}

/// Result type of every parser path: the parsed value or the `ParserStatus` that stopped it.
pub type ParserResult<T,E> = Result<T, ParserStatus<E>>;

/// Location of a token inside the input.
///
/// Both offsets are stored as distances to the END of the whole input, so the start
/// distance is the larger of the two. `token_count` records how many tokens had been
/// consumed when the snapshot was taken.
#[derive(Copy,Clone)]
pub struct TokenInfo {
    token_count: usize,
    token_start_until_end_of_input: usize,
    token_end_until_end_of_input: usize,
}

impl TokenInfo {
    /// Offset of the token start counted from the beginning of an input of the given length.
    pub fn get_start(&self, length_of_whole_input: usize) -> usize {
        let distance = self.start_to_end_of_input();
        length_of_whole_input - distance
    }

    /// Offset of the token end counted from the beginning of an input of the given length.
    pub fn get_end(&self, length_of_whole_input: usize) -> usize {
        let distance = self.end_to_end_of_input();
        length_of_whole_input - distance
    }

    /// Distance from the token start to the end of the input.
    pub fn start_to_end_of_input(&self) -> usize {
        let Self { token_start_until_end_of_input, .. } = *self;
        token_start_until_end_of_input
    }

    /// Distance from the token end to the end of the input.
    pub fn end_to_end_of_input(&self) -> usize {
        let Self { token_end_until_end_of_input, .. } = *self;
        token_end_until_end_of_input
    }

    /// Length of the token (start distance minus end distance).
    pub fn get_length(&self) -> usize {
        let Self {
            token_start_until_end_of_input: from,
            token_end_until_end_of_input: to,
            ..
        } = *self;
        from - to
    }

    /// Returns a copy that points at a narrower region, which is handy when an error
    /// belongs to a specific spot inside the token. Both `start` and `end` are added to
    /// the distance between the END of this token and the end of the input.
    #[must_use]
    pub fn bound(&self, start: usize, end: usize) -> TokenInfo {
        let token_end = self.token_end_until_end_of_input;
        let mut narrowed = *self;
        narrowed.token_start_until_end_of_input = token_end + start;
        narrowed.token_end_until_end_of_input = token_end + end;
        narrowed
    }
}

/// A parser path: a plain function that advances the parser and yields an `S` or a `ParserStatus`.
type ParserPath<'b,T,E,S,Context> = fn (reader: &mut ParserState<'b,T,E,Context>) -> ParserResult<S,E>;
/// Like `ParserPath`, but the function additionally receives a mutable caller-supplied `State`.
type ParserPathWithState<'b,T,E,S,Context,State> = fn (parser: &mut ParserState<'b,T,E,Context>, state: &mut State) -> ParserResult<S,E>;

impl<'b,T,E,Context> ParserState<'b,T,E,Context>
where
T: PartialEq + Copy + Clone + core::fmt::Debug,
E: Copy + Clone + core::fmt::Debug,
{
    /// Runs `path` again and again until it stops matching.
    ///
    /// Before every iteration the backtrack mark is moved to the current token count, so a
    /// mismatch on the first token of an iteration surfaces as `LookaheadBacktrack`.
    /// The loop ends when `path` returns `Ok(false)`, returns another `Ok` value without
    /// having consumed a token, or returns `LookaheadBacktrack`; in all three cases the
    /// caller's backtrack mark is restored and `Ok(())` is returned.
    ///
    /// Any other error from `path` is returned as is (the backtrack mark is not restored on
    /// that branch). Returns `DepthLimitReached` without calling `path` when no nesting
    /// budget is left; otherwise one level of the budget is held while the loop runs.
    pub fn repeat<P>(&mut self, mut path: P) -> ParserResult<(),E>
        where P: FnMut(&mut Self) -> ParserResult<bool,E>,
    {
        if self.depth_remaining == 0 {
            return Err(ParserStatus::DepthLimitReached());
        }
        self.depth_remaining -= 1;
        // remember the caller's backtrack mark so it can be put back afterwards
        let getc = self.generate_backtrack_token_count;
        loop {
            self.generate_backtrack_token_count = self.token_count;
            match path(self) {
                Ok(false) => {
                    break;
                },
                Ok(_) => {
                    // if no progress can be made, stop
                    if self.token_count <= self.generate_backtrack_token_count {
                        break;
                    }
                },
                Err(ParserStatus::LookaheadBacktrack()) => {
                    break;
                }
                Err(e) => {
                    // give the nesting level back before propagating the error
                    self.depth_remaining += 1;
                    return Err(e);
                }
            }
        }
        self.depth_remaining += 1;
        self.generate_backtrack_token_count = getc;
        Ok(())
    }

    /// Tries `path` and turns a lookahead mismatch into `Ok(None)`.
    ///
    /// The backtrack mark is moved to the current token count before `path` runs. If `path`
    /// reports `LookaheadBacktrack`, the caller's mark is restored and `Ok(None)` is
    /// returned. A successful `path` yields `Ok(Some(value))` and every other error is passed
    /// through; on those two branches the caller's mark is NOT restored.
    pub fn opt<S, P>(&mut self, path: P) -> ParserResult<Option<S>,E>
        where P: FnOnce(&mut Self) -> ParserResult<S,E>,
    {
        // caller's backtrack mark, only put back when the path backtracks
        let getc = self.generate_backtrack_token_count;
        self.generate_backtrack_token_count = self.token_count;
        match path(self) {
            Ok(v) => {
                Ok(Some(v))
            },
            Err(ParserStatus::LookaheadBacktrack()) => {
                self.generate_backtrack_token_count = getc;
                Ok(None)
            },
            Err(e) => {
                Err(e)
            },
        }
    }

    /// Tries the alternatives in `paths` in order and returns the first conclusive result.
    ///
    /// An alternative is skipped when it reports `LookaheadBacktrack`, or when it succeeds
    /// without having consumed a token (its value is discarded in that case). The first
    /// alternative that does anything else decides the outcome: its `Ok` value or its error
    /// is returned directly, without restoring the caller's backtrack mark.
    ///
    /// When every alternative was skipped, the caller's backtrack mark is restored. If it
    /// equals the current token count the result is `LookaheadBacktrack`, otherwise a hard
    /// `Error` built from `err()` and the span of the current token.
    ///
    /// Returns `DepthLimitReached` when no nesting budget is left. `paths` must not be empty
    /// (checked with a `debug_assert!`).
    pub fn choose<S>(&mut self, paths: &[ParserPath<'b,T,E,S,Context>], err: fn () -> E) -> ParserResult<S,E> {
        debug_assert!(!paths.is_empty());
        if self.depth_remaining == 0 {
            return Err(ParserStatus::DepthLimitReached());
        }
        self.depth_remaining -= 1;
        // caller's backtrack mark; the mark used by the alternatives is the current token count
        let getc = self.generate_backtrack_token_count;
        self.generate_backtrack_token_count = self.token_count;
        for p in paths {
            match p(self) {
                Err(ParserStatus::LookaheadBacktrack()) => {
                },
                // check if no tokens are accepted, if so continue with next candidate
                Ok(_) if self.token_count <= self.generate_backtrack_token_count => {
                    // can happen when there is a rule that only has a repeat inside
                },
                v => {
                    self.depth_remaining += 1;
                    return v;
                },
            }
        }
        self.depth_remaining += 1;
        self.generate_backtrack_token_count = getc;
        // nothing consumed since the caller's mark: let the caller try its own alternatives
        if self.generate_backtrack_token_count == self.token_count {
            return Err(ParserStatus::LookaheadBacktrack());
        }
        Err(ParserStatus::Error(err(), (self.token_pos, self.reader.len())))
    }

    /// Same selection logic as `choose`, except that every alternative also receives `state`.
    ///
    /// Alternatives are tried in order. One that reports `LookaheadBacktrack`, or succeeds
    /// without consuming a token, is skipped; the first other result (value or error) is
    /// returned directly without restoring the caller's backtrack mark. If all alternatives
    /// were skipped, the caller's mark is restored and the result is `LookaheadBacktrack`
    /// when the mark equals the current token count, or a hard `Error` built from `err()`
    /// and the span of the current token otherwise.
    ///
    /// Returns `DepthLimitReached` when no nesting budget is left. `paths` must not be empty
    /// (checked with a `debug_assert!`).
    pub fn choose_with_state<S,State>(&mut self, paths: &[ParserPathWithState<'b,T,E,S,Context,State>], state: &mut State, err: fn () -> E) -> ParserResult<S,E> {
        debug_assert!(!paths.is_empty());
        if self.depth_remaining == 0 {
            return Err(ParserStatus::DepthLimitReached());
        }
        self.depth_remaining -= 1;
        // caller's backtrack mark; the mark used by the alternatives is the current token count
        let getc = self.generate_backtrack_token_count;
        self.generate_backtrack_token_count = self.token_count;
        for p in paths {
            match p(self, state) {
                Err(ParserStatus::LookaheadBacktrack()) => {
                },
                // check if no tokens are accepted, if so continue with next candidate
                Ok(_) if self.token_count <= self.generate_backtrack_token_count => {
                    // can happen when there is a rule that only has a repeat inside
                },
                v => {
                    self.depth_remaining += 1;
                    return v;
                },
            }
        }
        self.depth_remaining += 1;
        self.generate_backtrack_token_count = getc;
        // nothing consumed since the caller's mark: let the caller try its own alternatives
        if self.generate_backtrack_token_count == self.token_count {
            return Err(ParserStatus::LookaheadBacktrack());
        }
        Err(ParserStatus::Error(err(), (self.token_pos, self.reader.len())))
    }

    /// Runs `path` one nesting level deeper, which is what recursive rules (negation, unary
    /// minus, ...) should use so that the depth budget is enforced.
    ///
    /// Returns `DepthLimitReached` without calling `path` when the budget is exhausted; the
    /// borrowed level is handed back once `path` returns.
    pub fn call<S>(&mut self, path: ParserPath<'b,T,E,S,Context>) -> ParserResult<S,E> {
        match self.depth_remaining.checked_sub(1) {
            None => Err(ParserStatus::DepthLimitReached()),
            Some(budget) => {
                self.depth_remaining = budget;
                let outcome = path(self);
                self.depth_remaining += 1;
                outcome
            },
        }
    }

    /// Returns the current lookahead token, running the tokenizer first if none is buffered.
    ///
    /// A freshly read token is stored in `self.token` and its position in `self.token_pos`.
    /// When the tokenizer fails, `self.token_pos` is set to the start of the reported span
    /// and the failure is returned as `ParserStatus::Error` carrying that span.
    fn _peek(&mut self) -> Result<T,ParserStatus<E>> {
        if let Some(x) = &self.token {
            return Ok(*x);
        }
        let (t,token_until_end) = (self.tokenize)(&mut self.reader, &mut self.context).map_err(|(x,error_start,error_end)| {
            self.token_pos = error_start;
            // positions are distances to the end of input, so they cannot lie behind the reader
            debug_assert!(self.token_pos >= self.reader.len());
            ParserStatus::Error(x, (error_start, error_end))
        })?;
        self.token = Some(t);
        self.token_pos = token_until_end;
        debug_assert!(self.token_pos >= self.reader.len());
        Ok(t)
    }

    /// Snapshot of the current token position: the token starts at `token_pos` and ends
    /// where the unread remainder of the input begins.
    fn _token_info(&self) -> TokenInfo {
        let token_start_until_end_of_input = self.token_pos;
        let token_end_until_end_of_input = self.reader.len();
        TokenInfo {
            token_count: self.token_count,
            token_start_until_end_of_input,
            token_end_until_end_of_input,
        }
    }

    /// Consumes the buffered token: returns its position (taken before the counter moves),
    /// bumps the token counter and clears the lookahead slot.
    fn _next(&mut self) -> TokenInfo {
        let consumed = self._token_info();
        self.token_count += 1;
        self.token = None;
        consumed
    }

    // returns token pos in input, `info` is extra hint that is related
    pub fn accept<F>(&mut self, expected: T, info: Option<&TokenInfo>, err: F) -> ParserResult<TokenInfo,E>
        where F: Fn () -> E,
    {
        match self._peek()? {
            t if expected == t => {
                Ok(self._next())
            },
            _ if self.generate_backtrack_token_count == self.token_count => {
                Err(ParserStatus::LookaheadBacktrack())
            },
            _ => {
                //eprintln!("current token: {:?}", self.token);
                let (start, end) = if let Some(TokenInfo{token_count: _, token_start_until_end_of_input, token_end_until_end_of_input}) = info { (*token_start_until_end_of_input, *token_end_until_end_of_input) } else { (self.token_pos, self.reader.len()) };
                Err(ParserStatus::Error(err(), (start, end)))
            }
        }
    }
    
    /// Reads and consumes the next token, whatever it is, returning it with its position.
    /// Tokenizer failures are passed through.
    pub fn get(&mut self) -> ParserResult<(T,TokenInfo),E> {
        let token = self._peek()?;
        let position = self._next();
        Ok((token, position))
    }

    pub fn undo_get(&mut self, token: T, info: TokenInfo) {
        debug_assert!(self.token.is_none());
        self.token = Some(token);
        self.token_count = info.token_count;
    }

    // generate an error directly related with the current token (possibly triggering a backtrace)
    pub fn error_token<F>(&mut self, token: T, info: TokenInfo, err: F) -> ParserStatus<E>
        where F: FnOnce(&mut Self) -> E,
    {
        if self.generate_backtrack_token_count == info.token_count {
            self.token = Some(token);
            self.token_count = info.token_count;
            ParserStatus::LookaheadBacktrack()
        } else {
            ParserStatus::Error(err(self), (info.token_start_until_end_of_input, info.token_end_until_end_of_input))
        }
    }

    // generate an error not related with the current token (so it will not backtrack)
    pub fn error_other(&mut self, info: &TokenInfo, err: E) -> ParserStatus<E> {
        ParserStatus::Error(err, (info.token_start_until_end_of_input, info.token_end_until_end_of_input))
    }

    /// Position of the upcoming token without consuming it; the token is tokenized first
    /// if necessary, and tokenizer failures are passed through.
    pub fn token_info(&mut self) -> ParserResult<TokenInfo,E> {
        match self._peek() {
            Ok(_) => Ok(self._token_info()),
            Err(status) => Err(status),
        }
    }

    pub fn context(&mut self) -> &mut Context {
        &mut self.context
    }

    pub fn consume(self) -> Context {
        self.context
    }

    /// Creates a parser over `reader` that reads tokens with `tokenize` and carries `context`.
    ///
    /// The parser starts with no buffered token, a token count of zero, a nesting budget of
    /// 128 levels and a backtrack mark of `usize::MAX`.
    #[allow(clippy::type_complexity)]
    pub fn new_with(reader: &'b str, tokenize: fn(reader: &mut &'b str, context: &'_ mut Context) -> Result<(T,usize), (E,usize,usize)>, context: Context) -> Self {
        const MAX_DEPTH: usize = 128;
        const INITIAL_BACKTRACK_MARK: usize = usize::MAX;
        Self {
            reader,
            tokenize,
            context,
            token: None,
            token_pos: 0,
            token_count: 0,
            depth_remaining: MAX_DEPTH,
            generate_backtrack_token_count: INITIAL_BACKTRACK_MARK,
        }
    }

    #[allow(clippy::type_complexity)]
    pub fn parse<R>(&mut self, f: fn(&mut Self) -> ParserResult<R,E>, unexpected_token: E, depth_limit_reached: E) -> Result<R,(E,usize,usize,Option<(usize,usize)>)> {
        f(self).map_err(|x| {
            //eprintln!("error: {:?} on token {:?}", x, self);
            match x {
                ParserStatus::LookaheadBacktrack() => {
                    (unexpected_token, self.token_pos, self.reader.len(), None)
                },
                ParserStatus::Error(err, token_info) => {
                    let extra = if self.token_pos != token_info.0 {
                        Some((self.token_pos, self.reader.len()))
                    } else {
                        None
                    };
                    (err, token_info.0, token_info.1, extra)
                }
                ParserStatus::DepthLimitReached() => {
                    (depth_limit_reached, self.token_pos, self.reader.len(), None)
                },
            }
        })
    }
}

/// A stateful matcher that is fed an input one character at a time and decides how long
/// the matching prefix is.
pub trait Spanner {
    /// Feeds the next character; returns `false` for the first character that is NOT part
    /// of the span.
    fn next(&mut self, v: char) -> bool;

    /// Final verdict once the span length (in bytes) is known; the default accepts anything.
    fn valid(&mut self, _len: usize) -> bool {
        true
    }

    /// Splits the longest accepted prefix off `reader`.
    ///
    /// Characters are passed to `next` until it returns `false` or the input ends. If
    /// `valid` then rejects the span, `None` is returned and `reader` is left untouched;
    /// otherwise the prefix is returned and `reader` is advanced past it.
    fn span<'b>(&mut self, reader: &mut &'b str) -> Option<&'b str> {
        let input: &'b str = *reader;
        let mut cut = input.len();
        for (pos, c) in input.char_indices() {
            if !self.next(c) {
                cut = pos;
                break;
            }
        }
        if !self.valid(cut) {
            return None;
        }
        let (value, rest) = input.split_at(cut);
        *reader = rest;
        Some(value)
    }
}

/// Spanner for a string literal enclosed in `delim`, with `\` as the escape character.
///
/// The span includes both delimiters. `unescape_needed` is set once a character following
/// a backslash has been seen inside the literal.
pub struct StringLiteralSpanner {
    first: bool,
    end: bool,
    prev: char,
    pub unescape_needed: bool,
    delim: char,
}

impl Spanner for StringLiteralSpanner {
    fn next(&mut self, b: char) -> bool {
        // nothing is accepted after the closing delimiter
        if self.end {
            return false;
        }
        // the very first character has to be the opening delimiter
        if self.first {
            self.first = false;
            return b == self.delim;
        }
        let escaped = self.prev == '\\';
        if !escaped && b == self.delim {
            self.end = true;
            return true;
        }
        if escaped {
            // the escape is used up; forget it so that `\\` does not escape what follows
            self.prev = '\0';
            self.unescape_needed = true;
        } else {
            self.prev = b;
        }
        true
    }

    /// Only a literal that reached its closing delimiter is valid.
    fn valid(&mut self, _len: usize) -> bool {
        self.end
    }
}

impl StringLiteralSpanner {
    /// Creates a spanner for literals delimited by `delim` on both sides.
    pub fn new(delim: char) -> Self {
        Self {
            delim,
            first: true,
            end: false,
            unescape_needed: false,
            prev: '\0',
        }
    }
}

/// Spanner for the characters of a numeric literal.
///
/// The first character must be a digit or `-`. After that digits, `.`, `e` and `E` are
/// accepted, plus a `+`/`-` sign directly behind an exponent marker. `float` is set as soon
/// as a `.`, `e` or `E` has been accepted. The span is only valid if it contains a digit.
pub struct NumberSpanner {
    first: bool,
    prev: char,
    pub float: bool,
    has_digits: bool,
}

impl Spanner for NumberSpanner {
    fn next(&mut self, b: char) -> bool {
        let digit = b.is_ascii_digit();
        let float_marker = b == '.' || b == 'e' || b == 'E';
        let accepted = if self.first {
            digit || b == '-'
        } else {
            let sign_after_exponent = matches!(self.prev, 'e' | 'E') && matches!(b, '+' | '-');
            digit || float_marker || sign_after_exponent
        };
        if !accepted {
            return false;
        }
        self.first = false;
        self.float |= float_marker;
        self.has_digits |= digit;
        self.prev = b;
        true
    }

    /// A lone `-` (or nothing at all) is not a number.
    fn valid(&mut self, _len: usize) -> bool {
        self.has_digits
    }
}

impl NumberSpanner {
    /// Creates a spanner that has not seen any input yet.
    pub fn new() -> Self {
        Self {
            has_digits: false,
            float: false,
            prev: '\0',
            first: true,
        }
    }
}

impl Default for NumberSpanner {
    fn default() -> Self {
        Self::new()
    }
}

/// Spanner for the contents of a template tag.
///
/// * `INCLUDE_OUTER_TAGS = false`: matches the contents of `{{...}}`, i.e. everything up to
///   the first `}` that does not close a `{` opened inside the span.
/// * `INCLUDE_OUTER_TAGS = true`: matches everything between a `{` and its matching `}`,
///   both included.
///
/// In both modes nested `{`/`}` pairs are tracked, braces inside `"` strings are ignored
/// and a backslash escapes the character that follows it.
pub struct HairyTemplateTagContentSpanner<const INCLUDE_OUTER_TAGS: bool> {
    prev_was_escape: bool,
    string: bool,
    nested: u32,
    len: usize,
}

impl<const INCLUDE_OUTER_TAGS: bool> Spanner for HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
    fn next(&mut self, b: char) -> bool {
        // with outer tags included, the span is complete once every brace has been closed
        let closed_again = self.nested == 0 && !self.string && self.len > 0;
        if INCLUDE_OUTER_TAGS && closed_again {
            return false;
        }
        if !self.prev_was_escape {
            match b {
                '{' if !self.string => self.nested += 1,
                '}' if !self.string => {
                    // a closing brace without a matching opener ends the span
                    if self.nested == 0 {
                        return false;
                    }
                    self.nested -= 1;
                },
                '"' => self.string = !self.string,
                _ => {},
            }
        }
        self.prev_was_escape = !self.prev_was_escape && b == '\\';
        self.len += 1;
        true
    }

    /// The span must not end inside a brace pair, inside a string or right after a backslash.
    fn valid(&mut self, _len: usize) -> bool {
        !(self.nested != 0 || self.string || self.prev_was_escape)
    }
}

impl<const INCLUDE_OUTER_TAGS: bool> HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
    /// Creates a spanner that has not seen any input yet.
    pub fn new() -> Self {
        Self {
            len: 0,
            nested: 0,
            string: false,
            prev_was_escape: false,
        }
    }
}

impl<const INCLUDE_OUTER_TAGS: bool> Default for HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
    fn default() -> Self {
        Self::new()
    }
}
pub trait Acceptor<'a> {
    fn accept(self, expected: &str) -> bool;
    fn span_fn<M>(self, matcher: &mut M) -> Option<&'a str>
    where
        M: FnMut(char) -> bool;
    fn span<P: Spanner>(self, spanner: &mut P) -> Option<&'a str>;
}
impl<'a> Acceptor<'a> for &mut &'a str {
    fn accept(self, expected: &str) -> bool {
        if let Some(remaining) = self.strip_prefix(expected) {
            *self = remaining;
            true
        } else {
            false
        }
    }

    // missing from the standard api
    fn span_fn<M>(self, matcher: &mut M) -> Option<&'a str>
    where
        M: FnMut(char) -> bool,
    {
        let index = self.char_indices().find(|(_pos, c)| !matcher(*c)).map(|(pos, _c)| pos).unwrap_or(self.len());
        if index == 0 {
            return None;
        }
        let value = &self[0..index];
        *self = &self[index..];
        Some(value)
    }
    fn span<P: Spanner>(self, spanner: &mut P) -> Option<&'a str> {
        spanner.span(self)
    }
}

/// Maps byte positions of an input string to lines, and renders the offending line of an
/// error with the error span highlighted.
///
/// `offsets` holds the byte offset at which every line starts, followed by one sentinel
/// entry equal to `input.len() + 1`. A context created with [`LineContext::empty`] has no
/// entries, in which case every lookup yields `(0, 0, 0, 0)`.
///
/// Offsets are stored as `u32`, so inputs of 4 GiB or more are not represented correctly.
#[derive(Debug, Clone)]
pub struct LineContext {
    offsets: Vec<u32>,
}

impl LineContext {
    /// Creates a context without any line information.
    pub fn empty() -> Self {
        Self { offsets: Vec::new(), }
    }

    /// Indexes the line starts of `reader` (lines are separated by `'\n'`).
    pub fn new(reader: &str) -> Self {
        let mut offsets : Vec<u32> = Vec::new();
        offsets.push(0);
        offsets.extend(
            reader
                .char_indices()
                .filter(|&(_, c)| c == '\n')
                .map(|(i, _)| i as u32 + 1),
        );
        // sentinel: one past the end, so the last line gets an end offset like every other line
        offsets.push(reader.len() as u32 + 1);
        Self {
            offsets
        }
    }

    /// Shared front end of the `format_error_context_*` functions.
    ///
    /// Converts `start`/`end` from distances to the end of `reader` into positions from the
    /// beginning, looks up the line containing `start` and returns
    /// `(line_no, line, start_in_line, end_in_line)`. The highlighted range is clipped to
    /// the line and always covers at least one position. Distances larger than the input are
    /// clamped to the beginning of the input instead of underflowing.
    fn locate<'r>(&self, reader: &'r str, start: usize, end: usize) -> (u32, &'r str, usize, usize) {
        let start = reader.len().saturating_sub(start);
        let end = reader.len().saturating_sub(end);
        let (line_no, _, line_start, line_end) = self.position_to_line_info(start as u32);
        let (line_start, line_end) = (line_start as usize, line_end as usize);
        let line = &reader[line_start..line_end];
        let start_in_line = start - line_start;
        let end_in_line = max(start + 1, min(end, line_end)) - line_start;
        (line_no, line, start_in_line, end_in_line)
    }

    /// Appends the marker row that goes below a printed line: blanks (tabs are kept, so the
    /// columns line up) before the highlighted range and `underline` for every position
    /// inside it, including positions past the end of the line.
    fn push_marker_row(out: &mut String, line: &str, start_in_line: usize, end_in_line: usize, underline: &str) {
        let mut pos = 0;
        for (i,c) in line.char_indices() {
            pos = i+1;
            if i >= end_in_line {
                break;
            } else if i >= start_in_line {
                out.push_str(underline);
            } else if c == '\t' {
                out.push('\t');
            } else {
                out.push(' ');
            }
        }
        for _ in pos..end_in_line {
            out.push_str(underline);
        }
    }

    /// Appends `c` to `out`, replacing the characters that are special in HTML text and
    /// attribute values with character references.
    fn push_html_escaped(out: &mut String, c: char) {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }

    /// Plain-text rendering: the offending line, a newline and a marker row.
    ///
    /// `start` and `end` are distances from the error span to the END of `reader`.
    ///
    /// # Panics
    /// May panic when `reader` is not the string this context was created from.
    pub fn format_error_context_short(&self, reader: &str, start: usize, end: usize) -> Result<String,core::fmt::Error> {
        let (_, line, start_in_line, end_in_line) = self.locate(reader, start, end);

        // NOTE(review): the underline glyph and the two highlight markers below are empty
        // strings in this file, so the range is currently not visible in the output --
        // confirm whether marker characters were intended.
        const UNDERLINE : &str = ""; //▔↑⬆️

        let mut retval = String::new();

        let mut highlight = false;
        for (i,c) in line.char_indices() {
            if i >= end_in_line {
                if highlight {
                    write!(retval, "")?;
                    highlight = false;
                }
            } else if i >= start_in_line && !highlight {
                write!(retval, "")?;
                highlight = true;
            }
            retval.push(c);
        }
        writeln!(retval)?;

        Self::push_marker_row(&mut retval, line, start_in_line, end_in_line, UNDERLINE);
        Ok(retval)
    }

    /// HTML rendering: `<code>` containing the line number (`class="lineno"`) and the
    /// offending line with the error span wrapped in `<span class="highlight">`.
    ///
    /// The characters of the line are HTML-escaped, so source text containing `<`, `>`,
    /// `&` or quotes cannot break or inject markup. `start` and `end` are distances from the
    /// error span to the END of `reader`; `extra_line_no` is added to the reported line
    /// number. Returns the reported line number and the markup.
    ///
    /// # Panics
    /// May panic when `reader` is not the string this context was created from.
    pub fn format_error_context_html(&self, reader: &str, start: usize, end: usize, extra_line_no: u32) -> Result<(u32, String),core::fmt::Error> {
        let (line_no, line, start_in_line, end_in_line) = self.locate(reader, start, end);
        let line_no = line_no + extra_line_no;

        let mut retval = String::new();

        write!(retval, "<code><span class=\"lineno\">{}</span>", line_no)?;
        let mut highlight = false;
        for (i,c) in line.char_indices() {
            if i >= end_in_line {
                if highlight {
                    write!(retval, "</span>")?;
                    highlight = false;
                }
            } else if i >= start_in_line && !highlight {
                write!(retval, "<span class=\"highlight\">")?;
                highlight = true;
            }
            Self::push_html_escaped(&mut retval, c);
        }
        // the highlighted range may run up to the end of the line
        if highlight {
            write!(retval, "</span>")?;
        }
        write!(retval, "</code>")?;
        Ok((line_no, retval))
    }

    /// Terminal rendering with colour codes: the numbered offending line with the error
    /// span highlighted, followed by a marker row.
    ///
    /// `start` and `end` are distances from the error span to the END of `reader`;
    /// `extra_line_no` is added to the reported line number. Returns the reported line
    /// number, the prefix used for lines without a line number (so callers can align
    /// further output) and the rendered text.
    ///
    /// # Panics
    /// May panic when `reader` is not the string this context was created from.
    pub fn format_error_context_console(&self, reader: &str, start: usize, end: usize, extra_line_no: u32) -> Result<(u32, String,String),core::fmt::Error> {
        let (line_no, line, start_in_line, end_in_line) = self.locate(reader, start, end);
        let line_no = line_no + extra_line_no;

        // NOTE(review): separators and underline glyph are empty strings in this file --
        // confirm whether glyphs (candidates: │ | ┥ ┤ ⸾ ⌇) were intended.
        const DELIM : &str = "";
        const DELIM_ALT : &str = "";
        const UNDERLINE : &str = "";
        let line_no_text = format!("{:>3}", line_no);
        let prefix = format!("{}{TERM_BRIGHT_BLACK}{}{TERM_RESET}", " ".repeat(line_no_text.len()), DELIM);
        let mut retval = String::new();

        write!(retval, "{TERM_BRIGHT_BLACK}{}{}{TERM_RESET}", line_no_text, DELIM_ALT)?;
        let mut highlight = false;
        for (i,c) in line.char_indices() {
            if i >= end_in_line {
                if highlight {
                    write!(retval, "{TERM_RESET}{TERM_DIM_DEFAULT}")?;
                    highlight = false;
                }
            } else if i >= start_in_line && !highlight {
                write!(retval, "{TERM_BRIGHT_YELLOW}")?;
                highlight = true;
            }
            retval.push(c);
        }
        writeln!(retval, "{TERM_RESET}")?;

        write!(retval, "{}{TERM_BRIGHT_RED}", prefix)?;
        Self::push_marker_row(&mut retval, line, start_in_line, end_in_line, UNDERLINE);
        writeln!(retval, "{TERM_RESET}")?;
        Ok((line_no, prefix, retval))
    }

    /// Like [`LineContext::position_to_line_info`], but for a position given as the number
    /// of bytes remaining until the end of the input.
    ///
    /// Returns `(0, 0, 0, 0)` for an empty context and when `remaining` is larger than the
    /// input.
    pub fn remaining_to_line_info(&self, remaining: u32) -> (u32, u32, u32, u32) {
        self.offsets
            .last()
            // the sentinel is `input.len() + 1`
            .and_then(|sentinel| sentinel.checked_sub(1))
            .and_then(|input_len| input_len.checked_sub(remaining))
            .map_or((0, 0, 0, 0), |pos| self.position_to_line_info(pos))
    }

    /// Returns `(line_no, column_no, offset_start_of_line, offset_end_of_line)` for the byte
    /// position `pos`.
    ///
    /// Line numbers start at 1, columns at 0 (in bytes); the end offset excludes the line
    /// break. Returns `(0, 0, 0, 0)` when `pos` is not covered by this context.
    pub fn position_to_line_info(&self, pos: u32) -> (u32, u32, u32, u32) {
        // index of the first line start that lies behind `pos`
        let i = self.offsets.partition_point(|x| *x <= pos);
        if i == 0 || i >= self.offsets.len() {
            // only possible for an empty context or a position behind the sentinel
            return (0, 0, 0, 0);
        }
        let start_of_this_line = self.offsets[i-1];
        let start_of_next_line = self.offsets[i];
        // `i` doubles as the 1-based line number; the line ends one byte before the next line starts
        (i as u32, pos - start_of_this_line, start_of_this_line, start_of_next_line-1)
    }

}


#[cfg(test)]
mod tests {
    use crate::*;

    /// A decimal literal is matched and recognised as a float.
    #[test]
    fn number_spanner() {
        let mut input = "3.14";
        let mut number = NumberSpanner::new();
        let matched = number.span(&mut input);
        assert!(matched.is_some());
        assert!(number.float);
    }
}