#![allow(clippy::should_implement_trait, clippy::type_complexity)]
use std::iter::Peekable;
use std::str::Chars;
use unicode_width::UnicodeWidthChar;
/// An error reported by the lexer, paired with the input location it refers to.
///
/// `E` is the type of user-defined errors carried by [`LexerErrorKind::Custom`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexerError<E> {
/// Location in the input that the error refers to. For the `InvalidToken` error built by
/// `Lexer::backtrack`, this is the start of the current match.
pub location: Loc,
/// What went wrong.
pub kind: LexerErrorKind<E>,
}
/// The category of a [`LexerError`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexerErrorKind<E> {
/// Returned by `Lexer::backtrack` when there is no recorded accepting state to fall back to.
InvalidToken,
/// A user-defined error value of type `E`.
// NOTE(review): not constructed anywhere in this file; presumably produced by user semantic
// actions -- confirm against the generated code.
Custom(E),
}
/// A position in the lexer input.
///
/// All three components start at zero (see [`Loc::ZERO`]) and are advanced by `Lexer::next` as
/// characters are consumed.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Loc {
    /// Zero-based line number; incremented each time a `'\n'` is consumed.
    pub line: u32,

    /// Zero-based column within the current line; reset to zero after a `'\n'`.
    pub col: u32,

    /// Offset from the start of the input, in UTF-8 bytes.
    pub byte_idx: usize,
}

impl Loc {
    /// The location of the very first character: line 0, column 0, byte 0.
    ///
    /// Equal to `Loc::default()`, but usable in `const` contexts.
    const ZERO: Self = Self {
        byte_idx: 0,
        col: 0,
        line: 0,
    };
}
/// Outcome of running a semantic action.
///
/// The lexer stores semantic actions as functions returning
/// `SemanticActionResult<Result<Token, Error>>`.
// NOTE(review): `Continue` presumably tells the driver to keep lexing without yielding a
// token, and `Return` to yield the wrapped value -- the driver loop is not in this file.
pub enum SemanticActionResult<T> {
    /// The action produced no value.
    Continue,
    /// The action produced the wrapped value.
    Return(T),
}

impl<T> SemanticActionResult<T> {
    /// Transforms the payload of `Return` with `f`; `Continue` is passed through unchanged.
    ///
    /// `f` is called at most once (and not at all for `Continue`), so the bound is `FnOnce`
    /// rather than `Fn`. This accepts every closure the stricter bound did, plus closures that
    /// move captured values out, mirroring `Option::map`.
    pub fn map_token<F, T1>(self, f: F) -> SemanticActionResult<T1>
    where
        F: FnOnce(T) -> T1,
    {
        match self {
            SemanticActionResult::Continue => SemanticActionResult::Continue,
            SemanticActionResult::Return(token) => SemanticActionResult::Return(f(token)),
        }
    }
}
/// Runtime state shared by lexers: the character source, the span of the current match, and
/// the most recent accepting state to fall back to when backtracking.
///
/// Type parameters: `Iter` is the character source, `Token`/`Error` are what semantic actions
/// produce, `State` is the user-defined state exposed by `state()`, and `Wrapper` is the type
/// that semantic actions receive a mutable reference to.
// NOTE(review): the double-underscore public fields look intended for generated code rather
// than end users -- confirm against the code generator.
pub struct Lexer<'input, Iter: Iterator<Item = char> + Clone, Token, State, Error, Wrapper> {
/// Current state number. Initialised to 0 and reset to 0 by `backtrack` when it fails.
pub __state: usize,
/// Initialised to `false`; cleared again by `backtrack` when it restores an accepting state.
// NOTE(review): presumably set by the driver once the input is exhausted -- not set in this file.
pub __done: bool,
/// Initialised to 0; not otherwise read or written in this file.
pub __initial_state: usize,
/// User-defined state, handed out mutably by `state()`.
user_state: State,
/// The string being lexed, sliced by `match_`. Empty for lexers built from an iterator.
input: &'input str,
/// Starts at `Loc::ZERO` and is set to the restored match end on a successful `backtrack`.
iter_loc: Loc,
/// Character source. Cloned by `set_accepting_state` and replaced by `backtrack`.
pub __iter: Peekable<Iter>,
/// Where the current match begins.
current_match_start: Loc,
/// One past the last consumed character of the current match; advanced by `next`.
current_match_end: Loc,
/// Most recent accepting state recorded by `set_accepting_state`, if any.
last_match: Option<(
// Start of the match at the time the state was recorded.
Loc,
// Snapshot of the character source at that point.
Peekable<Iter>,
// Semantic action to run if this accepting state ends up being used.
for<'lexer> fn(&'lexer mut Wrapper) -> SemanticActionResult<Result<Token, Error>>,
// End of the match at the time the state was recorded.
Loc,
)>,
}
impl<I, T, S, E, W> Lexer<'static, I, T, S, E, W>
where
    I: Iterator<Item = char> + Clone,
    S: Default,
{
    /// Creates a lexer that reads characters from `iter`, starting with `S::default()` as the
    /// user state.
    ///
    /// This is shorthand for `new_from_iter_with_state(iter, S::default())`.
    pub fn new_from_iter(iter: I) -> Self {
        let initial_state = S::default();
        Self::new_from_iter_with_state(iter, initial_state)
    }
}
impl<I, T, S, E, W> Lexer<'static, I, T, S, E, W>
where
    I: Iterator<Item = char> + Clone,
{
    /// Creates a lexer that reads characters from `iter`, starting with the given user `state`.
    ///
    /// There is no backing string in this mode: `input` is left empty, and every location
    /// starts at `Loc::ZERO`.
    pub fn new_from_iter_with_state(iter: I, state: S) -> Self {
        let origin = Loc::ZERO;
        let chars = iter.peekable();
        Self {
            // Character source and the (absent) backing text.
            __iter: chars,
            input: "",
            // Lexer state numbers and flags.
            __state: 0,
            __initial_state: 0,
            __done: false,
            user_state: state,
            // Every location starts at the origin; nothing has been matched yet.
            iter_loc: origin,
            current_match_start: origin,
            current_match_end: origin,
            last_match: None,
        }
    }
}
impl<'input, T, S, E, W> Lexer<'input, Chars<'input>, T, S, E, W>
where
    S: Default,
{
    /// Creates a lexer over the string `input`, starting with `S::default()` as the user state.
    ///
    /// This is shorthand for `new_with_state(input, S::default())`.
    pub fn new(input: &'input str) -> Self {
        let initial_state = S::default();
        Self::new_with_state(input, initial_state)
    }
}
impl<'input, T, S, E, W> Lexer<'input, Chars<'input>, T, S, E, W> {
    /// Creates a lexer over the string `input`, starting with the given user `state`.
    ///
    /// The string is kept alongside its character iterator so that `match_` can slice the
    /// matched text out of it. Every location starts at `Loc::ZERO`.
    pub fn new_with_state(input: &'input str, state: S) -> Self {
        let origin = Loc::ZERO;
        let chars = input.chars().peekable();
        Self {
            // Backing text and the iterator walking over it.
            input,
            __iter: chars,
            // Lexer state numbers and flags.
            __state: 0,
            __initial_state: 0,
            __done: false,
            user_state: state,
            // Every location starts at the origin; nothing has been matched yet.
            iter_loc: origin,
            current_match_start: origin,
            current_match_end: origin,
            last_match: None,
        }
    }
}
impl<'input, I, T, S, E, W> Lexer<'input, I, T, S, E, W>
where
    I: Iterator<Item = char> + Clone,
{
    /// Consumes and returns the next character, extending the current match over it.
    ///
    /// Returns `None`, leaving all locations untouched, when the character source is exhausted.
    /// Otherwise the end of the current match is advanced: the byte index by the character's
    /// UTF-8 length, and line/column according to the kind of character.
    pub fn next(&mut self) -> Option<char> {
        let c = self.__iter.next()?;

        let end = &mut self.current_match_end;
        end.byte_idx += c.len_utf8();
        match c {
            // A newline starts a fresh line at column zero.
            '\n' => {
                end.line += 1;
                end.col = 0;
            }
            // A tab always advances the column by a fixed four cells.
            '\t' => end.col += 4,
            // Anything else advances by its display width; characters for which no width is
            // reported count as one column.
            _ => end.col += UnicodeWidthChar::width(c).unwrap_or(1) as u32,
        }

        Some(c)
    }

    /// Returns the next character without consuming it, or `None` at the end of the input.
    pub fn peek(&mut self) -> Option<char> {
        let upcoming = self.__iter.peek();
        upcoming.copied()
    }

    /// Falls back to the accepting state most recently recorded by `set_accepting_state`.
    ///
    /// On success the recorded character source and match span are restored, `__done` is
    /// cleared, and the recorded semantic action is returned. The recorded state is consumed
    /// either way.
    ///
    /// # Errors
    ///
    /// When no accepting state is recorded, `__state` is reset to 0 and an
    /// [`LexerErrorKind::InvalidToken`] error located at the start of the current match is
    /// returned.
    pub fn backtrack(
        &mut self,
    ) -> Result<for<'lexer> fn(&'lexer mut W) -> SemanticActionResult<Result<T, E>>, LexerError<E>>
    {
        if let Some((start, chars, action, end)) = self.last_match.take() {
            self.__done = false;
            self.__iter = chars;
            self.current_match_start = start;
            self.current_match_end = end;
            self.iter_loc = end;
            return Ok(action);
        }

        // Nothing to fall back to: the text consumed so far is not a valid token.
        self.__state = 0;
        let location = self.current_match_start;
        Err(LexerError {
            location,
            kind: LexerErrorKind::InvalidToken,
        })
    }

    /// Forgets any recorded accepting state, so a later `backtrack` will fail.
    pub fn reset_accepting_state(&mut self) {
        self.last_match = None;
    }

    /// Records the current position as an accepting state for `backtrack` to return to.
    ///
    /// The current match span and a clone of the character source are stored together with
    /// `semantic_action_fn`, replacing any previously recorded state.
    pub fn set_accepting_state(
        &mut self,
        semantic_action_fn: for<'lexer> fn(&'lexer mut W) -> SemanticActionResult<Result<T, E>>,
    ) {
        let start = self.current_match_start;
        let end = self.current_match_end;
        let chars = self.__iter.clone();
        self.last_match = Some((start, chars, semantic_action_fn, end));
    }

    /// Starts a new, empty match at the position where the current match ends.
    pub fn reset_match(&mut self) {
        let end = self.current_match_end;
        self.current_match_start = end;
    }

    /// Returns the text of the current match as a slice of the input string.
    ///
    /// # Panics
    ///
    /// Panics if the byte range of the current match does not lie within the input string.
    /// In particular, lexers created from an iterator hold an empty input string, so any
    /// non-empty match panics here.
    pub fn match_(&self) -> &'input str {
        let text: &'input str = self.input;
        let from = self.current_match_start.byte_idx;
        let to = self.current_match_end.byte_idx;
        &text[from..to]
    }

    /// Returns the `(start, end)` locations of the current match.
    pub fn match_loc(&self) -> (Loc, Loc) {
        let start = self.current_match_start;
        let end = self.current_match_end;
        (start, end)
    }

    /// Gives mutable access to the user-defined state.
    pub fn state(&mut self) -> &mut S {
        let user_state = &mut self.user_state;
        user_state
    }
}