use crate::{context::Context, input::Input, parser::State, position::SourceSpan, Position};
#[cfg(debug_assertions)]
use crate::{LOG, LOG_BOLD};
use core::fmt::Debug;
use std::marker::PhantomData;
#[cfg(debug_assertions)]
use yansi::Paint;
/// Produces tokens from the input for the parser to consume.
///
/// `C` is the parsing context, `S` the parser-state type and `TK` the
/// token-kind type. Implementors choose the concrete `Input` they lex.
pub trait Lexer<'i, C, S, TK>
where
    C: Context<'i, Self::Input, S, TK>,
    S: State,
{
    /// The type of input this lexer operates on (e.g. `str`).
    type Input: Input + ?Sized;

    /// Returns an iterator over tokens recognizable at the context's
    /// current position. Each `(TK, bool)` entry names an expected token
    /// kind; the meaning of the `bool` flag is implementation-defined
    /// (in `TokenIterator` below, a match on a flagged kind stops
    /// further iteration).
    fn next_tokens(
        &self,
        context: &mut C,
        input: &'i Self::Input,
        expected_tokens: Vec<(TK, bool)>,
    ) -> Box<dyn Iterator<Item = Token<'i, Self::Input, TK>> + 'i>;
}
/// Recognizes a single terminal at the start of the given input slice.
pub trait TokenRecognizer<'i> {
    /// Returns the matched prefix of `_input`, or `None` when this
    /// recognizer does not match. The default implementation panics, so
    /// implementors must override it for the recognizer to be usable.
    fn recognize(&self, _input: &'i str) -> Option<&'i str> {
        panic!("Recognize is not defined.")
    }
}
/// A [`Lexer`] implementation for `&str` inputs, driven by a static
/// table of token recognizers — one per terminal.
pub struct StringLexer<C, S, TK, TR: 'static, const TERMINAL_COUNT: usize> {
    // When true, leading whitespace is consumed before tokenization.
    skip_ws: bool,
    // One recognizer per terminal, indexed via the token kind's
    // `Into<usize>` conversion (see `next_tokens`).
    token_recognizers: &'static [TR; TERMINAL_COUNT],
    // Ties the otherwise-unused `C`, `S` and `TK` parameters to the type.
    phantom: PhantomData<(C, S, TK)>,
}
impl<
'i,
C: Context<'i, str, S, TK>,
S: State,
TK,
TR: TokenRecognizer<'i>,
const TERMINAL_COUNT: usize,
> StringLexer<C, S, TK, TR, TERMINAL_COUNT>
{
pub fn new(skip_ws: bool, token_recognizers: &'static [TR; TERMINAL_COUNT]) -> Self {
Self {
skip_ws,
token_recognizers,
phantom: PhantomData,
}
}
fn skip(input: &'i str, context: &mut C) {
let skipped_len: usize = input[context.position().pos..]
.chars()
.take_while(|x| x.is_whitespace())
.map(|c| c.len_utf8())
.sum();
if skipped_len > 0 {
let skipped = &input[context.position().pos..context.position().pos + skipped_len];
log!("\t{} {}", "Skipped ws:".paint(LOG_BOLD), skipped_len);
context.set_layout_ahead(Some(skipped));
context.set_position(skipped.position_after(context.position()));
} else {
context.set_layout_ahead(None);
}
}
}
/// Iterator produced by `StringLexer::next_tokens`: tries each supplied
/// recognizer in turn at a fixed input position.
struct TokenIterator<'i, TR: 'static, TK> {
    // Full input text; token values are sliced out of it.
    input: &'i str,
    // Position at which every recognizer is tried.
    position: Position,
    // `(recognizer, token kind, finish flag)` triples, tried in order.
    token_recognizers: Vec<(&'static TR, TK, bool)>,
    // Index of the next recognizer to try.
    index: usize,
    // Set when a finish-flagged recognizer matched; ends iteration.
    finish: bool,
}
impl<'i, TR, TK> TokenIterator<'i, TR, TK> {
fn new(
input: &'i str,
position: Position,
token_recognizers: Vec<(&'static TR, TK, bool)>,
) -> Self {
Self {
input,
position,
token_recognizers,
index: 0,
finish: false,
}
}
}
impl<'i, TK, TR> Iterator for TokenIterator<'i, TR, TK>
where
    TR: TokenRecognizer<'i>,
    TK: Copy,
{
    type Item = Token<'i, str, TK>;

    /// Tries the remaining recognizers in order and yields a token for
    /// the first one that matches. Once a recognizer whose `finish`
    /// flag is set has matched, the iterator yields nothing further.
    fn next(&mut self) -> Option<Self::Item> {
        while !self.finish && self.index < self.token_recognizers.len() {
            let (recognizer, kind, finish) = &self.token_recognizers[self.index];
            self.index += 1;
            // All recognizers run against the same fixed position.
            let rest = &self.input[self.position.pos..];
            if let Some(value) = recognizer.recognize(rest) {
                self.finish = *finish;
                return Some(Token {
                    kind: *kind,
                    value,
                    span: value.span_from(self.position),
                });
            }
        }
        None
    }
}
impl<'i, C, S, TK, TR, const TERMINAL_COUNT: usize> Lexer<'i, C, S, TK>
    for StringLexer<C, S, TK, TR, TERMINAL_COUNT>
where
    C: Context<'i, str, S, TK>,
    S: State + Into<usize>,
    TK: Debug + Into<usize> + Copy + 'i,
    TR: TokenRecognizer<'i>,
{
    type Input = str;

    /// Produces an iterator of candidate tokens at the context's current
    /// position, restricted to `expected_tokens`. Whitespace is skipped
    /// first when the lexer was constructed with `skip_ws`.
    fn next_tokens(
        &self,
        context: &mut C,
        input: &'i Self::Input,
        expected_tokens: Vec<(TK, bool)>,
    ) -> Box<dyn Iterator<Item = Token<'i, Self::Input, TK>> + 'i> {
        if self.skip_ws {
            Self::skip(input, context);
        }
        log!(
            " {} {:?}",
            "Trying recognizers:".paint(LOG),
            expected_tokens
        );
        // Pair every expected token kind with its statically-known
        // recognizer; the kind's `Into<usize>` gives the table index.
        let recognizers: Vec<_> = expected_tokens
            .into_iter()
            .map(|(kind, finish)| (&self.token_recognizers[kind.into()], kind, finish))
            .collect();
        Box::new(TokenIterator::new(input, context.position(), recognizers))
    }
}
/// A single lexed token: its kind, the matched slice of the input and
/// the source span it covers.
pub struct Token<'i, I: Input + ?Sized, TK> {
    pub kind: TK,
    /// The part of the input matched by this token.
    pub value: &'i I,
    /// Location of the match in the source.
    pub span: SourceSpan,
}
impl<I: Input + ?Sized, TK: Copy> Clone for Token<'_, I, TK> {
fn clone(&self) -> Self {
Self {
kind: self.kind,
value: self.value,
span: self.span,
}
}
}
impl<I, TK> Debug for Token<'_, I, TK>
where
    I: Input + ?Sized,
    I::Output: Debug,
    TK: Debug,
{
    /// Formats as `Kind("value" span)`, abbreviating values longer than
    /// 50 to the first and last 20 units with a `..<snip>..` marker.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{:?}({:?} {:?})",
            self.kind,
            if self.value.len() > 50 {
                // NOTE(review): if `Input::len`/`slice` are byte-based
                // for `I = str`, cutting at fixed offsets 20 and
                // `len - 20` could split a multi-byte character —
                // confirm `Input::slice` handles non-boundary cuts.
                format!(
                    "{:?}{}{:?}",
                    &self.value.slice(0..20),
                    "..<snip>..",
                    &self.value.slice(self.value.len() - 20..self.value.len())
                )
            } else {
                format!("{:?}", self.value)
            },
            self.span
        )
    }
}