use crate::parser::INVALID_PROD;
use crate::{
FormatToken, LexerError, NonTerminalIndex, ParolError, ParserError, ProductionIndex,
StateIndex, TerminalIndex, TokenStream, TokenVec, UnexpectedToken,
};
use log::trace;
use std::cmp::Ordering;
use std::fmt::Debug;
use super::CompiledProductionIndex;
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct Trans(
pub StateIndex,
pub TerminalIndex,
pub StateIndex,
pub CompiledProductionIndex,
);
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct LookaheadDFA {
pub prod0: CompiledProductionIndex,
pub transitions: &'static [Trans],
pub k: usize,
}
impl LookaheadDFA {
pub fn new(prod0: CompiledProductionIndex, transitions: &'static [Trans], k: usize) -> Self {
Self {
prod0,
transitions,
k,
}
}
#[inline(always)]
pub fn eval<F: Fn(char) -> Option<usize> + Clone>(
&self,
token_stream: &mut TokenStream<'_, F>,
non_terminal: NonTerminalIndex,
) -> Result<ProductionIndex, ParolError> {
let mut state: StateIndex = 0;
let mut prod_num: CompiledProductionIndex = self.prod0;
let mut last_prod_num: CompiledProductionIndex = INVALID_PROD;
if self.k > token_stream.k {
return Err(ParserError::DataError(
"Lookahead size mismatch between token stream and Lookahead DFA",
)
.into());
}
let mut last_accepting_state: Option<StateIndex> = if prod_num > INVALID_PROD {
Some(state)
} else {
None
};
for i in 0..self.k {
let current_lookahead_token = token_stream.lookahead_token_type(i)?;
let mut any_matching_found = false;
for i in 0..self.transitions.len() {
let current_transition = &self.transitions[i];
if current_transition.0 != state {
if any_matching_found {
break;
} else {
continue;
}
}
any_matching_found = true;
match current_transition.1.cmp(¤t_lookahead_token) {
Ordering::Equal => {
trace!(
"{}, {} => {}",
state, current_lookahead_token, current_transition.2
);
state = current_transition.2;
prod_num = current_transition.3;
if current_transition.3 > INVALID_PROD {
last_accepting_state = Some(current_transition.2);
last_prod_num = current_transition.3;
trace!("State {} accepts", current_transition.2);
}
break;
}
Ordering::Greater => {
break;
}
_ => (),
}
}
}
if prod_num > INVALID_PROD {
trace!("Predict production {prod_num} at state {state}");
Ok(prod_num as ProductionIndex)
} else if let Some(last_state) = last_accepting_state {
debug_assert!(last_prod_num > INVALID_PROD);
trace!("Predict production {last_prod_num:?} from last accepting state {last_state}");
Ok(last_prod_num as ProductionIndex)
} else {
trace!(
"Production prediction failed at state {} with tokens {:?}",
state,
token_stream.token_types()
);
Err(ParserError::PredictionError {
cause: format!("Production prediction failed for non-terminal {non_terminal}",),
}
.into())
}
}
pub fn build_error<F: Fn(char) -> Option<usize> + Clone>(
&self,
terminal_names: &'static [&'static str],
token_stream: &TokenStream<'_, F>,
) -> Result<(String, Vec<UnexpectedToken>, TokenVec), LexerError> {
let mut state = 0;
let mut diag_msg = String::new();
let mut unexpected_tokens = Vec::new();
let mut expected_tokens = TokenVec::default();
for (lookahead, token) in token_stream.tokens.non_skip_tokens().enumerate() {
let token_type = token.token_type;
if let Some(transition) = self
.transitions
.iter()
.position(|t| t.0 == state && t.1 == token_type)
{
diag_msg.push_str(
format!("LA({}): {} ", lookahead + 1, token.format(terminal_names)).as_str(),
);
unexpected_tokens.push(UnexpectedToken::new(
format!("LA({})", lookahead + 1),
terminal_names[token_type as usize].to_owned(),
token,
));
state = self.transitions[transition].2;
} else {
diag_msg.push_str(
format!("LA({}): {}.", lookahead + 1, token.format(terminal_names),).as_str(),
);
unexpected_tokens.push(UnexpectedToken::new(
format!("LA({})", lookahead + 1),
terminal_names[token_type as usize].to_owned(),
token,
));
expected_tokens = self.transitions.iter().filter(|t| t.0 == state).fold(
expected_tokens,
|mut acc, t| {
acc.push(terminal_names[t.1 as usize].to_owned());
acc
},
);
break;
}
}
Ok((diag_msg, unexpected_tokens, expected_tokens))
}
}