1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
use crate::parser::INVALID_PROD;
use crate::{
FormatToken, LexerError, ProductionIndex, StateIndex, TerminalIndex, TokenStream, TokenVec,
UnexpectedToken,
};
use log::trace;
use std::cmp::Ordering;
use std::fmt::Debug;
use super::CompiledProductionIndex;
///
/// The transitions contain tuples: "from-state -> terminal-index -> to-state -> production index"
///
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct Trans(
pub StateIndex,
pub TerminalIndex,
pub StateIndex,
pub CompiledProductionIndex,
);
///
/// The lookahead DFA. Used to calculate a certain production number from a
/// sequence of terminals.
///
/// The start state is per definition always the state with index 0.
///
/// In the generated parsers there always exists exactly one LookaheadDFA for
/// each non-terminal.
///
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct LookaheadDFA {
/// Contains the production number in state 0, i.e. the state that the automaton is initially in
/// without applying any transitions
pub prod0: CompiledProductionIndex,
///
/// Transitions are sorted
/// * firstly by the index of the from-state,
/// * secondly by the terminal index
/// This way it is easy to detect the case where no match exists and to be
/// able to quickly terminate the search for an applicable transition.
///
pub transitions: &'static [Trans],
///
/// Maximum number of tokens needed to reach an accepting state
///
pub k: usize,
}
impl LookaheadDFA {
///
/// Creates a new instance with the given parameters.
///
pub fn new(prod0: CompiledProductionIndex, transitions: &'static [Trans], k: usize) -> Self {
Self {
prod0,
transitions,
k,
}
}
///
/// Calculates the next production to use for the given non-terminal.
/// Retrieves the lookahead tokens from the TokenStream object without
/// consuming any of them.
///
pub fn eval(&self, token_stream: &mut TokenStream<'_>) -> Result<ProductionIndex, LexerError> {
let mut state: StateIndex = 0;
let mut prod_num: CompiledProductionIndex = self.prod0;
let mut last_prod_num: CompiledProductionIndex = INVALID_PROD;
if self.k > token_stream.k {
return Err(LexerError::DataError(
"Lookahead size mismatch between token stream and Lookahead DFA",
));
}
let mut last_accepting_state: Option<StateIndex> = if prod_num > INVALID_PROD {
Some(state)
} else {
None
};
for i in 0..self.k {
// Read the current lookahead token and extract it's type
let current_lookahead_token = token_stream.lookahead_token_type(i)?;
// Filter the transitions with the matching from-state
let mut any_matching_found = false;
for i in 0..self.transitions.len() {
let current_transition = &self.transitions[i];
if current_transition.0 != state {
if any_matching_found {
// Since the transitions are sorted by from-state we
// have moved beyond the possible transitions and
// can safely stop the search here.
break;
} else {
// Try the next matching transition.
continue;
}
}
any_matching_found = true;
// Test if there exists a transition from the from-state
// via the current lookahead token
match current_transition.1.cmp(¤t_lookahead_token) {
Ordering::Equal => {
// Set the to_state and break into the outer for loop
// to finish if we found an accepting state or
// to read the next lookahead token if available.
trace!(
"{}, {} => {}",
state,
current_lookahead_token,
current_transition.2
);
// Set the state to the to-state
state = current_transition.2;
prod_num = current_transition.3;
// Test if the production in this transition is a valid one
// In this case the to-state is an accepting one.
if current_transition.3 > INVALID_PROD {
// In case we step too far, we can retrieve the last
// accepting state.
// Indeed we can step too far because the self.k is the
// maximum depth of all subtrees.
last_accepting_state = Some(current_transition.2);
last_prod_num = current_transition.3;
trace!("State {} accepts", current_transition.2);
}
break;
}
Ordering::Greater => {
// The token type is not found
break;
}
_ => (),
}
}
}
if prod_num > INVALID_PROD {
// The state is accepting, we can return the associated production number
trace!("Predict production {} at state {}", prod_num, state);
Ok(prod_num as ProductionIndex)
} else if let Some(last_state) = last_accepting_state {
debug_assert!(last_prod_num > INVALID_PROD);
trace!(
"Predict production {:?} from last accepting state {}",
last_prod_num,
last_state
);
Ok(last_prod_num as ProductionIndex)
} else {
trace!(
"Production prediction failed at state {} with token {:?}",
state,
token_stream.lookahead(0)
);
return Err(LexerError::PredictionError {
cause: format!("Production prediction failed at state {}", state),
});
}
}
///
/// Returns all terminals that lead from state 0 to a valid next state
///
pub fn build_error(
&self,
terminal_names: &'static [&'static str],
token_stream: &TokenStream<'_>,
) -> Result<(String, Vec<UnexpectedToken>, TokenVec), LexerError> {
let mut state = 0;
let mut diag_msg = String::new();
let mut unexpected_tokens = Vec::new();
let mut expected_tokens = TokenVec::default();
for (lookahead, token) in token_stream.tokens.iter().enumerate() {
let token_type = token.token_type;
if let Some(transition) = self
.transitions
.iter()
.position(|t| t.0 == state && t.1 == token_type)
{
diag_msg.push_str(
format!("LA({}): {} ", lookahead + 1, token.format(terminal_names)).as_str(),
);
unexpected_tokens.push(UnexpectedToken::new(
format!("LA({})", lookahead + 1),
terminal_names[token_type as usize].to_owned(),
token,
));
state = self.transitions[transition].2;
} else {
diag_msg.push_str(
format!("LA({}): {}.", lookahead + 1, token.format(terminal_names),).as_str(),
);
unexpected_tokens.push(UnexpectedToken::new(
format!("LA({})", lookahead + 1),
terminal_names[token_type as usize].to_owned(),
token,
));
expected_tokens = self.transitions.iter().filter(|t| t.0 == state).fold(
expected_tokens,
|mut acc, t| {
acc.push(format!(r#""{}""#, terminal_names[t.1 as usize]));
acc
},
);
break;
}
}
Ok((diag_msg, unexpected_tokens, expected_tokens))
}
}