use std::{
collections::{BTreeMap, BTreeSet},
vec,
};
use log::trace;
use crate::internal::compiled_dfa::StateData;
use super::{
compiled_dfa::CompiledDfa,
ids::{StateGroupID, StateGroupIDBase, StateID},
CharClassID, StateIDBase, TerminalID,
};
type StateGroup = BTreeSet<StateID>;
type Partition = Vec<StateGroup>;
type TransitionMap = BTreeMap<StateID, BTreeMap<CharClassID, Vec<StateID>>>;
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct TransitionsToPartitionGroups(pub(crate) Vec<(CharClassID, StateGroupID)>);
impl TransitionsToPartitionGroups {
pub(crate) fn new() -> Self {
TransitionsToPartitionGroups::default()
}
pub(crate) fn with_capacity(capacity: usize) -> Self {
TransitionsToPartitionGroups(Vec::with_capacity(capacity))
}
pub(crate) fn insert(&mut self, char_class: CharClassID, partition_group: StateGroupID) {
self.0.push((char_class, partition_group));
}
}
#[derive(Debug)]
pub(crate) struct Minimizer;
impl Minimizer {
pub(crate) fn minimize(dfa: CompiledDfa) -> CompiledDfa {
trace!("Minimize DFA ----------------------------");
trace!("Initial DFA:\n{}", dfa);
let mut transitions = TransitionMap::new();
dfa.states.iter().enumerate().for_each(|(id, state)| {
transitions.entry((id as StateIDBase).into()).or_default();
for t in &state.transitions {
let t_of_s = transitions.get_mut(&(id as StateIDBase).into()).unwrap();
t_of_s.entry(t.0).or_default().push(t.1.id().into());
t_of_s.get_mut(&t.0).unwrap().sort();
t_of_s.get_mut(&t.0).unwrap().dedup();
}
});
trace!("Transitions: {:?}", transitions);
let mut partition_old = Self::calculate_initial_partition(&dfa);
Self::trace_partition("initial", &partition_old);
let mut partition_new = Partition::new();
let mut changed = true;
while changed {
partition_new = Self::calculate_new_partition(&partition_old, &transitions);
Self::trace_partition("new", &partition_new);
changed = partition_new != partition_old;
partition_old.clone_from(&partition_new);
}
Self::create_from_partition(dfa, &partition_new, &transitions)
}
fn calculate_initial_partition(dfa: &CompiledDfa) -> Partition {
let mut terminal_map = dfa
.end_states
.iter()
.filter_map(|(accept, id)| if *accept { Some(*id) } else { None })
.collect::<Vec<_>>();
terminal_map.sort();
terminal_map.dedup();
let number_of_end_states = terminal_map.len();
let mut initial_partition = vec![StateGroup::new(); number_of_end_states + 1];
for state in 0..dfa.states.len() {
let state: StateID = (state as u32).into();
if dfa.end_states[state].0 {
let terminal_id = dfa.end_states[state].1;
let index = terminal_map
.iter()
.position(|id| *id == terminal_id)
.unwrap();
initial_partition[index + 1].insert(state);
} else {
initial_partition[0].insert(state);
}
}
initial_partition
}
fn calculate_new_partition(partition: &[StateGroup], transitions: &TransitionMap) -> Partition {
let mut new_partition = Partition::new();
for (index, group) in partition.iter().enumerate() {
Self::split_group(index, group, partition, transitions)
.into_iter()
.for_each(|new_group| {
new_partition.push(new_group);
});
}
new_partition
}
fn split_group(
group_index: usize,
group: &StateGroup,
partition: &[StateGroup],
transitions: &TransitionMap,
) -> Partition {
if group.len() == 1 {
return vec![group.clone()];
}
trace!("Split group {}: {:?}", group_index, group);
let mut transition_map_to_states: BTreeMap<TransitionsToPartitionGroups, StateGroup> =
BTreeMap::new();
for state_id in group {
let transitions_to_partition =
Self::build_transitions_to_partition_group(*state_id, partition, transitions);
transition_map_to_states
.entry(transitions_to_partition)
.or_default()
.insert(*state_id);
}
transition_map_to_states
.into_values()
.collect::<Partition>()
}
fn build_transitions_to_partition_group(
state_id: StateID,
partition: &[StateGroup],
transitions: &TransitionMap,
) -> TransitionsToPartitionGroups {
if let Some(transitions_of_state) = transitions.get(&state_id) {
let mut transitions_to_partition_groups =
TransitionsToPartitionGroups::with_capacity(transitions_of_state.len());
for transition in transitions_of_state {
for target_state in transition.1.iter() {
let partition_group = Self::find_group(*target_state, partition).unwrap();
transitions_to_partition_groups.insert(*transition.0, partition_group);
}
}
Self::trace_transitions_to_groups(state_id, &transitions_to_partition_groups);
transitions_to_partition_groups
} else {
trace!("** State {} has no transitions.", state_id);
TransitionsToPartitionGroups::new()
}
}
fn find_group(state_id: StateID, partition: &[StateGroup]) -> Option<StateGroupID> {
partition
.iter()
.position(|group| group.contains(&state_id))
.map(|id| (id as StateGroupIDBase).into())
}
fn create_from_partition(
dfa: CompiledDfa,
partition: &[StateGroup],
transitions: &TransitionMap,
) -> CompiledDfa {
trace!("Create DFA ------------------------------");
trace!("from partition {:?}", partition);
let CompiledDfa {
patterns,
terminal_ids,
states: _,
end_states,
lookaheads,
current_states,
next_states,
} = dfa;
let mut dfa = CompiledDfa {
patterns,
terminal_ids,
states: vec![StateData::new(); partition.len()],
end_states: vec![(false, 0.into()); partition.len()],
lookaheads,
current_states,
next_states,
};
let mut partition = partition.to_vec();
partition.sort_by(|a, b| {
if a.contains(&StateID::new(0)) {
return std::cmp::Ordering::Less;
}
if b.contains(&StateID::new(0)) {
return std::cmp::Ordering::Greater;
}
std::cmp::Ordering::Equal
});
for (id, group) in partition.iter().enumerate() {
Self::add_representative_state(
&mut dfa,
(id as StateGroupIDBase).into(),
group,
&end_states,
);
}
Self::update_transitions(&mut dfa, &partition, transitions);
trace!("Minimized DFA:\n{}", dfa);
dfa
}
fn add_representative_state(
dfa: &mut CompiledDfa,
group_id: StateGroupID,
group: &BTreeSet<StateID>,
end_states: &[(bool, TerminalID)],
) -> StateID {
let state_id = StateID::new(group_id.id() as StateIDBase);
let state = StateData::new();
let representative_state_id = group.first().unwrap();
trace!(
"Add representative state {} with id {}",
representative_state_id.as_usize(),
state_id.as_usize()
);
for state_in_group in group.iter() {
if end_states[*state_in_group].0 {
dfa.end_states[state_id] = (true, end_states[*state_in_group].1);
}
}
dfa.states[state_id] = state;
state_id
}
fn update_transitions(
dfa: &mut CompiledDfa,
partition: &[StateGroup],
transitions: &TransitionMap,
) {
let mut transitions = transitions
.iter()
.map(|(s, t)| (*s, t.clone()))
.collect::<Vec<_>>();
Self::merge_transitions(partition, &mut transitions);
Self::renumber_states_in_transitions(partition, &mut transitions);
for (state_id, transitions_of_state) in transitions {
let state_id = state_id.as_usize();
for (char_class, target_states) in transitions_of_state.iter() {
for target_state in target_states {
trace!(
"Add transition {} --{}--> {}",
state_id,
char_class,
target_state
);
if !dfa.states[state_id]
.transitions
.contains(&(*char_class, target_state.id().into()))
{
dfa.states[state_id]
.transitions
.push((*char_class, target_state.id().into()));
}
}
}
}
}
fn merge_transitions(
partition: &[StateGroup],
transitions: &mut Vec<(StateID, BTreeMap<CharClassID, Vec<StateID>>)>,
) {
for group in partition {
debug_assert!(!group.is_empty());
if group.len() == 1 {
continue;
}
let representative_state_id = group.first().unwrap();
for state_id in group.iter().skip(1) {
Self::merge_transitions_of_state(*state_id, *representative_state_id, transitions);
}
}
}
fn merge_transitions_of_state(
state_id: StateID,
representative_state_id: StateID,
transitions: &mut Vec<(StateID, BTreeMap<CharClassID, Vec<StateID>>)>,
) {
if let Some(rep_pos) = transitions
.iter()
.position(|(s, _)| *s == representative_state_id)
{
let mut rep_trans = transitions.get_mut(rep_pos).unwrap().1.clone();
if let Some(pos) = transitions.iter().position(|(s, _)| *s == state_id) {
let (_, transitions_of_state) = transitions.get_mut(pos).unwrap();
for (char_class, target_states) in transitions_of_state.iter() {
rep_trans
.entry(*char_class)
.and_modify(|e| {
for s in target_states {
if !e.contains(s) {
e.push(*s)
}
}
})
.or_insert(target_states.clone());
}
transitions.remove(pos);
}
transitions[rep_pos].1 = rep_trans;
}
}
fn renumber_states_in_transitions(
partition: &[StateGroup],
transitions: &mut [(StateID, BTreeMap<CharClassID, Vec<StateID>>)],
) {
let find_group_of_state = |state_id: StateID| -> StateID {
for (group_id, group) in partition.iter().enumerate() {
if group.contains(&state_id) {
return StateID::new(group_id as StateIDBase);
}
}
panic!("State {} not found in partition.", state_id.as_usize());
};
for transition in transitions.iter_mut() {
transition.0 = find_group_of_state(transition.0);
for target_states in transition.1.values_mut() {
for target_state in target_states.iter_mut() {
*target_state = find_group_of_state(*target_state);
}
}
}
}
#[allow(dead_code)]
fn trace_partition(context: &str, partition: &[StateGroup]) {
trace!("Partition {}:", context);
for (i, group) in partition.iter().enumerate() {
trace!("Group {}: {:?}", i, group);
}
}
#[allow(dead_code)]
fn trace_transitions_to_groups(
state_id: StateID,
transitions_to_groups: &TransitionsToPartitionGroups,
) {
trace!(" Transitions of state {} to groups:", state_id.as_usize());
for (char_class, group) in &transitions_to_groups.0 {
trace!(" cc# {} -> gr# {}", char_class, group);
}
}
}
#[cfg(test)]
mod tests {
use rustc_hash::FxHashMap;
use super::*;
use crate::internal::compiled_dfa::StateData;
#[test]
fn test_calculate_initial_partition() {
let dfa = CompiledDfa {
patterns: vec![],
terminal_ids: vec![0.into(), 1.into(), 2.into()],
states: vec![StateData::new(); 5],
end_states: vec![
(true, 0.into()),
(true, 1.into()),
(false, 0.into()),
(false, 0.into()),
(true, 2.into()),
],
lookaheads: FxHashMap::default(),
current_states: vec![],
next_states: vec![],
};
let partition = Minimizer::calculate_initial_partition(&dfa);
assert_eq!(partition.len(), 4);
assert_eq!(partition[0].len(), 2);
assert_eq!(partition[1].len(), 1);
assert_eq!(partition[2].len(), 1);
assert_eq!(partition[3].len(), 1);
}
#[test]
fn test_calculate_new_partition() {
let dfa = CompiledDfa {
patterns: vec![],
terminal_ids: vec![0.into(), 1.into(), 2.into()],
states: vec![StateData::new(); 5],
end_states: vec![
(true, 0.into()),
(true, 1.into()),
(false, 0.into()),
(false, 0.into()),
(true, 2.into()),
],
lookaheads: FxHashMap::default(),
current_states: vec![],
next_states: vec![],
};
let transitions: TransitionMap = vec![
(
0.into(),
vec![(0.into(), vec![1.into()]), (1.into(), vec![2.into()])]
.into_iter()
.collect(),
),
(
1.into(),
vec![(0.into(), vec![1.into()]), (1.into(), vec![2.into()])]
.into_iter()
.collect(),
),
(
2.into(),
vec![(0.into(), vec![1.into()]), (1.into(), vec![2.into()])]
.into_iter()
.collect(),
),
(
3.into(),
vec![(0.into(), vec![1.into()]), (1.into(), vec![2.into()])]
.into_iter()
.collect(),
),
(
4.into(),
vec![(0.into(), vec![1.into()]), (1.into(), vec![2.into()])]
.into_iter()
.collect(),
),
]
.into_iter()
.collect();
let partition_old = Minimizer::calculate_initial_partition(&dfa);
let partition_new = Minimizer::calculate_new_partition(&partition_old, &transitions);
assert_eq!(partition_new.len(), 4);
assert_eq!(partition_new[0].len(), 2);
assert_eq!(partition_new[1].len(), 1);
assert_eq!(partition_new[2].len(), 1);
assert_eq!(partition_new[3].len(), 1);
}
}