use std::collections::{HashMap, HashSet};
pub mod parser {
use std::collections::{HashMap, HashSet};
use winnow::{
ModalResult, Parser,
ascii::multispace0,
combinator::{cut_err, delimited, separated, trace},
error::{ContextError, ErrMode, StrContext},
token::{any, take_while},
};
use crate::dfa::{Dfa, Symbol};
fn whitespace_wrapped<'i>(s: &str) -> impl Parser<&'i str, &'i str, ErrMode<ContextError>> {
trace("whitespace_wrapped", delimited(multispace0, s, multispace0))
}
pub fn parse_sigma_definition(input: &mut &str) -> ModalResult<Vec<char>> {
let identifier = whitespace_wrapped("Sigma");
let equals = whitespace_wrapped("=");
let element = delimited("'", any, cut_err("'"));
let separator = whitespace_wrapped(",");
let comma_sep_list = separated(1.., element, separator);
let setp = delimited(
delimited(multispace0, "{", multispace0),
comma_sep_list,
delimited(multispace0, cut_err("}"), multispace0),
);
let mut decl = (identifier, equals, setp).map(|(_, _, x)| x);
decl.parse_next(input)
}
pub fn state_name<'s>() -> impl Parser<&'s str, &'s str, ErrMode<ContextError>> {
take_while(1.., |c: char| c.is_alphanumeric() || c == '_')
}
pub fn state_set<'s>(min: usize) -> impl Parser<&'s str, Vec<&'s str>, ErrMode<ContextError>> {
let separator = whitespace_wrapped(",");
let comma_sep_list = separated(min.., state_name(), separator);
trace(
"state_set",
delimited(
delimited(multispace0, "{", multispace0),
comma_sep_list,
delimited(multispace0, cut_err("}"), multispace0),
),
)
}
pub fn parse_states_definition<'s>(input: &'s mut &str) -> ModalResult<Vec<&'s str>> {
let identifier = whitespace_wrapped("S");
let equals = whitespace_wrapped("=");
let mut decl = (identifier, equals, state_set(1)).map(|(_, _, x)| x);
decl.parse_next(input)
}
pub fn parse_final_states_definition<'s>(input: &'s mut &str) -> ModalResult<Vec<&'s str>> {
let identifier = whitespace_wrapped("F");
let equals = whitespace_wrapped("=");
let mut decl = (identifier, equals, state_set(0)).map(|(_, _, x)| x);
decl.parse_next(input)
}
pub fn parse_start_state_definition<'s>(input: &'s mut &str) -> ModalResult<&'s str> {
let identifier = whitespace_wrapped("start");
let equals = whitespace_wrapped("=");
let state = delimited(multispace0, state_name(), multispace0);
(identifier, equals, state)
.map(|(_, _, x)| x)
.parse_next(input)
}
pub fn delta_tuple<'s>() -> impl Parser<&'s str, (&'s str, char, &'s str), ErrMode<ContextError>>
{
let element = delimited("'", any, cut_err("'"));
let open_paren = whitespace_wrapped("(");
let close_paren = whitespace_wrapped(")");
let tuple = (
open_paren,
state_name(),
whitespace_wrapped(","),
element,
whitespace_wrapped(","),
state_name(),
close_paren,
);
trace(
"delta_tuple",
tuple.map(|(_, s_in, _, sym, _, s_out, _)| (s_in, sym, s_out)),
)
}
pub fn delta_set<'s>()
-> impl Parser<&'s str, Vec<(&'s str, char, &'s str)>, ErrMode<ContextError>> {
let separator = whitespace_wrapped(",");
let comma_sep_list = separated(0.., delta_tuple(), separator);
trace(
"delta_set",
delimited(
delimited(multispace0, "{", multispace0),
comma_sep_list,
delimited(multispace0, cut_err("}"), multispace0),
),
)
}
pub fn parse_delta_definition<'s>(
input: &'s mut &str,
) -> ModalResult<Vec<(&'s str, char, &'s str)>> {
let identifier = whitespace_wrapped("delta");
let equals = whitespace_wrapped("=");
let r = (identifier, equals, delta_set())
.map(|(_, _, x)| x)
.parse_next(input);
match r {
Ok(delta) => {
let mut delta_inputs = HashSet::new();
if delta
.iter()
.any(|(s_in, sym, _)| !delta_inputs.insert((s_in, sym)))
{
let mut context_error = ContextError::new();
context_error.push(StrContext::Label(
"The delta definition is non-deterministic.",
));
Err(ErrMode::Cut(context_error))
} else {
Ok(delta)
}
}
e => e,
}
}
pub fn parse_dfa_definition(input: &mut &str) -> Result<Dfa, String> {
let lines: Vec<String> = input
.lines()
.map(|l| l.trim().to_string())
.filter(|l| !l.is_empty())
.collect();
if lines.len() != 5 {
return Err("Incomplete definition".into());
}
let mut sigma: Option<HashSet<char>> = None;
let mut states: Option<HashSet<String>> = None;
let mut start: Option<String> = None;
let mut final_states: Option<HashSet<String>> = None;
let mut delta: Option<HashMap<(String, Symbol), String>> = None;
for line in lines {
let mut line: &str = &line;
if line.starts_with("Sigma") {
let r =
parse_sigma_definition(&mut line).map_err(|_| "Invalid 'Sigma' definition.")?;
sigma = Some(r.into_iter().collect());
} else if line.starts_with("start") {
let r = parse_start_state_definition(&mut line)
.map_err(|_| "Invalid 'start' definition.")?;
start = Some(r.to_string());
} else if line.starts_with("S") {
let r =
parse_states_definition(&mut line).map_err(|_| "Invalid 'S' definition.")?;
states = Some(r.into_iter().map(|s| s.to_string()).collect());
} else if line.starts_with("F") {
let r = parse_final_states_definition(&mut line)
.map_err(|_| "Invalid 'F' definition.")?;
final_states = Some(r.into_iter().map(|s| s.to_string()).collect());
} else if line.starts_with("delta") {
let r =
parse_delta_definition(&mut line).map_err(|_| "Invalid 'delta' definition.")?;
delta = Some(
r.into_iter()
.map(|(s_in, sym, s_out)| ((s_in.to_string(), sym), s_out.to_string()))
.collect(),
);
} else {
return Err(format!("Can't parse line '{line}'."));
}
}
if let (Some(sigma), Some(states), Some(start), Some(final_states), Some(delta)) =
(sigma, states, start, final_states, delta)
{
Dfa::new(states, sigma, delta, final_states, start)
} else {
Err("Incomplete definition".into())
}
}
}
type State = String;
type Symbol = char;
pub struct Dfa {
pub(crate) states: HashSet<State>,
pub(crate) sigma: HashSet<Symbol>,
pub(crate) delta: HashMap<(State, Symbol), State>,
pub(crate) final_states: HashSet<State>,
pub(crate) start_state: State,
}
impl Dfa {
pub fn new(
states: HashSet<State>,
sigma: HashSet<Symbol>,
delta: HashMap<(State, Symbol), State>,
final_states: HashSet<State>,
start_state: State,
) -> Result<Self, String> {
if !states.contains(&start_state) {
return Err("start must be an element of S.".into());
}
if !final_states.is_subset(&states) {
return Err("F must be a subset of S.".into());
}
let (mut unknown_delta_states, mut unknown_delta_symbols) = delta.iter().fold(
(vec![], vec![]),
|(mut unknown_states, mut unknown_symbols), ((s_in, sym), s_out)| {
if !states.contains(s_in) {
unknown_states.push(s_in.as_str());
}
if !states.contains(s_out) {
unknown_states.push(s_out.as_str());
}
if !sigma.contains(sym) {
unknown_symbols.push(sym.to_string());
}
(unknown_states, unknown_symbols)
},
);
if !unknown_delta_states.is_empty() {
unknown_delta_states.sort();
unknown_delta_states.dedup();
let s = unknown_delta_states.join(", ");
let msg = format!("The delta relation contains the following unknown states: {s}");
return Err(msg);
}
if !unknown_delta_symbols.is_empty() {
unknown_delta_symbols.sort();
unknown_delta_symbols.dedup();
let s = unknown_delta_symbols.join(", ");
let msg = format!("The delta relation contains the following unknown symbols: {s}");
return Err(msg);
}
Ok(Dfa {
states,
sigma,
delta,
final_states,
start_state,
})
}
pub fn sigma(&self) -> &HashSet<Symbol> {
&self.sigma
}
pub fn states(&self) -> &HashSet<State> {
&self.states
}
pub fn start_state(&self) -> &State {
&self.start_state
}
pub fn final_states(&self) -> &HashSet<State> {
&self.final_states
}
pub fn delta(&self) -> &HashMap<(State, Symbol), State> {
&self.delta
}
pub fn accepts(&self, word: &str) -> bool {
let mut running_dfa = RunningDfa::new(self, word);
while running_dfa.transition() {}
running_dfa.accepted()
}
}
pub struct RunningDfa<'a> {
pub(crate) dfa: &'a Dfa,
pub(crate) current_state: &'a State,
pub(crate) transitions: Vec<(Symbol, &'a State)>,
pub(crate) remaining_input: Vec<Symbol>,
pub(crate) accepted_input: Vec<Symbol>,
}
impl<'a> RunningDfa<'a> {
pub fn new(dfa: &'a Dfa, word: &str) -> Self {
Self {
dfa,
current_state: &dfa.start_state,
transitions: vec![],
remaining_input: word.to_string().chars().collect(),
accepted_input: vec![],
}
}
pub fn transition(&mut self) -> bool {
match self.remaining_input.first() {
None => false,
Some(symbol) => {
if let Some(next_state) = self.dfa.delta.get(&(self.current_state.clone(), *symbol))
{
self.current_state = next_state;
self.transitions.push((*symbol, next_state));
self.accepted_input.push(*symbol);
self.remaining_input.remove(0);
true
} else {
false
}
}
}
}
pub fn accepts(&mut self) -> bool {
while self.transition() {}
self.accepted()
}
pub fn accepted(&self) -> bool {
self.dfa.final_states.contains(self.current_state) && self.remaining_input.is_empty()
}
pub fn transitions(&self) -> &Vec<(char, &'a String)> {
&self.transitions
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn a_possible_transition_works() {
let dfa = Dfa::new(
HashSet::from(["s0".into(), "s1".into(), "s2".into()]),
HashSet::from(['a', 'b']),
HashMap::from([(("s0".into(), 'a'), "s1".into())]),
HashSet::from(["s2".into()]),
"s0".into(),
)
.unwrap();
let mut dfa_state = RunningDfa::new(&dfa, "a");
assert!(dfa_state.transition());
assert_eq!(dfa_state.current_state, "s1");
assert_eq!(dfa_state.accepted_input, vec!['a']);
assert!(dfa_state.remaining_input.is_empty());
}
#[test]
fn start_state_must_be_known() {
let dfa = Dfa::new(
HashSet::from(["s0".into(), "s1".into(), "s2".into()]),
HashSet::from(['a', 'b']),
HashMap::from([(("s0".into(), 'a'), "s1".into())]),
HashSet::from(["s2".into()]),
"sX".into(),
);
assert!(dfa.is_err());
}
#[test]
fn final_states_must_be_known() {
let dfa = Dfa::new(
HashSet::from(["s0".into(), "s1".into(), "s2".into()]),
HashSet::from(['a', 'b']),
HashMap::from([(("s0".into(), 'a'), "s1".into())]),
HashSet::from(["s2".into(), "sX".into()]),
"s0".into(),
);
assert!(dfa.is_err());
}
#[test]
fn delta_states_must_be_known() {
let dfa = Dfa::new(
HashSet::from(["s0".into(), "s1".into(), "s2".into()]),
HashSet::from(['a', 'b']),
HashMap::from([(("sX".into(), 'a'), "s1".into())]),
HashSet::from(["s2".into()]),
"s0".into(),
);
assert!(dfa.is_err());
let dfa = Dfa::new(
HashSet::from(["s0".into(), "s1".into(), "s2".into()]),
HashSet::from(['a', 'b']),
HashMap::from([(("s0".into(), 'a'), "sX".into())]),
HashSet::from(["s2".into()]),
"s0".into(),
);
assert!(dfa.is_err());
}
#[test]
fn delta_symbols_must_be_known() {
let dfa = Dfa::new(
HashSet::from(["s0".into(), "s1".into(), "s2".into()]),
HashSet::from(['a', 'b']),
HashMap::from([(("s0".into(), 'x'), "s1".into())]),
HashSet::from(["s2".into()]),
"s0".into(),
);
assert!(dfa.is_err());
}
#[test]
fn parsing_non_deterministic_delta_should_fail() {
let mut s = "delta = { (s0, 'a', s1), (s0, 'a', s2), (s0, 'b', s1), (s0, 'b', s2) }";
let r = parser::parse_delta_definition(&mut s);
assert!(r.is_err());
assert!(r.unwrap_err().to_string().contains("non-deterministic"));
}
#[test]
fn accepts_works() {
let dfa = Dfa::new(
HashSet::from(["s0".into(), "s1".into(), "s2".into()]),
HashSet::from(['a', 'b']),
HashMap::from([
(("s0".into(), 'a'), "s1".into()),
(("s1".into(), 'b'), "s2".into()),
]),
HashSet::from(["s2".into()]),
"s0".into(),
)
.unwrap();
assert!(dfa.accepts("ab"));
assert!(!dfa.accepts("abb"));
assert!(!dfa.accepts("aa"));
assert!(!dfa.accepts("a"));
assert!(!dfa.accepts("x"));
assert!(!dfa.accepts(""));
}
#[test]
fn parse_sigma_works() {
let mut s = "Sigma = { 'a' , 'b','c', ' ' } ";
let symbols = parser::parse_sigma_definition(&mut s).unwrap();
assert_eq!(symbols, vec!['a', 'b', 'c', ' ']);
}
#[test]
fn parse_empty_sigma_should_fail() {
let mut s = "Sigma = { } ";
let r = parser::parse_sigma_definition(&mut s);
assert!(r.is_err());
}
#[test]
fn parse_states_works() {
let mut s = "S = { s0 , s1,s2 } ";
let states = parser::parse_states_definition(&mut s).unwrap();
assert_eq!(states, vec!["s0", "s1", "s2"]);
}
#[test]
fn parse_empty_states_should_fail() {
let mut s = "S = {} ";
let r = parser::parse_states_definition(&mut s);
assert!(r.is_err());
}
#[test]
fn parse_final_states_works() {
let mut s = "F = { s_0 , s1 } ";
let states = parser::parse_final_states_definition(&mut s).unwrap();
assert_eq!(states, vec!["s_0", "s1"]);
}
#[test]
fn parse_empty_final_states_works() {
let mut s = "F = { } ";
let states = parser::parse_final_states_definition(&mut s).unwrap();
assert!(states.is_empty());
}
#[test]
fn parse_start_state_works() {
let mut s = "start = s0";
let state = parser::parse_start_state_definition(&mut s).unwrap();
assert_eq!(state, "s0");
}
#[test]
fn parse_delta_works() {
let mut s = "delta = { (s0, 'a', s1), (s1, 'b', s2) }";
let r = parser::parse_delta_definition(&mut s).unwrap();
assert_eq!(r, vec![("s0", 'a', "s1"), ("s1", 'b', "s2")]);
}
#[test]
fn parse_dfa_definition_works() {
let mut s = "
Sigma = { 'a', 'b' }
S = { s0, s1, s2 }
start = s0
F = { s2 }
delta = { (s0, 'a', s1), (s1, 'b', s2) }
";
let r = parser::parse_dfa_definition(&mut s);
assert!(r.is_ok());
let dfa = r.unwrap();
assert_eq!(
dfa.states,
HashSet::from_iter(["s0".into(), "s1".into(), "s2".into()])
);
assert_eq!(dfa.sigma, HashSet::from_iter(['a', 'b']));
assert_eq!(dfa.start_state, "s0");
assert_eq!(dfa.final_states, HashSet::from_iter(["s2".into()]));
assert_eq!(
dfa.delta,
HashMap::from_iter([
(("s0".into(), 'a'), "s1".into()),
(("s1".into(), 'b'), "s2".into())
])
);
assert!(dfa.accepts("ab"));
assert!(!dfa.accepts("abc"));
}
#[test]
fn parse_dfa_definition_with_missing_but_duplicated_parts_should_fail() {
let mut s = "
Sigma = { 'a', 'b' }
S = { s0, s1, s2 }
Sigma = { 'a', 'b' }
F = { s2 }
delta = { (s0, 'a', s1), (s1, 'b', s2) }
";
let r = parser::parse_dfa_definition(&mut s);
assert!(r.is_err());
}
#[test]
fn runningdfa_transitions_works() {
let mut s = "
Sigma = { 'a', 'b' }
S = { s0, s1, s2 }
start = s0
F = { s2 }
delta = { (s0, 'a', s1), (s1, 'b', s2) , (s2, 'b', s2) }
";
let r = parser::parse_dfa_definition(&mut s);
assert!(r.is_ok());
let dfa = r.unwrap();
let mut running_dfa = RunningDfa::new(&dfa, "abb");
assert!(running_dfa.accepts());
assert_eq!(
vec![
('a', &"s1".to_string()),
('b', &"s2".to_string()),
('b', &"s2".to_string())
],
running_dfa.transitions
);
}
}