use crate::compile::AlphabetOrder;
use proc_macro2::TokenStream;
use std::collections::HashSet;
use std::fmt::Debug;
use std::fmt::Display;
use std::fmt::Formatter;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::rc::Rc;
use syn::Path;
pub struct Symbol {
pub(super) name: Path,
}
impl Eq for Symbol {}
impl PartialEq for Symbol {
fn eq(&self, other: &Self) -> bool {
self.to_string() == other.to_string()
}
}
impl Debug for Symbol {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
Display::fmt(self, f)
}
}
impl Hash for Symbol {
fn hash<H: Hasher>(&self, state: &mut H) {
self.to_string().hash(state)
}
}
impl Display for Symbol {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let name = &self.name;
write!(f, "{}", quote::quote!(#name))
}
}
impl From<&str> for Symbol {
fn from(value: &str) -> Self {
Self {
name: syn::parse2(value.parse::<TokenStream>().unwrap()).unwrap(),
}
}
}
#[derive(Hash, Clone, PartialEq, Eq)]
pub enum Regex {
EmptyString,
EmptySet,
Symbol(Rc<Symbol>),
Repeat(Rc<Regex>),
Complement(Rc<Regex>),
Or(Rc<Regex>, Rc<Regex>),
And(Rc<Regex>, Rc<Regex>),
Concat(Rc<Regex>, Rc<Regex>),
}
impl Debug for Regex {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::EmptyString => write!(f, "e"),
Self::EmptySet => write!(f, "0"),
Self::Symbol(sym) => write!(f, "{:?}", sym.name.get_ident()),
Self::Repeat(re) => write!(f, "{:?}*", *re),
Self::Complement(re) => write!(f, "~{:?}", *re),
Self::Or(l, r) => write!(f, "{:?} | {:?}", *l, *r),
Self::And(l, r) => write!(f, "{:?} & {:?}", *l, *r),
Self::Concat(l, r) => write!(f, "{:?} {:?}", *l, *r),
}
}
}
impl Regex {
pub(crate) fn derive(&self, symbol: Option<&Rc<Symbol>>) -> Rc<Regex> {
match self {
Regex::EmptySet => Regex::EmptySet.into(),
Regex::EmptyString => Regex::EmptySet.into(),
Regex::Symbol(s) => {
if Some(s) == symbol {
Regex::EmptyString.into()
} else {
Regex::EmptySet.into()
}
}
Regex::Repeat(r) => {
Regex::Concat(r.derive(symbol), Regex::Repeat(r.clone()).into()).into()
}
Regex::Complement(c) => Regex::Complement(c.derive(symbol)).into(),
Regex::Or(l, r) => Regex::Or(l.derive(symbol), r.derive(symbol)).into(),
Regex::And(l, r) => Regex::And(l.derive(symbol), r.derive(symbol)).into(),
Regex::Concat(l, r) => {
let new = Regex::Concat(l.derive(symbol), r.clone()).into();
if l.is_nullable() {
Regex::Or(new, r.derive(symbol)).into()
} else {
new
}
}
}
}
pub(crate) fn normalize(self: &Rc<Self>, ab: &AlphabetOrder) -> Rc<Regex> {
match self.deref() {
Regex::EmptyString => self.clone(),
Regex::EmptySet => self.clone(),
Regex::Symbol(_) => self.clone(),
Regex::Repeat(r) => {
let r = r.normalize(ab);
match r.deref() {
Regex::EmptySet => Regex::EmptyString.into(),
Regex::EmptyString => r,
Regex::Repeat(_) => r,
_ => Regex::Repeat(r).into(),
}
}
Regex::Complement(c) => {
let c = c.normalize(ab);
match c.deref() {
Regex::Complement(c) => c.clone(),
_ => Regex::Complement(c).into(),
}
}
Regex::Or(l, r) => normalize_or(l, r, ab),
Regex::And(l, r) => normalize_and(l, r, ab),
Regex::Concat(l, r) => {
let l = l.normalize(ab);
let r = r.normalize(ab);
match (l.deref(), r.deref()) {
(Regex::EmptySet, _) => l,
(Regex::EmptyString, _) => r,
(Regex::Concat(il, ir), _) => {
Regex::Concat(il.clone(), Regex::Concat(ir.clone(), r).into()).into()
}
(_, Regex::EmptySet) => r,
(_, Regex::EmptyString) => l,
_ => Regex::Concat(l, r).into(),
}
}
}
}
pub fn is_nullable(&self) -> bool {
match self {
Regex::EmptySet => false,
Regex::EmptyString => true,
Regex::Symbol(_) => false,
Regex::Concat(l, r) => l.is_nullable() && r.is_nullable(),
Regex::Repeat(_) => true,
Regex::Or(l, r) => l.is_nullable() || r.is_nullable(),
Regex::And(l, r) => l.is_nullable() && r.is_nullable(),
Regex::Complement(i) => !i.is_nullable(),
}
}
pub(crate) fn alphabet(&self) -> HashSet<Rc<Symbol>> {
let mut alphabet = HashSet::new();
self.search_alphabet(&mut alphabet);
alphabet
}
fn search_alphabet(&self, alphabet: &mut HashSet<Rc<Symbol>>) {
match self {
Regex::EmptyString => {}
Regex::EmptySet => {}
Regex::Symbol(s) => {
alphabet.insert(s.clone());
}
Regex::Repeat(i) | Regex::Complement(i) => i.search_alphabet(alphabet),
Regex::Or(l, r) | Regex::And(l, r) | Regex::Concat(l, r) => {
l.search_alphabet(alphabet);
r.search_alphabet(alphabet);
}
}
}
fn order(&self, ab: &AlphabetOrder) -> i64 {
match self {
Regex::EmptySet => 1,
Regex::EmptyString => 2,
Regex::Concat(_, _) => 3,
Regex::Repeat(_) => 4,
Regex::Or(_, _) => 5,
Regex::And(_, _) => 6,
Regex::Complement(_) => 7,
Regex::Symbol(s) => ab.get(s) + 8,
}
}
fn compare(&self, other: &Regex, ab: &AlphabetOrder) -> i64 {
match (self, other) {
(Regex::Concat(l1, r1), Regex::Concat(l2, r2)) => {
let ans = l1.compare(l2, ab);
if ans == 0 {
r1.compare(r2, ab)
} else {
ans
}
}
(Regex::Repeat(l), Regex::Repeat(r)) => l.compare(r, ab),
(Regex::Or(l1, r1), Regex::Or(l2, r2)) => {
let ans = l1.compare(l2, ab);
if ans == 0 {
r1.compare(r2, ab)
} else {
ans
}
}
(Regex::And(l1, r1), Regex::And(l2, r2)) => {
let ans = l1.compare(l2, ab);
if ans == 0 {
r1.compare(r2, ab)
} else {
ans
}
}
(Regex::Complement(l), Regex::Complement(r)) => l.compare(r, ab),
_ => self.order(ab) - other.order(ab),
}
}
}
fn normalize_or(l: &Rc<Regex>, r: &Rc<Regex>, ab: &AlphabetOrder) -> Rc<Regex> {
let l = l.normalize(ab);
let r = r.normalize(ab);
match (l.deref(), r.deref()) {
(a, b) if a == b => l,
(Regex::EmptySet, _) => r,
(Regex::EmptyString, rr) if rr.is_nullable() => r,
(Regex::Or(il, ir), _) => normalize_or(il, &normalize_or(ir, &r, ab), ab),
(Regex::Complement(c), _) if c.deref() == &Regex::EmptySet => l,
(_, Regex::Or(il, ir)) if l.compare(il, ab) > 0 => {
normalize_or(il, &normalize_or(&l, ir, ab), ab)
}
_ if l.compare(&r, ab) > 0 => normalize_or(&r, &l, ab),
_ => Regex::Or(l, r).into(),
}
}
fn normalize_and(l: &Rc<Regex>, r: &Rc<Regex>, ab: &AlphabetOrder) -> Rc<Regex> {
let l = l.normalize(ab);
let r = r.normalize(ab);
match (l.deref(), r.deref()) {
(a, b) if a == b => l,
(Regex::EmptySet, _) => l,
(Regex::EmptyString, r) if r.is_nullable() => l,
(Regex::EmptyString, _) => Regex::EmptySet.into(),
(Regex::And(il, ir), _) => normalize_and(il, &normalize_and(ir, &r, ab), ab),
(Regex::Complement(c), _) if c.normalize(ab).deref() == &Regex::EmptySet => r,
(_, Regex::And(il, ir)) if l.compare(ir, ab) > 0 => {
normalize_and(il, &normalize_and(&l, ir, ab), ab)
}
_ if l.compare(&r, ab) > 0 => normalize_and(&r, &l, ab),
_ => Regex::And(l, r).into(),
}
}
impl Display for Regex {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Regex::EmptyString => write!(f, "e"),
Regex::EmptySet => write!(f, "0"),
Regex::Symbol(s) => write!(f, "{s}"),
Regex::Repeat(r) => write!(f, "({r})*"),
Regex::Complement(c) => write!(f, "~({c})"),
Regex::Or(a, b) => write!(f, "{a} | {b}"),
Regex::And(a, b) => write!(f, "{a} | {b}"),
Regex::Concat(a, b) => write!(f, "{a} {b}"),
}
}
}
#[cfg(test)]
mod tests {
use crate::compile::AlphabetOrder;
use crate::parse_regex;
use crate::regex::Symbol;
use std::rc::Rc;
#[test]
fn nullable() {
assert!(parse_regex("e").unwrap().is_nullable());
assert!(!parse_regex("0").unwrap().is_nullable());
assert!(parse_regex("e | e").unwrap().is_nullable());
assert!(parse_regex("e | A").unwrap().is_nullable());
assert!(parse_regex("A | e").unwrap().is_nullable());
assert!(!parse_regex("A | B").unwrap().is_nullable());
assert!(parse_regex("e | e").unwrap().is_nullable());
assert!(!parse_regex("e & A").unwrap().is_nullable());
assert!(!parse_regex("e & A").unwrap().is_nullable());
assert!(!parse_regex("A & B").unwrap().is_nullable());
assert!(parse_regex("e | e").unwrap().is_nullable());
assert!(!parse_regex("A e").unwrap().is_nullable());
assert!(!parse_regex("e B").unwrap().is_nullable());
assert!(!parse_regex("A B").unwrap().is_nullable());
assert!(parse_regex("A*").unwrap().is_nullable());
assert!(!parse_regex("A+").unwrap().is_nullable());
assert!(parse_regex("A?").unwrap().is_nullable());
}
#[test]
fn apply_symbol() {
let a = Rc::new(Symbol::from("a"));
let b = Rc::new(Symbol::from("b"));
let c = Rc::new(Symbol::from("c"));
let ab = AlphabetOrder::new(&[a.clone(), b.clone(), c.clone()].into_iter().collect());
assert_eq!(
parse_regex("a b").unwrap().derive(Some(&a)).normalize(&ab),
parse_regex("b").unwrap().into()
);
assert_eq!(
parse_regex("a b").unwrap().derive(Some(&b)).normalize(&ab),
parse_regex("0").unwrap().into()
);
}
}