use std::collections::HashMap;
use std::fmt::Display;
use std::hash::Hash;
use std::rc::Rc;
use crate::automata::Automaton;
use crate::automata::AutomatonBuilder;
use crate::bfs_queues::*;
use crate::character_sets::*;
use crate::errors::*;
use crate::labeled_queues::LabeledQueue;
use crate::loop_ranges::*;
use crate::matcher::SearchResult;
use crate::smt_strings::SmtString;
use crate::smt_strings::MAX_CHAR;
use crate::store::*;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum BaseRegLan {
Empty,
Epsilon,
Range(CharSet),
Concat(RegLan, RegLan),
Loop(RegLan, LoopRange),
Complement(RegLan),
Union(Box<[RegLan]>),
Inter(Box<[RegLan]>),
}
pub type RegLan = &'static RE;
#[derive(Debug)]
pub struct RE {
expr: BaseRegLan,
id: usize,
pub nullable: bool,
singleton: bool,
simple_pattern: bool,
deriv_class: Rc<CharPartition>,
}
impl PartialEq for RE {
fn eq(&self, other: &Self) -> bool {
self.id == other.id
}
}
impl Eq for RE {}
impl PartialOrd for RE {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for RE {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.id.cmp(&other.id)
}
}
impl Hash for RE {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.id.hash(state)
}
}
fn push_multiple<T: Copy>(v: &mut Vec<T>, n: u32, x: T) {
for _ in 0..n {
v.push(x);
}
}
impl BaseRegLan {
fn is_nullable(&self) -> bool {
match self {
BaseRegLan::Empty => false,
BaseRegLan::Epsilon => true,
BaseRegLan::Range(_) => false,
BaseRegLan::Concat(e1, e2) => e1.nullable && e2.nullable,
BaseRegLan::Loop(e, range) => range.start() == 0 || e.nullable,
BaseRegLan::Complement(e) => !e.nullable,
BaseRegLan::Inter(args) => args.iter().all(|x| x.nullable),
BaseRegLan::Union(args) => args.iter().any(|x| x.nullable),
}
}
pub fn is_atomic(&self) -> bool {
matches!(
self,
BaseRegLan::Empty | BaseRegLan::Epsilon | BaseRegLan::Range(_)
)
}
fn concat_or_atomic(&self) -> bool {
matches!(
self,
BaseRegLan::Empty
| BaseRegLan::Epsilon
| BaseRegLan::Range(..)
| BaseRegLan::Concat(..)
| BaseRegLan::Loop(..)
)
}
fn is_all_chars(&self) -> bool {
if let BaseRegLan::Range(s) = self {
s.is_alphabet()
} else {
false
}
}
fn is_full(&self) -> bool {
if let BaseRegLan::Loop(r, range) = self {
range.is_all() && r.expr.is_all_chars()
} else {
false
}
}
fn is_singleton(&self) -> bool {
match self {
BaseRegLan::Epsilon => true,
BaseRegLan::Range(c) => c.is_singleton(),
BaseRegLan::Concat(e1, e2) => e1.singleton && e2.singleton,
BaseRegLan::Loop(e, range) => e.singleton && range.is_point(),
_ => false,
}
}
fn is_simple_pattern(&self) -> bool {
match self {
BaseRegLan::Epsilon => true,
BaseRegLan::Range(..) => true,
BaseRegLan::Loop(r, ..) => matches!(r.expr, BaseRegLan::Range(..)),
BaseRegLan::Concat(e1, e2) => e1.simple_pattern && e2.simple_pattern,
_ => false,
}
}
fn is_range(&self) -> bool {
matches!(self, BaseRegLan::Range(..))
}
fn match_char_set(&self, s: &CharSet) -> bool {
match self {
BaseRegLan::Range(x) => s.covers(x),
_ => false,
}
}
fn deriv_class(&self) -> Rc<CharPartition> {
fn rc(p: CharPartition) -> Rc<CharPartition> {
Rc::new(p)
}
fn merge_deriv_classes(a: &[RegLan]) -> Rc<CharPartition> {
let mut result = CharPartition::new();
for &re in a {
result = merge_partitions(&result, &re.deriv_class)
}
rc(result)
}
fn empty_partition() -> Rc<CharPartition> {
rc(CharPartition::new())
}
match self {
BaseRegLan::Empty => empty_partition(),
BaseRegLan::Epsilon => empty_partition(),
BaseRegLan::Range(c) => rc(CharPartition::from_set(c)),
BaseRegLan::Concat(e1, e2) => {
if e1.nullable {
rc(merge_partitions(&e1.deriv_class, &e2.deriv_class))
} else {
e1.deriv_class.clone()
}
}
BaseRegLan::Loop(e, _) => e.deriv_class.clone(),
BaseRegLan::Complement(e) => e.deriv_class.clone(),
BaseRegLan::Inter(args) => merge_deriv_classes(args.as_ref()),
BaseRegLan::Union(args) => merge_deriv_classes(args.as_ref()),
}
}
#[allow(dead_code)]
fn collect_chars(&self, v: &mut Vec<u32>) {
match self {
BaseRegLan::Range(c) => {
if c.is_singleton() {
v.push(c.pick());
}
}
BaseRegLan::Loop(r, range) => {
if range.is_point() {
if let BaseRegLan::Range(c) = r.expr {
if c.is_singleton() {
push_multiple(v, range.start(), c.pick());
}
}
}
}
BaseRegLan::Concat(e1, e2) => {
e1.expr.collect_chars(v);
e2.expr.collect_chars(v);
}
_ => (),
}
}
}
impl HashConsed for RE {
type Key = BaseRegLan;
fn make(index: usize, k: &Self::Key) -> Self {
RE {
expr: k.clone(),
id: index,
nullable: k.is_nullable(),
singleton: k.is_singleton(),
simple_pattern: k.is_simple_pattern(),
deriv_class: k.deriv_class(),
}
}
}
impl Display for BaseRegLan {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn write_sub(f: &mut std::fmt::Formatter<'_>, e: RegLan) -> std::fmt::Result {
if e.singleton || e.expr.is_atomic() {
write!(f, "{}", e.expr)
} else {
write!(f, "({})", e.expr)
}
}
fn write_list(
f: &mut std::fmt::Formatter<'_>,
l: &[RegLan],
symbol: char,
) -> std::fmt::Result {
debug_assert!(!l.is_empty());
write_sub(f, l[0])?;
for e in &l[1..] {
write!(f, " {symbol} ")?;
write_sub(f, e)?;
}
Ok(())
}
match self {
BaseRegLan::Empty => write!(f, "\u{2205}"), BaseRegLan::Epsilon => write!(f, "\u{03B5}"),
BaseRegLan::Range(r) => write!(f, "{r}"),
BaseRegLan::Concat(e1, e2) => {
let mut v = Vec::new();
flatten_concat(e1, &mut v);
flatten_concat(e2, &mut v);
for e in v {
write_sub(f, e)?
}
Ok(())
}
BaseRegLan::Loop(e, range) => {
write_sub(f, e)?;
write!(f, "^{range}")
}
BaseRegLan::Complement(e) => {
write!(f, "\u{00AC}")?;
write_sub(f, e)
}
BaseRegLan::Inter(args) => write_list(f, args, '&'),
BaseRegLan::Union(args) => write_list(f, args, '+'),
}
}
}
impl Display for RE {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.expr.fmt(f)
}
}
impl RE {
pub fn empty_complement(&self) -> bool {
self.deriv_class.empty_complement()
}
pub fn num_deriv_classes(&self) -> usize {
self.deriv_class.len()
}
pub fn valid_class_id(&self, cid: ClassId) -> bool {
self.deriv_class.valid_class_id(cid)
}
pub fn is_empty(&self) -> bool {
matches!(self.expr, BaseRegLan::Empty)
}
fn pick_class_rep(&self, cid: ClassId) -> u32 {
self.deriv_class.pick_in_class(cid)
}
fn class_of_char(&self, x: u32) -> ClassId {
debug_assert!(x <= MAX_CHAR);
self.deriv_class.class_of_char(x)
}
fn class_of_set(&self, s: &CharSet) -> Result<ClassId, Error> {
self.deriv_class.class_of_set(s)
}
pub fn class_ids(&self) -> ClassIdIterator<'_> {
self.deriv_class.class_ids()
}
pub fn char_ranges(&self) -> impl Iterator<Item = &CharSet> {
self.deriv_class.ranges()
}
pub fn included_in(&self, other: &Self) -> bool {
sub_language(self, other)
}
}
#[derive(Debug)]
struct ReIterator {
queue: BfsQueue<RegLan>,
}
impl Iterator for ReIterator {
type Item = RegLan;
fn next(&mut self) -> Option<Self::Item> {
fn get_next(queue: &mut BfsQueue<RegLan>) -> Option<RegLan> {
queue.pop().map(|x| {
match x.expr {
BaseRegLan::Concat(left, right) => {
queue.push(left);
queue.push(right);
}
BaseRegLan::Loop(x, _) => {
queue.push(x);
}
BaseRegLan::Complement(x) => {
queue.push(x);
}
BaseRegLan::Inter(ref list) => {
queue.push_all(list.iter().copied());
}
BaseRegLan::Union(ref list) => {
queue.push_all(list.iter().copied());
}
_ => (),
}
x
})
}
get_next(&mut self.queue)
}
}
pub fn sub_terms(r: RegLan) -> impl Iterator<Item = RegLan> {
let mut queue = BfsQueue::new();
queue.push(r);
ReIterator { queue }
}
pub fn leaves(r: RegLan) -> impl Iterator<Item = RegLan> {
sub_terms(r).filter(|&x| x.expr.is_atomic())
}
fn flatten_concat<'a>(r: &'a RE, v: &mut Vec<&'a RE>) {
match &r.expr {
BaseRegLan::Epsilon => (), BaseRegLan::Concat(x, y) => {
flatten_concat(x, v);
flatten_concat(y, v)
}
_ => v.push(r),
}
}
fn decompose_concat(r: &RE) -> Vec<&RE> {
let mut result = Vec::new();
flatten_concat(r, &mut result);
result
}
fn flatten_inter<'a>(r: &'a RE, v: &mut Vec<&'a RE>) {
match &r.expr {
BaseRegLan::Inter(x) => {
for &s in x.as_ref() {
flatten_inter(s, v)
}
}
_ => v.push(r),
}
}
fn flatten_union<'a>(r: &'a RE, v: &mut Vec<&'a RE>) {
match &r.expr {
BaseRegLan::Union(x) => {
for &s in x.as_ref() {
flatten_union(s, v)
}
}
_ => v.push(r),
}
}
fn contains<'a>(v: &[&'a RE], x: &'a RE) -> bool {
for &y in v {
if *y == *x {
return true;
}
if *y > *x {
return false;
}
}
false
}
fn set_to_singleton<'a>(a: &mut Vec<&'a RE>, x: &'a RE) {
a.clear();
a.push(x);
}
#[derive(Debug)]
struct BasePattern {
start: usize,
end: usize,
is_rigid: bool,
start_match: usize,
end_match: usize,
}
impl BasePattern {
fn len(&self) -> usize {
self.end - self.start
}
fn set_match(&mut self, start: usize, end: usize) {
self.start_match = start;
self.end_match = end;
}
fn make(start: usize, end: usize, is_rigid: bool) -> BasePattern {
debug_assert!(start < end);
BasePattern {
start,
end,
is_rigid,
start_match: 0,
end_match: 0,
}
}
}
impl Display for BasePattern {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let k = if self.is_rigid { "Rigid" } else { "Flex" };
write!(f, "{}({}, {})", k, self.start, self.end)
}
}
fn base_patterns(r: &[&RE]) -> Vec<BasePattern> {
let mut result = Vec::new();
if !r.is_empty() {
let mut j = 0;
let mut rigid_slice = r[0].expr.is_range();
for (i, re) in r.iter().enumerate().skip(1) {
let rigid_i = re.expr.is_range();
if rigid_slice != rigid_i {
result.push(BasePattern::make(j, i, rigid_slice));
rigid_slice = rigid_i;
j = i;
}
}
result.push(BasePattern::make(j, r.len(), rigid_slice))
}
result
}
fn rigid_match_at(pattern: &[&CharSet], s: &[&RE], i: usize) -> bool {
debug_assert!(i + pattern.len() <= s.len());
for j in 0..pattern.len() {
if !s[i + j].expr.match_char_set(pattern[j]) {
return false;
}
}
true
}
fn next_rigid_match(pattern: &[&CharSet], s: &[&RE], i: usize) -> SearchResult {
let p_len = pattern.len();
let s_len = s.len();
if s_len >= p_len {
for j in i..=(s_len - p_len) {
if rigid_match_at(pattern, s, j) {
return SearchResult::Found(j, j + p_len);
}
}
}
SearchResult::NotFound
}
fn prev_rigid_match(pattern: &[&CharSet], s: &[&RE], i: usize) -> SearchResult {
let p_len = pattern.len();
for j in (p_len..=i).rev() {
if rigid_match_at(pattern, s, j - p_len) {
return SearchResult::Found(j - p_len, j);
}
}
SearchResult::NotFound
}
fn char_sets_of_pattern<'a>(p: &[&'a RE]) -> Vec<&'a CharSet> {
let mut result = Vec::new();
for r in p {
if let BaseRegLan::Range(s) = &r.expr {
result.push(s)
} else {
unreachable!()
}
}
result
}
fn rigid_prefix_match<'a>(u: &[&'a RE], v: &[&'a RE], p: &BasePattern) -> bool {
if u.len() >= p.len() {
let p = char_sets_of_pattern(&v[p.start..p.end]);
rigid_match_at(&p, u, 0)
} else {
false
}
}
fn rigid_suffix_match<'a>(u: &[&'a RE], v: &[&'a RE], p: &BasePattern) -> bool {
if u.len() >= p.len() {
let p = char_sets_of_pattern(&v[p.start..p.end]);
rigid_match_at(&p, u, u.len() - p.len())
} else {
false
}
}
#[allow(clippy::many_single_char_names)]
fn find_rigid_matches<'a>(u: &[&'a RE], v: &[&'a RE], patterns: &mut [BasePattern]) -> bool {
let mut i = 0;
for p in patterns {
if p.is_rigid {
let pattern = char_sets_of_pattern(&v[p.start..p.end]);
match next_rigid_match(&pattern, u, i) {
SearchResult::NotFound => return false,
SearchResult::Found(j, k) => {
p.set_match(j, k);
i = k;
}
}
}
}
true
}
#[allow(clippy::many_single_char_names)]
fn find_rigid_matches_rev<'a>(u: &[&'a RE], v: &[&'a RE], patterns: &mut [BasePattern]) -> bool {
let mut i = u.len();
for p in patterns.iter_mut().rev() {
if p.is_rigid {
let pattern = char_sets_of_pattern(&v[p.start..p.end]);
match prev_rigid_match(&pattern, u, i) {
SearchResult::NotFound => return false,
SearchResult::Found(j, k) => {
p.set_match(j, k);
i = j;
}
}
}
}
true
}
fn set_flexible_regions(p: &mut [BasePattern], string_len: usize) {
for i in 0..p.len() {
if !p[i].is_rigid {
let prev = if i == 0 { 0 } else { p[i - 1].end_match };
let next = if i == p.len() - 1 {
string_len
} else {
p[i + 1].start_match
};
p[i].set_match(prev, next);
}
}
}
fn flexible_match<'a>(_u: &[&'a RE], v: &[&'a RE]) -> bool {
v.len() == 1 && v[0].expr.is_full()
}
fn match_flexible_patterns<'a>(u: &[&'a RE], v: &[&'a RE], patterns: &mut [BasePattern]) -> bool {
if patterns.is_empty() {
u.is_empty()
} else {
set_flexible_regions(patterns, u.len());
for p in patterns {
if !p.is_rigid && !flexible_match(&u[p.start_match..p.end_match], &v[p.start..p.end]) {
return false;
}
}
true
}
}
fn shift_pattern_start(patterns: &mut [BasePattern], delta: usize) {
for p in patterns {
debug_assert!(p.end > p.start && p.start >= delta);
p.start -= delta;
p.end -= delta;
}
}
fn concat_inclusion<'a>(u: &[&'a RE], v: &[&'a RE]) -> bool {
let mut b = base_patterns(v);
let mut p = b.as_mut_slice();
let mut u = u;
let mut v = v;
if let Some(pat) = p.first() {
if pat.is_rigid {
if rigid_prefix_match(u, v, pat) {
let len = pat.len();
p = &mut p[1..];
shift_pattern_start(p, len);
u = &u[len..];
v = &v[len..];
} else {
return false;
}
}
}
if let Some(pat) = p.last() {
if pat.is_rigid {
if rigid_suffix_match(u, v, pat) {
let len = pat.len();
let p_len = p.len();
p = &mut p[..p_len - 1];
u = &u[..u.len() - len];
v = &v[..v.len() - len];
} else {
return false;
}
}
}
if find_rigid_matches(u, v, p) && match_flexible_patterns(u, v, p) {
return true;
}
if find_rigid_matches_rev(u, v, p) && match_flexible_patterns(u, v, p) {
return true;
}
false
}
fn sub_language<'a>(r: &'a RE, s: &'a RE) -> bool {
use BaseRegLan::*;
if r == s {
true
} else {
match (&r.expr, &s.expr) {
(Empty, _) => true,
(_, Empty) => false,
(Epsilon, _) => s.nullable,
(_, Epsilon) => false,
(Complement(r1), Complement(s2)) => sub_language(s2, r1),
(_, Union(list)) => {
r.expr.concat_or_atomic() && list.iter().any(|&x| sub_language(r, x))
}
(Inter(list), _) => {
s.expr.concat_or_atomic() && list.iter().any(|&x| sub_language(x, s))
}
(Union(list), _) => {
s.expr.concat_or_atomic() && list.iter().all(|&x| sub_language(x, s))
}
(_, Inter(list)) => {
r.expr.concat_or_atomic() && list.iter().all(|&x| sub_language(r, x))
}
(_, _) => {
let u = decompose_concat(r);
let v = decompose_concat(s);
concat_inclusion(&u, &v)
}
}
}
}
fn simplify_set_operation<'a>(v: &mut Vec<&'a RE>, bottom: &'a RE, top: &'a RE) {
if !v.is_empty() {
v.sort();
v.dedup();
if contains(v, top) {
set_to_singleton(v, top)
} else {
let mut j = 0;
let mut previous = v[0];
if previous != bottom {
v[j] = previous;
j += 1;
}
for i in 1..v.len() {
let current = v[i];
if current.id == previous.id + 1 && previous.id % 2 == 0 {
set_to_singleton(v, top);
return;
}
if current != bottom {
v[j] = current;
previous = current;
j += 1;
}
}
v.truncate(j)
}
}
}
#[derive(Debug, PartialEq, Eq, Hash)]
struct DerivKey(RegLan, ClassId);
#[derive(Debug)]
pub struct ReManager {
store: Store<RE>,
id2re: Vec<RegLan>, sigma: RegLan, empty: RegLan,
sigma_star: RegLan, epsilon: RegLan,
sigma_plus: RegLan, deriv_cache: HashMap<DerivKey, RegLan>, }
impl ReManager {
pub fn new() -> Self {
let mut store = Store::new();
let sigma = store.make(BaseRegLan::Range(CharSet::all_chars()));
let not_sigma = store.make(BaseRegLan::Complement(sigma));
let empty = store.make(BaseRegLan::Empty);
let sigma_star = store.make(BaseRegLan::Loop(sigma, LoopRange::star()));
let epsilon = store.make(BaseRegLan::Epsilon);
let sigma_plus = store.make(BaseRegLan::Loop(sigma, LoopRange::plus()));
debug_assert_eq!(sigma.id, 0);
debug_assert_eq!(not_sigma.id, 1);
debug_assert_eq!(empty.id, 2);
debug_assert_eq!(sigma_star.id, 3);
debug_assert_eq!(epsilon.id, 4);
debug_assert_eq!(sigma_plus.id, 5);
ReManager {
store,
id2re: vec![sigma, not_sigma, empty, sigma_star, epsilon, sigma_plus],
sigma,
empty,
sigma_star,
epsilon,
sigma_plus,
deriv_cache: HashMap::new(),
}
}
fn id_to_re(&self, id: usize) -> RegLan {
self.id2re[id]
}
fn make(&mut self, ast: BaseRegLan) -> RegLan {
match ast {
BaseRegLan::Complement(x) => self.id_to_re(x.id + 1),
_ => {
let i = self.store.counter;
debug_assert!(i == self.id2re.len());
let x = self.store.make(ast);
debug_assert!(x.id <= i);
if x.id == i {
let y = self.store.make(BaseRegLan::Complement(x));
debug_assert!(y.id == i + 1);
self.id2re.push(x);
self.id2re.push(y);
}
x
}
}
}
pub fn empty(&self) -> RegLan {
self.empty
}
pub fn full(&self) -> RegLan {
self.sigma_star
}
pub fn epsilon(&self) -> RegLan {
self.epsilon
}
pub fn sigma_plus(&self) -> RegLan {
self.sigma_plus
}
pub fn char_set(&mut self, set: CharSet) -> RegLan {
self.make(BaseRegLan::Range(set))
}
pub fn complement(&mut self, e: RegLan) -> RegLan {
self.id_to_re(e.id ^ 1)
}
pub fn concat(&mut self, e1: RegLan, e2: RegLan) -> RegLan {
match (&e1.expr, &e2.expr) {
(BaseRegLan::Empty, _) => self.empty,
(_, BaseRegLan::Empty) => self.empty,
(BaseRegLan::Epsilon, _) => e2,
(_, BaseRegLan::Epsilon) => e1,
(_, BaseRegLan::Loop(y, rng)) if *e1 == **y => {
self.make(BaseRegLan::Loop(e1, rng.add_point(1)))
}
(BaseRegLan::Loop(x, rng), _) if *e2 == **x => {
self.make(BaseRegLan::Loop(e2, rng.add_point(1)))
}
(BaseRegLan::Loop(x, x_rng), BaseRegLan::Loop(y, y_rng)) if *x == *y => {
self.make(BaseRegLan::Loop(x, x_rng.add(y_rng)))
}
_ if *e1 == *e2 => self.make(BaseRegLan::Loop(e1, LoopRange::point(2))),
(BaseRegLan::Concat(x, y), _) => {
let right = self.concat(y, e2);
self.concat(x, right)
}
_ => {
if e1.nullable && e2 == self.sigma_star {
e2
} else {
self.make(BaseRegLan::Concat(e1, e2))
}
}
}
}
pub fn concat_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan {
let mut v = Vec::new();
for x in a {
flatten_concat(x, &mut v);
}
let mut result = self.epsilon;
for &x in v.iter().rev() {
result = self.concat(x, result);
}
result
}
pub fn mk_loop(&mut self, e: RegLan, range: LoopRange) -> RegLan {
if range.is_zero() {
self.epsilon
} else if range.is_one() {
e
} else {
match &e.expr {
BaseRegLan::Empty => self.empty,
BaseRegLan::Epsilon => self.epsilon,
BaseRegLan::Loop(x, x_rng) if x_rng.right_mul_is_exact(&range) => {
self.make(BaseRegLan::Loop(x, x_rng.mul(&range)))
}
_ => self.make(BaseRegLan::Loop(e, range)),
}
}
}
fn make_inter(&mut self, mut v: Vec<RegLan>) -> RegLan {
simplify_set_operation(&mut v, self.sigma_star, self.empty);
if contains(&v, self.epsilon) {
if v.iter().all(|&r| r.nullable) {
self.epsilon
} else {
self.empty
}
} else {
match v.len() {
0 => self.sigma_star,
1 => v[0],
_ => self.make(BaseRegLan::Inter(v.into())),
}
}
}
fn make_union(&mut self, mut v: Vec<RegLan>) -> RegLan {
#[allow(dead_code)]
fn is_included(r: RegLan, s: RegLan) -> bool {
let result = sub_language(r, s);
if result {
println!("---> subsumption: {r} subsumed by {s}");
}
result
}
fn is_subsumed(r: RegLan, a: &[RegLan]) -> bool {
a.iter().any(|&x| x != r && sub_language(r, x))
}
fn remove_subsumed(a: &mut Vec<RegLan>) {
let mut i = 0;
while i < a.len() {
if is_subsumed(a[i], a) {
a.remove(i);
} else {
i += 1;
}
}
}
simplify_set_operation(&mut v, self.empty, self.sigma_star);
if v.len() >= 2 {
remove_subsumed(&mut v);
}
match v.len() {
0 => self.empty,
1 => v[0],
_ => self.make(BaseRegLan::Union(v.into())),
}
}
pub fn inter(&mut self, e1: RegLan, e2: RegLan) -> RegLan {
let mut v = Vec::new();
flatten_inter(e1, &mut v);
flatten_inter(e2, &mut v);
self.make_inter(v)
}
pub fn inter_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan {
let mut v = Vec::new();
for r in a {
flatten_inter(r, &mut v);
}
self.make_inter(v)
}
pub fn union(&mut self, e1: RegLan, e2: RegLan) -> RegLan {
let mut v = Vec::new();
flatten_union(e1, &mut v);
flatten_union(e2, &mut v);
self.make_union(v)
}
pub fn union_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan {
let mut v = Vec::new();
for r in a {
flatten_union(r, &mut v);
}
self.make_union(v)
}
pub fn diff(&mut self, e1: RegLan, e2: RegLan) -> RegLan {
let comp_e2 = self.complement(e2);
self.inter(e1, comp_e2)
}
pub fn diff_list(&mut self, e1: RegLan, a: impl IntoIterator<Item = RegLan>) -> RegLan {
let mut v = Vec::new();
flatten_inter(e1, &mut v);
for r in a {
flatten_inter(self.complement(r), &mut v);
}
self.make_inter(v)
}
pub fn all_chars(&mut self) -> RegLan {
self.sigma
}
pub fn char(&mut self, x: u32) -> RegLan {
assert!(x <= MAX_CHAR);
self.char_set(CharSet::singleton(x))
}
pub fn range(&mut self, start: u32, end: u32) -> RegLan {
assert!(start <= end && end <= MAX_CHAR);
self.char_set(CharSet::range(start, end))
}
pub fn star(&mut self, e: RegLan) -> RegLan {
self.mk_loop(e, LoopRange::star())
}
pub fn plus(&mut self, e: RegLan) -> RegLan {
self.mk_loop(e, LoopRange::plus())
}
pub fn opt(&mut self, e: RegLan) -> RegLan {
self.mk_loop(e, LoopRange::opt())
}
pub fn exp(&mut self, e: RegLan, k: u32) -> RegLan {
self.mk_loop(e, LoopRange::point(k))
}
pub fn smt_loop(&mut self, e: RegLan, i: u32, j: u32) -> RegLan {
if i <= j {
self.mk_loop(e, LoopRange::finite(i, j))
} else {
self.empty
}
}
pub fn smt_range(&mut self, s1: &SmtString, s2: &SmtString) -> RegLan {
if s1.len() == 1 && s2.len() == 1 {
let c1 = s1.char(0);
let c2 = s2.char(0);
if c1 <= c2 {
return self.char_set(CharSet::range(c1, c2));
}
}
self.empty
}
pub fn str(&mut self, s: &SmtString) -> RegLan {
let mut re = self.epsilon();
for c in s.iter().rev() {
let c = self.char(*c);
re = self.concat(c, re);
}
re
}
fn deriv_list(&mut self, list: &[RegLan], c: u32) -> Vec<RegLan> {
list.iter().map(|r| self.deriv(r, c)).collect()
}
fn compute_derivative(&mut self, e: RegLan, c: u32) -> RegLan {
match e.expr {
BaseRegLan::Empty => self.empty,
BaseRegLan::Epsilon => self.empty,
BaseRegLan::Range(r) => {
if r.contains(c) {
self.epsilon
} else {
self.empty
}
}
BaseRegLan::Concat(e1, e2) => {
let d1 = self.deriv(e1, c);
let d1 = self.concat(d1, e2);
if e1.nullable {
let d2 = self.deriv(e2, c);
self.union(d1, d2)
} else {
d1
}
}
BaseRegLan::Loop(e1, range) => {
let d1 = self.deriv(e1, c);
let e2 = self.mk_loop(e1, range.shift());
self.concat(d1, e2)
}
BaseRegLan::Complement(e1) => {
let d1 = self.deriv(e1, c);
self.complement(d1)
}
BaseRegLan::Inter(ref v) => {
let d = self.deriv_list(&v[..], c);
self.inter_list(d)
}
BaseRegLan::Union(ref v) => {
let d = self.deriv_list(&v[..], c);
self.union_list(d)
}
}
}
fn deriv(&mut self, e: RegLan, c: u32) -> RegLan {
let cid = e.class_of_char(c);
self.cached_deriv(e, cid)
}
pub fn start_char(&mut self, e: RegLan, c: u32) -> bool {
match &e.expr {
BaseRegLan::Empty => false,
BaseRegLan::Epsilon => false,
BaseRegLan::Range(set) => set.contains(c),
BaseRegLan::Concat(e1, e2) => {
self.start_char(e1, c) || e1.nullable && self.start_char(e2, c)
}
BaseRegLan::Loop(e, _) => self.start_char(e, c),
BaseRegLan::Inter(args) => args.iter().all(|x| self.start_char(x, c)),
BaseRegLan::Union(args) => args.iter().any(|x| self.start_char(x, c)),
BaseRegLan::Complement(_) => {
let d = self.deriv(e, c);
!self.is_empty_re(d)
}
}
}
pub fn start_class(&mut self, e: RegLan, cid: ClassId) -> Result<bool, Error> {
if e.valid_class_id(cid) {
let c = e.pick_class_rep(cid);
Ok(self.start_char(e, c))
} else {
Err(Error::BadClassId)
}
}
fn cached_deriv(&mut self, e: RegLan, cid: ClassId) -> RegLan {
let key = DerivKey(e, cid);
match self.deriv_cache.get(&key) {
Some(&r) => r,
None => {
let c = e.pick_class_rep(cid);
let r = self.compute_derivative(e, c);
self.deriv_cache.insert(key, r);
r
}
}
}
pub fn class_derivative(&mut self, e: RegLan, cid: ClassId) -> Result<RegLan, Error> {
if e.valid_class_id(cid) {
Ok(self.cached_deriv(e, cid))
} else {
Err(Error::BadClassId)
}
}
pub fn class_derivative_unchecked(&mut self, e: RegLan, cid: ClassId) -> RegLan {
self.cached_deriv(e, cid)
}
pub fn set_derivative(&mut self, e: RegLan, c: &CharSet) -> Result<RegLan, Error> {
let cid = e.class_of_set(c)?;
Ok(self.cached_deriv(e, cid))
}
pub fn set_derivative_unchecked(&mut self, e: RegLan, c: &CharSet) -> RegLan {
let cid = e.class_of_set(c).unwrap();
self.cached_deriv(e, cid)
}
pub fn char_derivative(&mut self, e: RegLan, c: u32) -> RegLan {
debug_assert!(c <= MAX_CHAR);
self.deriv(e, c)
}
pub fn str_derivative(&mut self, e: RegLan, s: &SmtString) -> RegLan {
s.iter().fold(e, |r, &c| self.char_derivative(r, c))
}
pub fn iter_derivatives(&mut self, e: RegLan) -> DerivativeIterator<'_> {
let mut queue = BfsQueue::new();
queue.push(e);
DerivativeIterator {
manager: self,
queue,
}
}
pub fn str_in_re(&mut self, s: &SmtString, e: RegLan) -> bool {
self.str_derivative(e, s).nullable
}
pub fn is_empty_re(&mut self, e: RegLan) -> bool {
self.iter_derivatives(e).all(|x| !x.nullable)
}
fn get_string_path(&mut self, e: RegLan) -> Option<Vec<(RegLan, ClassId)>> {
let mut queue: LabeledQueue<RegLan, ClassId> = LabeledQueue::new(e);
while let Some(r) = queue.pop() {
if r.nullable {
return queue.full_path(&r);
} else {
for cid in r.class_ids() {
let d = self.class_derivative_unchecked(r, cid);
queue.push(r, cid, d);
}
}
}
None
}
pub fn get_string(&mut self, e: RegLan) -> Option<SmtString> {
match self.get_string_path(e) {
None => None,
Some(path) => {
let result: Vec<u32> = path
.iter()
.map(|(re, cid)| re.pick_class_rep(*cid))
.collect();
Some(result.into())
}
}
}
fn compile_with_bound(&mut self, e: RegLan, max_states: usize) -> Option<Automaton> {
if max_states == 0 {
None
} else {
let mut builder = AutomatonBuilder::new(&e.expr);
let mut queue = BfsQueue::new();
let mut state_count = 0;
queue.push(e);
while let Some(e) = queue.pop() {
debug_assert!(state_count <= max_states);
if state_count == max_states {
return None;
}
state_count += 1;
for set in e.char_ranges() {
let d = self.set_derivative_unchecked(e, set);
queue.push(d);
builder.add_transition(&e.expr, set, &d.expr);
}
if !e.empty_complement() {
let d = self.class_derivative_unchecked(e, ClassId::Complement);
queue.push(d);
builder.set_default_successor(&e.expr, &d.expr);
}
if e.nullable {
builder.mark_final(&e.expr);
}
}
Some(builder.build_unchecked())
}
}
pub fn compile(&mut self, e: RegLan) -> Automaton {
self.compile_with_bound(e, usize::MAX).unwrap()
}
pub fn try_compile(&mut self, e: RegLan, max_states: usize) -> Option<Automaton> {
self.compile_with_bound(e, max_states)
}
}
impl Default for ReManager {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug)]
pub struct DerivativeIterator<'a> {
queue: BfsQueue<RegLan>,
manager: &'a mut ReManager,
}
impl<'a> Iterator for DerivativeIterator<'a> {
type Item = &'a RE;
fn next(&mut self) -> Option<Self::Item> {
if let Some(r) = self.queue.pop() {
for cid in r.class_ids() {
let d = self.manager.class_derivative_unchecked(r, cid);
self.queue.push(d);
}
Some(r)
} else {
None
}
}
}
#[allow(clippy::uninlined_format_args)]
#[cfg(test)]
mod tests {
use crate::smt_strings::char_to_smt;
use super::*;
#[allow(clippy::uninlined_format_args)]
fn print_term(name: &str, r: RegLan) {
println!("term {} = {}", name, r);
println!(" ptr: {:p}", r);
println!(" id: {}", r.id);
println!(" nullable: {}", r.nullable);
println!(" singleton: {}", r.singleton);
println!(" pattern: {}", r.simple_pattern);
println!(" deriv: {}", r.deriv_class);
println!();
}
fn build_atoms(re: &mut ReManager) -> Vec<RegLan> {
vec![
re.empty(),
re.epsilon(),
re.all_chars(),
re.char('a' as u32),
re.char('b' as u32),
re.range('0' as u32, '9' as u32),
re.range('A' as u32, 'Z' as u32),
]
}
fn build_test_res(re: &mut ReManager) -> Vec<RegLan> {
let mut v = build_atoms(re);
let w = v.clone();
for &x in &w {
v.push(re.complement(x));
v.push(re.opt(x));
v.push(re.star(x));
v.push(re.plus(x));
v.push(re.exp(x, 2));
}
for &x in &w {
for &y in &w {
v.push(re.concat(x, y));
v.push(re.inter(x, y));
v.push(re.union(x, y));
}
}
v.sort();
v.dedup();
v
}
fn check_equal(re1: RegLan, re2: RegLan) {
assert_eq!(re1, re2);
assert_eq!(re1.id, re2.id);
assert!(std::ptr::eq(re1, re2));
}
#[test]
fn hash_atoms() {
let re = &mut ReManager::new();
let v1 = build_atoms(re);
let v2 = build_atoms(re);
for (i, &t) in v1.iter().enumerate() {
let name = format!("t{i}");
print_term(&name, t);
check_equal(t, v2[i]);
}
}
#[test]
fn test_loop() {
let re = &mut ReManager::new();
let v = build_atoms(re);
for &t in &v {
let x = re.star(t);
print_term(&format!("star({t})"), x);
check_equal(x, re.star(t));
}
for &t in &v {
let x = re.plus(t);
print_term(&format!("plus({t})"), x);
check_equal(x, re.plus(t));
}
for &t in &v {
let x = re.opt(t);
print_term(&format!("opt({t})"), x);
check_equal(x, re.opt(t));
}
for &t in &v {
for k in 0..3 {
let x = re.exp(t, k);
print_term(&format!("exp({t}, {k})"), x);
check_equal(x, re.exp(t, k));
}
}
let a = re.all_chars();
let a2 = re.exp(a, 2);
let a2_star = re.star(a2);
let a2_plus = re.plus(a2);
let a_star = re.star(a);
let a_plus = re.plus(a);
let a_star2 = re.exp(a_star, 2);
let a_star_star = re.star(a_star);
let a_plus_star = re.star(a_plus);
let a_star_plus = re.plus(a_star);
print_term("(Sigma^2)^*", a2_star);
print_term("(Sigma^2)^+", a2_plus);
print_term("(Sigma^*)^2", a_star2);
print_term("(Sigma^*)^*", a_star_star);
print_term("(Sigma^*)^+)", a_star_plus);
print_term("(Sigma^+)^*", a_plus_star);
assert_eq!(a_star2, a_star);
assert_ne!(a2_star, a_star);
assert_ne!(a2_star, a2_plus);
assert_ne!(a2_plus, a_star);
assert_eq!(a_plus_star, a_star);
assert_eq!(a_star_plus, a_star);
assert_eq!(a_star_star, a_star);
}
#[test]
fn test_concat() {
let re = &mut ReManager::new();
let v = build_atoms(re);
for &t in &v {
for &u in &v {
let x = re.concat(t, u);
print_term(&format!("concat({t}, {u})"), x);
check_equal(x, re.concat(t, u));
}
}
}
#[test]
fn test_inter() {
let re = &mut ReManager::new();
let v = build_atoms(re);
for &t in &v {
for &u in &v {
let x = re.inter(t, u);
print_term(&format!("inter({t}, {u})"), x);
check_equal(x, re.inter(t, u));
}
}
}
#[test]
fn test_union() {
let re = &mut ReManager::new();
let v = build_atoms(re);
for &t in &v {
for &u in &v {
let x = re.union(t, u);
print_term(&format!("union({t}, {u})"), x);
check_equal(x, re.union(t, u));
}
}
}
#[test]
fn test_complement() {
let re = &mut ReManager::new();
let v = build_atoms(re);
for &t in &v {
let x = re.complement(t);
print_term(&format!("complement({t})"), x);
check_equal(x, re.complement(t));
let y = re.complement(x);
print_term(&format!("complement({x})"), y);
check_equal(y, t);
check_equal(y, re.complement(x));
}
}
#[test]
fn test_from_str() {
let re = &mut ReManager::new();
let x = re.str(&SmtString::from("abcde"));
print_term("(str.to_re \"abcde\")", x);
check_equal(x, re.str(&SmtString::from("abcde")));
let v = vec![
re.char('a' as u32),
re.char('b' as u32),
re.epsilon(),
re.epsilon(),
re.char('c' as u32),
re.char('d' as u32),
re.char('e' as u32),
];
let y = re.concat_list(v);
check_equal(x, y);
}
#[test]
fn bigger_test() {
let re = &mut ReManager::new();
let v = build_test_res(re);
for &t in &v {
for &u in &v {
let x = re.concat(t, u);
print_term(&format!("concat({t}, {u})"), x);
check_equal(x, re.concat(t, u));
let x = re.inter(t, u);
print_term(&format!("inter({t}, {u})"), x);
check_equal(x, re.inter(t, u));
let x = re.union(t, u);
print_term(&format!("union({t}, {u})"), x);
check_equal(x, re.union(t, u));
}
}
}
#[test]
fn test_sub_terms() {
fn print_sub_terms(t: RegLan) {
println!("Base term: {t}");
println!("Sub terms = [");
for x in sub_terms(t) {
println!(" {x}");
}
println!("]");
println!("Leaves = [");
for leaf in leaves(t) {
println!(" {leaf}");
}
println!("]\n");
}
let re = &mut ReManager::new();
let v = build_test_res(re);
for &t in &v {
print_sub_terms(t)
}
let t = re.str(&"0987654321aabd".into());
print_sub_terms(t)
}
#[test]
fn test_base_patterns() {
let re = &mut ReManager::new();
fn show_patterns(r: RegLan) {
let v = decompose_concat(r);
let test = base_patterns(&v);
println!("Expression: {r} ");
println!(" vector:");
for x in &v {
println!(" {x}");
}
println!(" base patterns:");
for x in &test {
println!(" {x}");
}
println!()
}
let test1 = re.all_chars();
show_patterns(test1);
let test2 = re.epsilon();
show_patterns(test2);
let test3 = re.full();
show_patterns(test3);
let digits = re.range('0' as u32, '9' as u32);
let d = re.star(digits);
let test4 = re.concat_list([test1, digits, test3, test3, d, test1, d, d]);
show_patterns(test4);
}
#[test]
fn test_deriv() {
let re = &mut ReManager::new();
let v = build_test_res(re);
for &t in &v {
for c in t.deriv_class.ranges() {
let x = re.set_derivative(t, c);
match x {
Ok(d) => println!("deriv {t} wrt {c} = {d}"),
Err(e) => panic!("deriv {} wrt {} failed with error {:?}", t, c, e),
}
}
if !t.deriv_class.empty_complement() {
let y = re.class_derivative(t, ClassId::Complement);
match y {
Ok(d) => println!("deriv {t} wrt CompClass = {d}"),
Err(e) => panic!("deriv {} wrt CompClass failed with error {:?}", t, e),
}
}
}
}
fn show_derivatives(re: &mut ReManager, e: RegLan) {
println!("Expression: {e}");
println!(" deriv classes: {}", e.deriv_class);
for c in 'a'..='c' {
println!(" deriv({e}, {c}) = {}", re.char_derivative(e, c as u32))
}
if !e.empty_complement() {
println!(
" deriv({e}, CompClass) = {}",
re.class_derivative(e, ClassId::Complement).unwrap()
)
}
println!()
}
#[test]
fn test_deriv2() {
let re = &mut ReManager::new();
let a = re.str(&"a".into());
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let e = re.union_list([a, ac, bc]);
show_derivatives(re, e);
let d1 = re.char_derivative(e, 'a' as u32);
show_derivatives(re, d1);
let d2 = re.char_derivative(e, 'b' as u32);
show_derivatives(re, d2);
let d3 = re.char_derivative(e, 'c' as u32);
show_derivatives(re, d3);
assert!(re.str_in_re(&"a".into(), e));
assert!(re.str_in_re(&"ac".into(), e));
assert!(re.str_in_re(&"bc".into(), e));
assert!(!re.str_in_re(&"b".into(), e));
assert!(!re.str_in_re(&"c".into(), e));
}
#[test]
fn test_deriv3() {
let re = &mut ReManager::new();
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);
show_derivatives(re, e);
let d1 = re.char_derivative(e, 'a' as u32);
show_derivatives(re, d1);
let d2 = re.char_derivative(e, 'b' as u32);
show_derivatives(re, d2);
let d3 = re.char_derivative(e, 'c' as u32);
show_derivatives(re, d3);
assert!(re.str_in_re(&"acacbc".into(), e));
assert!(re.str_in_re(&"".into(), e));
}
fn all_derivatives(re: &mut ReManager, e: RegLan) -> Vec<RegLan> {
let mut queue = BfsQueue::new();
let mut result = Vec::new();
queue.push(e);
result.push(e);
while let Some(r) = queue.pop() {
for cid in r.class_ids() {
if let Ok(d) = re.class_derivative(r, cid) {
if queue.push(d) {
result.push(d)
}
} else {
panic!("Unexpected failure: deriv {} w.r.t Class{}", r, cid)
}
}
}
result
}
#[test]
fn test_all_deriv() {
fn show_derivs(e: RegLan, v: &[RegLan]) {
println!("All derivatives of {e}");
for &d in v {
println!(" {d}")
}
if v.len() == 1 {
println!("Total: 1 derivative")
} else {
println!("Total: {} derivatives", v.len())
}
}
let re = &mut ReManager::new();
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);
let v = all_derivatives(re, e);
show_derivs(e, &v);
let a = [
re.sigma_plus(),
re.str(&"something".into()),
re.all_chars(),
re.all_chars(),
re.char(':' as u32),
re.full(),
re.char(':' as u32),
re.full(),
re.str(&".jpg".into()),
];
let e = re.concat_list(a);
let v = all_derivatives(re, e);
show_derivs(e, &v);
}
#[test]
fn test_derivative_iter() {
let re = &mut ReManager::new();
let a = [
re.str(&"abc/".into()),
re.full(),
re.char('/' as u32),
re.all_chars(),
re.all_chars(),
re.char('-' as u32),
re.all_chars(),
re.all_chars(),
re.str(&"/def/".into()),
re.full(),
];
let e = re.concat_list(a);
println!("All derivatives of {e}");
let mut count = 0;
for r in re.iter_derivatives(e) {
println!(" {r}");
count += 1;
}
println!("Total: {count} derivatives");
}
#[test]
fn test_char_deriv() {
fn sum_of_str(re: &mut ReManager, s1: &str, s2: &str) -> RegLan {
let s1 = re.str(&s1.into());
let s2 = re.str(&s2.into());
re.union(s1, s2)
}
let mut re = ReManager::new();
let e = sum_of_str(&mut re, "abc", "acc");
let d = re.char_derivative(e, 'a' as u32);
assert_eq!(d, sum_of_str(&mut re, "bc", "cc"));
}
#[test]
fn test_empty_check() {
let re = &mut ReManager::new();
let full = re.full();
let abcd = re.str(&"abcd".into());
let bc = re.str(&"bc".into());
let a = re.concat(abcd, full); let b = re.concat_list([full, bc, full]);
let test = re.diff(a, b); assert!(re.is_empty_re(test));
}
#[test]
fn test_empty_check2() {
let re = &mut ReManager::new();
let c1 = [
re.all_chars(),
re.all_chars(),
re.all_chars(),
re.all_chars(),
re.full(),
re.str(&"/abcd/".into()),
re.all_chars(),
re.str(&"/end".into()),
];
let c2 = [re.all_chars(), re.str(&"/zhfg".into()), re.full()];
let e1 = re.concat_list(c1);
let e2 = re.concat_list(c2);
let test = re.inter(e1, e2);
assert!(!re.is_empty_re(test));
let sample = re.get_string(test);
assert!(sample.is_some());
println!("Sample string in {}: {}", test, sample.unwrap());
}
#[test]
fn test_compile() {
let re = &mut ReManager::new();
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);
let auto = re.compile(e);
println!("Automaton for (ac + bc)*");
println!("{}", auto);
println!("Char partition: {}", auto.combined_char_partition());
let reps = auto.pick_alphabet();
print!("Alphabet:");
for x in reps {
print!(" {}", char_to_smt(x));
}
println!();
let m = auto.compile_successors();
println!("Compiled transition table: size = {}", m.size());
assert_eq!(auto.num_states(), 3);
assert_eq!(auto.num_final_states(), 1);
}
#[test]
fn test_compile2() {
let re = &mut ReManager::new();
let a = [
re.sigma_plus(),
re.str(&"something".into()),
re.all_chars(),
re.all_chars(),
re.char(':' as u32),
re.full(),
re.char(':' as u32),
re.full(),
re.str(&".jpg".into()),
];
let e = re.concat_list(a);
let auto = re.compile(e);
println!("Automaton for {}", e);
println!("{}", auto);
println!("Char partition: {}", auto.combined_char_partition());
let a = auto.pick_alphabet();
print!("Alphabet representatives:");
for x in a {
print!(" {}", char_to_smt(x));
}
println!();
let m = auto.compile_successors();
println!("Compiled transition table: size = {}", m.size());
println!("Transition table: {}", m);
assert!(auto.accepts(&"prefix_then_somethingAB:middle:mores_stuff.jpg".into()));
assert!(!auto.accepts(&"prefix_then_something:middle:mores_stuff.jpg".into()));
auto.test_minimizer();
}
#[test]
fn test_compile3() {
let re = &mut ReManager::new();
let c1 = [
re.all_chars(),
re.all_chars(),
re.all_chars(),
re.all_chars(),
re.full(),
re.str(&"/abcd/".into()),
re.all_chars(),
re.str(&"/end".into()),
];
let c2 = [re.all_chars(), re.str(&"/zhfg".into()), re.full()];
let e1 = re.concat_list(c1);
let e2 = re.concat_list(c2);
let test = re.inter(e1, e2);
let auto = re.compile(test);
println!("Automaton for {}", test);
println!("{}", auto);
println!("Char partition: {}", auto.combined_char_partition());
let a = auto.pick_alphabet();
print!("Alphabet representatives:");
for x in a {
print!(" {}", char_to_smt(x));
}
println!();
let m = auto.compile_successors();
println!("Compiled transition table: size = {}", m.size());
println!("Table: {}", m);
let w = re.get_string(test).unwrap();
assert!(auto.accepts(&w));
auto.test_minimizer();
}
#[test]
fn test_bounded_compile() {
let re = &mut ReManager::new();
let a = [
re.str(&"abc/".into()),
re.full(),
re.char('/' as u32),
re.all_chars(),
re.all_chars(),
re.char('-' as u32),
re.all_chars(),
re.all_chars(),
re.str(&"/def/".into()),
re.full(),
];
let e = re.concat_list(a);
let test0 = re.try_compile(e, 0);
assert!(test0.is_none());
let test1 = re.try_compile(e, 46);
assert!(test1.is_none());
let test2 = re.try_compile(e, 47);
assert!(test2.is_some());
assert_eq!(test2.unwrap().num_states(), 47);
}
#[test]
fn test_compile4() {
let re = &mut ReManager::new();
let bb = re.str(&"bb".into());
let digits = re.range('0' as u32, '9' as u32);
let digits = re.plus(digits);
let first = re.concat(bb, digits);
let sigma = re.all_chars();
let lop = re.smt_loop(sigma, 0, 4);
let second = re.complement(lop);
let to_compile = re.inter(second, first);
let test1 = re.try_compile(to_compile, 10000);
assert!(test1.is_some());
assert!(test1.unwrap().accepts(&"bb01234".into()));
}
#[test]
fn test_compile5() {
let re = &mut ReManager::new();
let a = re.char('a' as u32);
let a5 = re.smt_loop(a, 5, 5);
let a5plus = re.plus(a5);
println!("testing {a5plus}");
let test = re.try_compile(a5plus, 10000);
assert!(test.is_some());
let dfa = test.unwrap();
println!("Resulting automaton: {dfa}");
assert!(dfa.accepts(&"aaaaaaaaaa".into()));
assert!(dfa.accepts(&"aaaaa".into()));
assert!(!dfa.accepts(&"aaaa".into()));
}
#[test]
fn test_compile6() {
let re = &mut ReManager::new();
let a = re.char('a' as u32);
let a5 = re.smt_loop(a, 5, 5);
let a5plus = re.plus(a5);
let a2 = re.smt_loop(a, 2, 2);
let a2plus = re.plus(a2);
let inter = re.inter(a5plus, a2plus);
println!("testing {inter}");
let test = re.try_compile(inter, 10000);
assert!(test.is_some());
let dfa = test.unwrap();
println!("Resulting automaton: {dfa}");
assert!(dfa.accepts(&"aaaaaaaaaa".into()));
assert!(!dfa.accepts(&"aaaaa".into()));
assert!(!dfa.accepts(&"aa".into()));
}
}