Struct ReManager

Source

pub struct ReManager { /* private fields */ }

Expand description

A store for constructing regular expressions using hash-consing.

The store ensures that each regular expression has a unique integer id.

For all regular expressions e1 and e2 constructed with the same manager, we have e1.expr == e2.expr iff e1.id == e2.id

§Examples

This example shows how to create the regular expression (ac + bc)* and compute its derivatives.

use aws_smt_strings::regular_expressions::*;

let re = &mut ReManager::new();
let ac = re.str(&"ac".into());  // ac
let bc = re.str(&"bc".into());  // bc
let sum = re.union(ac, bc);     // ac + bc
let e = re.star(sum);           // (ac + bc)*

let d1 = re.char_derivative(e, 'a' as u32); // derivative of e w.r.t. 'a'
let d2 = re.char_derivative(e, 'b' as u32); // derivative of e w.r.t. 'b'

// by hash-consing: d1 and d2 are equal
assert_eq!(d1, d2);
assert!(std::ptr::eq(d1, d2));

Implementations§

Source §

impl ReManager

Source

pub fn new() -> Self

Create a new ReManager

Source

pub fn empty(&self) -> RegLan

The empty language

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new(); // create a manager
let e = re.empty();        // get the empty language

// no string belongs to e.
assert!(! re.str_in_re(&"0129".into(), e));

Source

pub fn full(&self) -> RegLan

The full language

This language contains every strings. It’s the complement of empty.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new(); // create a manager
let e = re.full();        // get the full language

// every string belongs to e.
assert!(re.str_in_re(&"0129".into(), e));

Source

pub fn epsilon(&self) -> RegLan

The RE that contains only the empty string

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();
let e = re.epsilon();

// the empty string belongs to e
assert!(re.str_in_re(&"".into(), e));
// a non-empty string does not belong to e
assert!(! re.str_in_re(&"a".into(), e));

Source

pub fn sigma_plus(&self) -> RegLan

The RE that contains all non-empty strings

This is the complement of epsilon.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();
let e = re.sigma_plus();

// the empty string does not belong to e
assert!(! re.str_in_re(&"".into(), e));
// any non-empty string belongs to e
assert!(re.str_in_re(&"a".into(), e));

Source

pub fn char_set(&mut self, set: CharSet) -> RegLan

Regular expression defined by a character set

Return the regular expression that contains all single-character strings with characters in the specified set. See also range.

§Example

Lower-case letters in ASCII.

use aws_smt_strings::regular_expressions::*;
use aws_smt_strings::character_sets::*;

let mut re = ReManager::new();
let set = CharSet::range('a' as u32, 'z' as u32);
let e = re.char_set(set);

// a single-character string that's in e
assert!(re.str_in_re(&"c".into(), e));
// a single-character string that's not in e
assert!(!re.str_in_re(&"7".into(), e));
// strings with more than one characters are not in e
assert!(!re.str_in_re(&"abc".into(), e));

Source

pub fn complement(&mut self, e: RegLan) -> RegLan

Complement of a language

Return the complement of RE e.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();
let a_single_digit = re.range('0' as u32, '9' as u32);
let not_a_digit = re.complement(a_single_digit);

assert!(re.str_in_re(&"7".into(), a_single_digit));
assert!(! re.str_in_re(&"7".into(), not_a_digit));

// any string of more than 2 characters is not a single digit!
assert!(re.str_in_re(&"94".into(), not_a_digit))

Source

pub fn concat(&mut self, e1: RegLan, e2: RegLan) -> RegLan

Concatenation of two languages

Concatenate languages e1 and e2 in that order.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();
let a_letter = re.range('a' as u32, 'z' as u32);
let a_digit = re.range('0' as u32, '9' as u32);
let e = re.concat(a_letter, a_digit);

assert!(re.str_in_re(&"h4".into(), e));

Source

pub fn concat_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan

Concatenation of multiple languages

Build the concatenation of a[0], a[1], … in this order.

return epsilon is a is empty.
return a[0] is a.len() == 1

See concat

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let a_letter = re.range('a' as u32, 'z' as u32);
let a_digit = re.range('0' as u32, '9' as u32);
let e = re.concat_list([a_letter, a_letter, a_digit]);

assert!(re.str_in_re(&"ab3".into(), e));

Source

pub fn mk_loop(&mut self, e: RegLan, range: LoopRange) -> RegLan

Generalized loop

Parameter range defines an interval of natural numbers [i, j] where j may be infinity. This method returns the regular language equal to the union of e^k for k in the interval [i, j]. See loop_ranges.

§Example

Regular expression that recognizes sequences of 3 to 5 digits.

use aws_smt_strings::{regular_expressions::*, loop_ranges::*};

let mut re = ReManager::new();

let digits = re.range('0' as u32, '9' as u32);
let e = re.mk_loop(digits, LoopRange::finite(3, 5));

assert!(re.str_in_re(&"12345".into(), e));
assert!(! re.str_in_re(&"123456".into(), e));

Source

pub fn inter(&mut self, e1: RegLan, e2: RegLan) -> RegLan

Intersection of two languages

Return the intersection of e1 and e2

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let sigma = re.all_chars();
let b = re.exp(sigma, 4);    // all strings of length 4

let digits = re.range('0' as u32, '9' as u32);
let c = re.star(digits);    // all sequences of digits

let e = re.inter(b, c);     // all sequences of four digits

assert!(re.str_in_re(&"0000".into(), e));

Source

pub fn inter_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan

Intersection of multiple languages

This returns the intersection of a[0], a[1], etc.

return the full language (see full) if a is empty
return a[0] if a.len() == 1
otherwise construct the intersection.

See inter for an example.

Source

pub fn union(&mut self, e1: RegLan, e2: RegLan) -> RegLan

Union of two languages

Return the union of e1 and e2.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let abc = re.str(&"abc".into());
let de = re.str(&"de".into());
let u = re.union(abc, de);

// u contains two strings: "abc" and "de"
assert!(re.str_in_re(&"de".into(), u));
assert!(re.str_in_re(&"abc".into(), u));

Source

pub fn union_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan

Union of several languages

Return the union of a[0], a[1], …

if a is empty, return the empty language (see empty)
if a.len() == 1, return a[0]
otherwise, build the union.

See union.

Source

pub fn diff(&mut self, e1: RegLan, e2: RegLan) -> RegLan

Difference of two languages

Return the difference of e1 and e2. This is the same as the intersection of e1 and the complement of e2.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let sigma = re.all_chars();
let b = re.exp(sigma, 4);    // all strings of length 4

let digits = re.range('0' as u32, '9' as u32);
let c = re.star(digits);    // all sequences of digits

let e = re.diff(c, b);  // sequences of digits of length other than four

assert!(re.str_in_re(&"".into(), e)); // the empty sequence is included
assert!(! re.str_in_re(&"0000".into(), e));
assert!(re.str_in_re(&"123456".into(), e));

Source

pub fn diff_list( &mut self, e1: RegLan, a: impl IntoIterator<Item = RegLan>, ) -> RegLan

Difference of several languages

Return the difference of e1 and all regular expressions of a

return e1 if a is empty.

Source

pub fn all_chars(&mut self) -> RegLan

All one-character strings

This is the same as range(CharSet::all_chars). See range and CharSet::all_chars.

See diff for an example.

Source

pub fn char(&mut self, x: u32) -> RegLan

A character as a regular expression

Return the language that contains the one-character string x and nothing else.

this is the same as char_set(CharSet::singleton(x)). See char_set and CharSet::singleton.

§Panics

If x is not a valid SMT character (i.e., x > MAX_CHAR).

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let e = re.char('Z' as u32);

assert!(re.str_in_re(&"Z".into(), e));

Source

pub fn range(&mut self, start: u32, end: u32) -> RegLan

Range of characters

Return the language that contains all one-character strings that contains character in range [start, end]

this is the same as char_set(CharSet::range(start, end)). See char_set and CharSet::range.

§Panics

If the range is empty (i.e., start > end) or if end > MAX_CHAR.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let e = re.range('0' as u32, '9' as u32);

assert!(re.str_in_re(&"4".into(), e));

Source

pub fn star(&mut self, e: RegLan) -> RegLan

Kleene star closure

Return the star closure of language e (i.e., the concatenations of an arbitrary number of strings of e).

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let letters = re.range('a' as u32, 'z' as u32);
let letter_sequences = re.star(letters);

assert!(re.str_in_re(&"abcd".into(), letter_sequences));
assert!(re.str_in_re(&"".into(), letter_sequences));
assert!(! re.str_in_re(&"abc-def".into(), letter_sequences));

Source

pub fn plus(&mut self, e: RegLan) -> RegLan

Kleene plus

Return the closure of e (i.e., the concatenation of one or more strings of e)

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let letters = re.range('a' as u32, 'z' as u32);
let letter_sequences = re.plus(letters);

assert!(re.str_in_re(&"abcd".into(), letter_sequences));
assert!(! re.str_in_re(&"".into(), letter_sequences));
assert!(! re.str_in_re(&"abc-def".into(), letter_sequences));

Source

pub fn opt(&mut self, e: RegLan) -> RegLan

Option

Returns the union of epsilon and e.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let e = re.char('A' as u32);
let opt_e = re.opt(e);

// Both "A" and the empty strings are in `opt_e`
assert!(re.str_in_re(&"A".into(), opt_e));
assert!(re.str_in_re(&"".into(), opt_e));

Source

pub fn exp(&mut self, e: RegLan, k: u32) -> RegLan

Exponentiation

Concatenates e with itself k times.

return epsilon if k==0.

§Example

All strings of length 5.

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let a = re.all_chars();
let b = re.exp(a, 5);

assert!(re.str_in_re(&"ABCDE".into(), b));
assert!(! re.str_in_re(&"ABCD".into(), b));

Source

pub fn smt_loop(&mut self, e: RegLan, i: u32, j: u32) -> RegLan

Finite loop as defined in SMT-LIB

if i <= j, return the regular expression Loop(e, [i, j]). See mk_loop
if i > j, return the empty language. See empty.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let a = re.all_chars();
let b = re.smt_loop(a, 3, 7); // strings of length 3 to 7

assert!(re.str_in_re(&"abcdef".into(), b));

Source

pub fn smt_range(&mut self, s1: &SmtString, s2: &SmtString) -> RegLan

Character range as defined in SMT-LIB

if s1 and s2 are both singleton strings, and s1 <= s2 in the lexicographic ordering, return self.range(CharSet::range(c1, c2)) where c1 = unique character of s1 and c2 = unique character of s2.
otherwise, return the empty language

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let b = re.smt_range(&"a".into(), &"z".into());

assert!(re.str_in_re(&"h".into(), b));

Source

pub fn str(&mut self, s: &SmtString) -> RegLan

Language that contains a single string

Return the language that contains string s and nothing else.
This is the same as the SMT-LIB ‘str.to_re’ function.

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let s = re.str(&"alpha".into());

assert!(re.str_in_re(&"alpha".into(), s));
assert!(! re.str_in_re(&"beta".into(), s));

Source

pub fn start_char(&mut self, e: RegLan, c: u32) -> bool

Check whether character c can start expression e

§Example

use aws_smt_strings::regular_expressions::*;

let mut re = ReManager::new();

let digits = re.range('0' as u32, '9' as u32);
let s = re.star(digits);

assert!(re.start_char(s, '1' as u32));
assert!(! re.start_char(s, 'a' as u32));

Source

pub fn start_class(&mut self, e: RegLan, cid: ClassId) -> Result<bool, Error>

Check whether all characters in class cid can start e

return Error if cid is not a valid class for e

Source

pub fn class_derivative( &mut self, e: RegLan, cid: ClassId, ) -> Result<RegLan, Error>

Derivative with respect to a class id

Compute the derivative of e with respect to a class defined by cid

if cid is Interval(i): class = i-th derivative class of e
if cid is Complement: class = complementary derivative class of e

Derivative classes of e are indexed from 0 to n-1 where n is the number of classes.

§Errors

Return Err(Error::BadClassId) if the class id is invalid.

Class id Interval(i) is invalid if i is larger than or equal to the number of derivative classes of e.
Class if Complement is invalid if the complementary class of e is empty.

§Example

use aws_smt_strings::{regular_expressions::*, character_sets::*};
let mut re = ReManager::new();

let abc = re.str(&"abc".into());
let efg = re.str(&"efg".into());
let r = re.union(abc, efg); // 'abc' + 'efg': two derivative classes

let n = r.num_deriv_classes();
assert_eq!(n, 2);

let test1 = re.class_derivative(r, ClassId::Interval(0))?;
let test2 = re.class_derivative(r, ClassId::Interval(1))?;
let test3 = re.class_derivative(r, ClassId::Complement)?;

assert_eq!(test1, re.str(&"bc".into()));
assert_eq!(test2, re.str(&"fg".into()));
assert_eq!(test3, re.empty());

Source

pub fn class_derivative_unchecked(&mut self, e: RegLan, cid: ClassId) -> RegLan

Unchecked derivative with respect to a class id

Compute the derivative of e with respect to a class defined by cid. Does not check whether cid is a valid class id for e. See class_derivative

§Panics

If cid is not a valid class id for e.

Source

pub fn set_derivative( &mut self, e: RegLan, c: &CharSet, ) -> Result<RegLan, Error>

Derivative with respect to a character set

Return the derivative of e with respect to c provided this is well defined.

§Errors

The derivative with respect to c is well defined either if c is included in a derivative class of e or if c is included in the complementary class. If these conditions do not hold, the method return Err(Error::UndefinedDerivative). See Error.

§Example

use aws_smt_strings::{regular_expressions::*, character_sets::*};
let mut re = ReManager::new();

let a_to_z = re.range('a' as u32, 'z' as u32);
let e = re.plus(a_to_z); // non-empty sequences of lower-case ascii letters

// the derivative of e w.r.t. ['c', 't'] is defined.
let test = re.set_derivative(e, &CharSet::range('c' as u32, 't' as u32))?;

assert_eq!(test, re.star(a_to_z));

Source

pub fn set_derivative_unchecked(&mut self, e: RegLan, c: &CharSet) -> RegLan

Unchecked derivative with respect to a character set.

See derivative.

§Panics

If the derivative is not defined for this character set.

Source

pub fn char_derivative(&mut self, e: RegLan, c: u32) -> RegLan

Derivative with respect to a character

The derivative of e with respect to c is a regular expression e1 such every string of e that starts with c is formed by concatenating c and a string of e1. So the language of e1 is L(e1) = { w | c.w is in L(e) }

§Example

use aws_smt_strings::regular_expressions::*;

fn sum_of_str(re: &mut ReManager, s1: &str, s2: &str) -> RegLan {
    let s1 = re.str(&s1.into());
    let s2 = re.str(&s2.into());
    re.union(s1, s2)
}

let re = &mut ReManager::new();
let e = sum_of_str(re, "abc", "acc");

// e is 'abc + acc'
// the derivative of e w.r.t. 'a' is 'bc + cc'
let d = re.char_derivative(e, 'a' as u32);
assert_eq!(d, sum_of_str(re, "bc", "cc"));

Source

pub fn str_derivative(&mut self, e: RegLan, s: &SmtString) -> RegLan

Derivative with respect to a string

The derivative with respect to s is defined by induction on the length of s:

if s is empty, deriv(e, s) = e
if s is of the form a.w, then deriv(e, s) = deriv(w, deriv(a, s))

§Example

use aws_smt_strings::regular_expressions::*;

let re = &mut ReManager::new();
let abc = re.str(&"abc".into());
let acc = re.str(&"acc".into());
let e = re.union(abc, acc);

// e is 'abc + acc'
// the derivative of e with respect to "ab" is 'c'
let d1 = re.str_derivative(e, &"ab".into());
assert_eq!(d1, re.char('c' as u32));

// the derivative of e with respect to the empty string is e
let d2 = re.str_derivative(e, &"".into());
assert_eq!(d2, e);

Source

pub fn iter_derivatives(&mut self, e: RegLan) -> DerivativeIterator<'_> ⓘ

Construct an iterator to list the derivatives of a regular expression

The iterator produces e, then the derivatives of e, then the derivatives of these derivatives, and so forth. There are finitely many such derivatives. The iterator produces them without duplicates.

Source

pub fn str_in_re(&mut self, s: &SmtString, e: RegLan) -> bool

Check whether a string belongs to the language defined by a regular expression

§Example

use aws_smt_strings::regular_expressions::*;

let re = &mut ReManager::new();

// Build regular expression (ac + bc)*
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);

// Check that "acacbc" is in the language
assert!(re.str_in_re(&"acacbc".into(), e))

Source

pub fn is_empty_re(&mut self, e: RegLan) -> bool

Check whether a regular expression is empty

§Example

use aws_smt_strings::regular_expressions::*;

let re = &mut ReManager::new();

let full = re.full();
let abcd = re.str(&"abcd".into());
let bc = re.str(&"bc".into());

let a = re.concat(abcd, full); // strings that start with 'abcd'
let b = re.concat_list([full, bc, full]); // strings that contain 'bc'

let test = re.diff(a, b); // strings that start with 'abcd' but don't contain 'bc'
assert!(re.is_empty_re(test));

Source

pub fn get_string(&mut self, e: RegLan) -> Option<SmtString>

Get a string that belongs to a regular expression

Return None if the regular expression e is empty.

§Example

use aws_smt_strings::{regular_expressions::*, smt_strings::*};

let re = &mut ReManager::new();

let str1 = SmtString::from("abc");
let str2 = SmtString::from("bcd");

let abc = re.str(&str1);
let bcd = re.str(&str2);
let u = re.union(abc, bcd);

let str = re.get_string(u);

assert!(str == Some(str1) || str == Some(str2));

Source

pub fn compile(&mut self, e: RegLan) -> Automaton

Compile a regular expression to a deterministic finite state automaton

§Example

use aws_smt_strings::{regular_expressions::*, automata::*};

let re = &mut ReManager::new();

// (ac + bc)*
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);

// convert e to an automaton
let auto = re.compile(e);

// string accepted by the automaton
assert!(auto.accepts(&"acbcbc".into()))

Source

pub fn try_compile(&mut self, e: RegLan, max_states: usize) -> Option<Automaton>

Compile a regular expression to a DFA of bounded size

Try to compile a regular expression e to a deterministic finite-state automaton of size no more than max_states.

e: regular expression
max_states: bound

Return None if the DFA has more than max_states Return Some(a) otherwise where a is the automaton

§Example

use aws_smt_strings::{regular_expressions::*, automata::*};

let re = &mut ReManager::new();

// (ac + bc)+
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.plus(sum);

// the smallest DFA that recognizes e has four states
let test1 = re.try_compile(e, 3);
assert!(test1.is_none());

let test2 = re.try_compile(e, 4);
assert!(test2.is_some());