pub struct ReManager { /* private fields */ }
Expand description
A store for constructing regular expressions using hash-consing.
The store ensures that each regular expression has a unique integer id.
For all regular expressions e1 and e2 constructed with the same manager, we have e1.expr == e2.expr iff e1.id == e2.id
§Examples
This example shows how to create the regular expression (ac + bc)*
and
compute its derivatives.
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
let ac = re.str(&"ac".into()); // ac
let bc = re.str(&"bc".into()); // bc
let sum = re.union(ac, bc); // ac + bc
let e = re.star(sum); // (ac + bc)*
let d1 = re.char_derivative(e, 'a' as u32); // derivative of e w.r.t. 'a'
let d2 = re.char_derivative(e, 'b' as u32); // derivative of e w.r.t. 'b'
// by hash-consing: d1 and d2 are equal
assert_eq!(d1, d2);
assert!(std::ptr::eq(d1, d2));
Implementations§
Source§impl ReManager
impl ReManager
Sourcepub fn empty(&self) -> RegLan
pub fn empty(&self) -> RegLan
The empty language
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new(); // create a manager
let e = re.empty(); // get the empty language
// no string belongs to e.
assert!(! re.str_in_re(&"0129".into(), e));
Sourcepub fn full(&self) -> RegLan
pub fn full(&self) -> RegLan
The full language
This language contains every strings. It’s the complement of empty.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new(); // create a manager
let e = re.full(); // get the full language
// every string belongs to e.
assert!(re.str_in_re(&"0129".into(), e));
Sourcepub fn epsilon(&self) -> RegLan
pub fn epsilon(&self) -> RegLan
The RE that contains only the empty string
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.epsilon();
// the empty string belongs to e
assert!(re.str_in_re(&"".into(), e));
// a non-empty string does not belong to e
assert!(! re.str_in_re(&"a".into(), e));
Sourcepub fn sigma_plus(&self) -> RegLan
pub fn sigma_plus(&self) -> RegLan
The RE that contains all non-empty strings
This is the complement of epsilon.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.sigma_plus();
// the empty string does not belong to e
assert!(! re.str_in_re(&"".into(), e));
// any non-empty string belongs to e
assert!(re.str_in_re(&"a".into(), e));
Sourcepub fn char_set(&mut self, set: CharSet) -> RegLan
pub fn char_set(&mut self, set: CharSet) -> RegLan
Regular expression defined by a character set
Return the regular expression that contains all single-character strings with characters in the specified set. See also range.
§Example
Lower-case letters in ASCII.
use aws_smt_strings::regular_expressions::*;
use aws_smt_strings::character_sets::*;
let mut re = ReManager::new();
let set = CharSet::range('a' as u32, 'z' as u32);
let e = re.char_set(set);
// a single-character string that's in e
assert!(re.str_in_re(&"c".into(), e));
// a single-character string that's not in e
assert!(!re.str_in_re(&"7".into(), e));
// strings with more than one characters are not in e
assert!(!re.str_in_re(&"abc".into(), e));
Sourcepub fn complement(&mut self, e: RegLan) -> RegLan
pub fn complement(&mut self, e: RegLan) -> RegLan
Complement of a language
Return the complement of RE e
.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a_single_digit = re.range('0' as u32, '9' as u32);
let not_a_digit = re.complement(a_single_digit);
assert!(re.str_in_re(&"7".into(), a_single_digit));
assert!(! re.str_in_re(&"7".into(), not_a_digit));
// any string of more than 2 characters is not a single digit!
assert!(re.str_in_re(&"94".into(), not_a_digit))
Sourcepub fn concat(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn concat(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Concatenation of two languages
Concatenate languages e1
and e2
in that order.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a_letter = re.range('a' as u32, 'z' as u32);
let a_digit = re.range('0' as u32, '9' as u32);
let e = re.concat(a_letter, a_digit);
assert!(re.str_in_re(&"h4".into(), e));
Sourcepub fn concat_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
pub fn concat_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
Concatenation of multiple languages
Build the concatenation of a[0]
, a[1]
, … in this order.
- return epsilon is
a
is empty. - return
a[0]
isa.len() == 1
See concat
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a_letter = re.range('a' as u32, 'z' as u32);
let a_digit = re.range('0' as u32, '9' as u32);
let e = re.concat_list([a_letter, a_letter, a_digit]);
assert!(re.str_in_re(&"ab3".into(), e));
Sourcepub fn mk_loop(&mut self, e: RegLan, range: LoopRange) -> RegLan
pub fn mk_loop(&mut self, e: RegLan, range: LoopRange) -> RegLan
Generalized loop
Parameter range
defines an interval of natural numbers [i, j]
where j
may be infinity.
This method returns the regular language equal to the union of e
k for k in the interval [i, j]
.
See loop_ranges.
§Example
Regular expression that recognizes sequences of 3 to 5 digits.
use aws_smt_strings::{regular_expressions::*, loop_ranges::*};
let mut re = ReManager::new();
let digits = re.range('0' as u32, '9' as u32);
let e = re.mk_loop(digits, LoopRange::finite(3, 5));
assert!(re.str_in_re(&"12345".into(), e));
assert!(! re.str_in_re(&"123456".into(), e));
Sourcepub fn inter(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn inter(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Intersection of two languages
Return the intersection of e1
and e2
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let sigma = re.all_chars();
let b = re.exp(sigma, 4); // all strings of length 4
let digits = re.range('0' as u32, '9' as u32);
let c = re.star(digits); // all sequences of digits
let e = re.inter(b, c); // all sequences of four digits
assert!(re.str_in_re(&"0000".into(), e));
Sourcepub fn inter_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
pub fn inter_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
Sourcepub fn union(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn union(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Union of two languages
Return the union of e1
and e2
.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let abc = re.str(&"abc".into());
let de = re.str(&"de".into());
let u = re.union(abc, de);
// u contains two strings: "abc" and "de"
assert!(re.str_in_re(&"de".into(), u));
assert!(re.str_in_re(&"abc".into(), u));
Sourcepub fn union_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
pub fn union_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
Sourcepub fn diff(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn diff(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Difference of two languages
Return the difference of e1
and e2
. This is the same as
the intersection of e1
and the complement of e2
.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let sigma = re.all_chars();
let b = re.exp(sigma, 4); // all strings of length 4
let digits = re.range('0' as u32, '9' as u32);
let c = re.star(digits); // all sequences of digits
let e = re.diff(c, b); // sequences of digits of length other than four
assert!(re.str_in_re(&"".into(), e)); // the empty sequence is included
assert!(! re.str_in_re(&"0000".into(), e));
assert!(re.str_in_re(&"123456".into(), e));
Sourcepub fn diff_list(
&mut self,
e1: RegLan,
a: impl IntoIterator<Item = RegLan>,
) -> RegLan
pub fn diff_list( &mut self, e1: RegLan, a: impl IntoIterator<Item = RegLan>, ) -> RegLan
Difference of several languages
Return the difference of e1
and all regular expressions of a
- return
e1
ifa
is empty.
Sourcepub fn all_chars(&mut self) -> RegLan
pub fn all_chars(&mut self) -> RegLan
All one-character strings
This is the same as range(CharSet::all_chars)
. See range and CharSet::all_chars.
See diff for an example.
Sourcepub fn char(&mut self, x: u32) -> RegLan
pub fn char(&mut self, x: u32) -> RegLan
A character as a regular expression
Return the language that contains the one-character string x
and nothing else.
- this is the same as
char_set(CharSet::singleton(x))
. See char_set and CharSet::singleton.
§Panics
If x is not a valid SMT character (i.e., x > MAX_CHAR).
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.char('Z' as u32);
assert!(re.str_in_re(&"Z".into(), e));
Sourcepub fn range(&mut self, start: u32, end: u32) -> RegLan
pub fn range(&mut self, start: u32, end: u32) -> RegLan
Range of characters
Return the language that contains all one-character strings that contains character in range [start, end]
- this is the same as
char_set(CharSet::range(start, end))
. See char_set and CharSet::range.
§Panics
If the range is empty (i.e., start > end) or if end > MAX_CHAR.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.range('0' as u32, '9' as u32);
assert!(re.str_in_re(&"4".into(), e));
Sourcepub fn star(&mut self, e: RegLan) -> RegLan
pub fn star(&mut self, e: RegLan) -> RegLan
Kleene star closure
Return the star closure of language e
(i.e., the concatenations of an arbitrary number
of strings of e
).
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let letters = re.range('a' as u32, 'z' as u32);
let letter_sequences = re.star(letters);
assert!(re.str_in_re(&"abcd".into(), letter_sequences));
assert!(re.str_in_re(&"".into(), letter_sequences));
assert!(! re.str_in_re(&"abc-def".into(), letter_sequences));
Sourcepub fn plus(&mut self, e: RegLan) -> RegLan
pub fn plus(&mut self, e: RegLan) -> RegLan
Kleene plus
Return the closure of e
(i.e., the concatenation of one or more strings of e
)
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let letters = re.range('a' as u32, 'z' as u32);
let letter_sequences = re.plus(letters);
assert!(re.str_in_re(&"abcd".into(), letter_sequences));
assert!(! re.str_in_re(&"".into(), letter_sequences));
assert!(! re.str_in_re(&"abc-def".into(), letter_sequences));
Sourcepub fn opt(&mut self, e: RegLan) -> RegLan
pub fn opt(&mut self, e: RegLan) -> RegLan
Option
Returns the union of epsilon and e
.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.char('A' as u32);
let opt_e = re.opt(e);
// Both "A" and the empty strings are in `opt_e`
assert!(re.str_in_re(&"A".into(), opt_e));
assert!(re.str_in_re(&"".into(), opt_e));
Sourcepub fn exp(&mut self, e: RegLan, k: u32) -> RegLan
pub fn exp(&mut self, e: RegLan, k: u32) -> RegLan
Exponentiation
Concatenates e
with itself k
times.
- return epsilon if
k==0
.
§Example
All strings of length 5.
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a = re.all_chars();
let b = re.exp(a, 5);
assert!(re.str_in_re(&"ABCDE".into(), b));
assert!(! re.str_in_re(&"ABCD".into(), b));
Sourcepub fn smt_loop(&mut self, e: RegLan, i: u32, j: u32) -> RegLan
pub fn smt_loop(&mut self, e: RegLan, i: u32, j: u32) -> RegLan
Finite loop as defined in SMT-LIB
- if
i <= j
, return the regular expressionLoop(e, [i, j])
. See mk_loop - if
i > j
, return the empty language. See empty.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a = re.all_chars();
let b = re.smt_loop(a, 3, 7); // strings of length 3 to 7
assert!(re.str_in_re(&"abcdef".into(), b));
Sourcepub fn smt_range(&mut self, s1: &SmtString, s2: &SmtString) -> RegLan
pub fn smt_range(&mut self, s1: &SmtString, s2: &SmtString) -> RegLan
Character range as defined in SMT-LIB
- if
s1
ands2
are both singleton strings, ands1 <= s2
in the lexicographic ordering, return self.range(CharSet::range(c1, c2)) where c1 = unique character of s1 and c2 = unique character of s2. - otherwise, return the empty language
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let b = re.smt_range(&"a".into(), &"z".into());
assert!(re.str_in_re(&"h".into(), b));
Sourcepub fn str(&mut self, s: &SmtString) -> RegLan
pub fn str(&mut self, s: &SmtString) -> RegLan
Language that contains a single string
- Return the language that contains string
s
and nothing else. - This is the same as the SMT-LIB ‘str.to_re’ function.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let s = re.str(&"alpha".into());
assert!(re.str_in_re(&"alpha".into(), s));
assert!(! re.str_in_re(&"beta".into(), s));
Sourcepub fn start_char(&mut self, e: RegLan, c: u32) -> bool
pub fn start_char(&mut self, e: RegLan, c: u32) -> bool
Check whether character c can start expression e
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let digits = re.range('0' as u32, '9' as u32);
let s = re.star(digits);
assert!(re.start_char(s, '1' as u32));
assert!(! re.start_char(s, 'a' as u32));
Sourcepub fn start_class(&mut self, e: RegLan, cid: ClassId) -> Result<bool, Error>
pub fn start_class(&mut self, e: RegLan, cid: ClassId) -> Result<bool, Error>
Check whether all characters in class cid can start e
- return Error if cid is not a valid class for e
Sourcepub fn class_derivative(
&mut self,
e: RegLan,
cid: ClassId,
) -> Result<RegLan, Error>
pub fn class_derivative( &mut self, e: RegLan, cid: ClassId, ) -> Result<RegLan, Error>
Derivative with respect to a class id
Compute the derivative of e with respect to a class defined by cid
- if
cid
isInterval(i)
: class = i-th derivative class ofe
- if
cid
isComplement
: class = complementary derivative class ofe
Derivative classes of e
are indexed from 0 to n
-1 where n
is the
number of classes.
§Errors
Return Err(Error::BadClassId) if the class id is invalid.
- Class id
Interval(i)
is invalid if i is larger than or equal to the number of derivative classes ofe
. - Class if
Complement
is invalid if the complementary class of e is empty.
§Example
use aws_smt_strings::{regular_expressions::*, character_sets::*};
let mut re = ReManager::new();
let abc = re.str(&"abc".into());
let efg = re.str(&"efg".into());
let r = re.union(abc, efg); // 'abc' + 'efg': two derivative classes
let n = r.num_deriv_classes();
assert_eq!(n, 2);
let test1 = re.class_derivative(r, ClassId::Interval(0))?;
let test2 = re.class_derivative(r, ClassId::Interval(1))?;
let test3 = re.class_derivative(r, ClassId::Complement)?;
assert_eq!(test1, re.str(&"bc".into()));
assert_eq!(test2, re.str(&"fg".into()));
assert_eq!(test3, re.empty());
Sourcepub fn class_derivative_unchecked(&mut self, e: RegLan, cid: ClassId) -> RegLan
pub fn class_derivative_unchecked(&mut self, e: RegLan, cid: ClassId) -> RegLan
Unchecked derivative with respect to a class id
Compute the derivative of e with respect to a class defined by cid. Does not check whether cid is a valid class id for e. See class_derivative
§Panics
If cid is not a valid class id for e.
Sourcepub fn set_derivative(
&mut self,
e: RegLan,
c: &CharSet,
) -> Result<RegLan, Error>
pub fn set_derivative( &mut self, e: RegLan, c: &CharSet, ) -> Result<RegLan, Error>
Derivative with respect to a character set
Return the derivative of e with respect to c provided this is well defined.
§Errors
The derivative with respect to c is well defined either if c is included in a derivative class of e or if c is included in the complementary class. If these conditions do not hold, the method return Err(Error::UndefinedDerivative). See Error.
§Example
use aws_smt_strings::{regular_expressions::*, character_sets::*};
let mut re = ReManager::new();
let a_to_z = re.range('a' as u32, 'z' as u32);
let e = re.plus(a_to_z); // non-empty sequences of lower-case ascii letters
// the derivative of e w.r.t. ['c', 't'] is defined.
let test = re.set_derivative(e, &CharSet::range('c' as u32, 't' as u32))?;
assert_eq!(test, re.star(a_to_z));
Sourcepub fn set_derivative_unchecked(&mut self, e: RegLan, c: &CharSet) -> RegLan
pub fn set_derivative_unchecked(&mut self, e: RegLan, c: &CharSet) -> RegLan
Unchecked derivative with respect to a character set.
See derivative.
§Panics
If the derivative is not defined for this character set.
Sourcepub fn char_derivative(&mut self, e: RegLan, c: u32) -> RegLan
pub fn char_derivative(&mut self, e: RegLan, c: u32) -> RegLan
Derivative with respect to a character
The derivative of e with respect to c is a regular expression e1 such every string of e that starts with c is formed by concatenating c and a string of e1. So the language of e1 is L(e1) = { w | c.w is in L(e) }
§Example
use aws_smt_strings::regular_expressions::*;
fn sum_of_str(re: &mut ReManager, s1: &str, s2: &str) -> RegLan {
let s1 = re.str(&s1.into());
let s2 = re.str(&s2.into());
re.union(s1, s2)
}
let re = &mut ReManager::new();
let e = sum_of_str(re, "abc", "acc");
// e is 'abc + acc'
// the derivative of e w.r.t. 'a' is 'bc + cc'
let d = re.char_derivative(e, 'a' as u32);
assert_eq!(d, sum_of_str(re, "bc", "cc"));
Sourcepub fn str_derivative(&mut self, e: RegLan, s: &SmtString) -> RegLan
pub fn str_derivative(&mut self, e: RegLan, s: &SmtString) -> RegLan
Derivative with respect to a string
The derivative with respect to s is defined by induction on the length of s:
- if s is empty, deriv(e, s) = e
- if s is of the form a.w, then deriv(e, s) = deriv(w, deriv(a, s))
§Example
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
let abc = re.str(&"abc".into());
let acc = re.str(&"acc".into());
let e = re.union(abc, acc);
// e is 'abc + acc'
// the derivative of e with respect to "ab" is 'c'
let d1 = re.str_derivative(e, &"ab".into());
assert_eq!(d1, re.char('c' as u32));
// the derivative of e with respect to the empty string is e
let d2 = re.str_derivative(e, &"".into());
assert_eq!(d2, e);
Sourcepub fn iter_derivatives(&mut self, e: RegLan) -> DerivativeIterator<'_> ⓘ
pub fn iter_derivatives(&mut self, e: RegLan) -> DerivativeIterator<'_> ⓘ
Construct an iterator to list the derivatives of a regular expression
The iterator produces e
, then the derivatives of e
, then the derivatives
of these derivatives, and so forth. There are finitely many such derivatives.
The iterator produces them without duplicates.
Sourcepub fn str_in_re(&mut self, s: &SmtString, e: RegLan) -> bool
pub fn str_in_re(&mut self, s: &SmtString, e: RegLan) -> bool
Check whether a string belongs to the language defined by a regular expression
§Example
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
// Build regular expression (ac + bc)*
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);
// Check that "acacbc" is in the language
assert!(re.str_in_re(&"acacbc".into(), e))
Sourcepub fn is_empty_re(&mut self, e: RegLan) -> bool
pub fn is_empty_re(&mut self, e: RegLan) -> bool
Check whether a regular expression is empty
§Example
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
let full = re.full();
let abcd = re.str(&"abcd".into());
let bc = re.str(&"bc".into());
let a = re.concat(abcd, full); // strings that start with 'abcd'
let b = re.concat_list([full, bc, full]); // strings that contain 'bc'
let test = re.diff(a, b); // strings that start with 'abcd' but don't contain 'bc'
assert!(re.is_empty_re(test));
Sourcepub fn get_string(&mut self, e: RegLan) -> Option<SmtString>
pub fn get_string(&mut self, e: RegLan) -> Option<SmtString>
Get a string that belongs to a regular expression
Return None if the regular expression e
is empty.
§Example
use aws_smt_strings::{regular_expressions::*, smt_strings::*};
let re = &mut ReManager::new();
let str1 = SmtString::from("abc");
let str2 = SmtString::from("bcd");
let abc = re.str(&str1);
let bcd = re.str(&str2);
let u = re.union(abc, bcd);
let str = re.get_string(u);
assert!(str == Some(str1) || str == Some(str2));
Sourcepub fn compile(&mut self, e: RegLan) -> Automaton
pub fn compile(&mut self, e: RegLan) -> Automaton
Compile a regular expression to a deterministic finite state automaton
§Example
use aws_smt_strings::{regular_expressions::*, automata::*};
let re = &mut ReManager::new();
// (ac + bc)*
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);
// convert e to an automaton
let auto = re.compile(e);
// string accepted by the automaton
assert!(auto.accepts(&"acbcbc".into()))
Sourcepub fn try_compile(&mut self, e: RegLan, max_states: usize) -> Option<Automaton>
pub fn try_compile(&mut self, e: RegLan, max_states: usize) -> Option<Automaton>
Compile a regular expression to a DFA of bounded size
Try to compile a regular expression e
to a deterministic finite-state automaton
of size no more than max_states
.
- e: regular expression
- max_states: bound
Return None if the DFA has more than max_states
Return Some(a)
otherwise where a
is the automaton
§Example
use aws_smt_strings::{regular_expressions::*, automata::*};
let re = &mut ReManager::new();
// (ac + bc)+
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.plus(sum);
// the smallest DFA that recognizes e has four states
let test1 = re.try_compile(e, 3);
assert!(test1.is_none());
let test2 = re.try_compile(e, 4);
assert!(test2.is_some());