pub struct ReManager { /* private fields */ }Expand description
A store for constructing regular expressions using hash-consing.
The store ensures that each regular expression has a unique integer id.
For all regular expressions e1 and e2 constructed with the same manager, we have e1.expr == e2.expr iff e1.id == e2.id
§Examples
This example shows how to create the regular expression (ac + bc)* and
compute its derivatives.
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
let ac = re.str(&"ac".into()); // ac
let bc = re.str(&"bc".into()); // bc
let sum = re.union(ac, bc); // ac + bc
let e = re.star(sum); // (ac + bc)*
let d1 = re.char_derivative(e, 'a' as u32); // derivative of e w.r.t. 'a'
let d2 = re.char_derivative(e, 'b' as u32); // derivative of e w.r.t. 'b'
// by hash-consing: d1 and d2 are equal
assert_eq!(d1, d2);
assert!(std::ptr::eq(d1, d2));Implementations§
Source§impl ReManager
impl ReManager
Sourcepub fn empty(&self) -> RegLan
pub fn empty(&self) -> RegLan
The empty language
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new(); // create a manager
let e = re.empty(); // get the empty language
// no string belongs to e.
assert!(! re.str_in_re(&"0129".into(), e));Sourcepub fn full(&self) -> RegLan
pub fn full(&self) -> RegLan
The full language
This language contains every strings. It’s the complement of empty.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new(); // create a manager
let e = re.full(); // get the full language
// every string belongs to e.
assert!(re.str_in_re(&"0129".into(), e));Sourcepub fn epsilon(&self) -> RegLan
pub fn epsilon(&self) -> RegLan
The RE that contains only the empty string
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.epsilon();
// the empty string belongs to e
assert!(re.str_in_re(&"".into(), e));
// a non-empty string does not belong to e
assert!(! re.str_in_re(&"a".into(), e));Sourcepub fn sigma_plus(&self) -> RegLan
pub fn sigma_plus(&self) -> RegLan
The RE that contains all non-empty strings
This is the complement of epsilon.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.sigma_plus();
// the empty string does not belong to e
assert!(! re.str_in_re(&"".into(), e));
// any non-empty string belongs to e
assert!(re.str_in_re(&"a".into(), e));Sourcepub fn char_set(&mut self, set: CharSet) -> RegLan
pub fn char_set(&mut self, set: CharSet) -> RegLan
Regular expression defined by a character set
Return the regular expression that contains all single-character strings with characters in the specified set. See also range.
§Example
Lower-case letters in ASCII.
use aws_smt_strings::regular_expressions::*;
use aws_smt_strings::character_sets::*;
let mut re = ReManager::new();
let set = CharSet::range('a' as u32, 'z' as u32);
let e = re.char_set(set);
// a single-character string that's in e
assert!(re.str_in_re(&"c".into(), e));
// a single-character string that's not in e
assert!(!re.str_in_re(&"7".into(), e));
// strings with more than one characters are not in e
assert!(!re.str_in_re(&"abc".into(), e));Sourcepub fn complement(&mut self, e: RegLan) -> RegLan
pub fn complement(&mut self, e: RegLan) -> RegLan
Complement of a language
Return the complement of RE e.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a_single_digit = re.range('0' as u32, '9' as u32);
let not_a_digit = re.complement(a_single_digit);
assert!(re.str_in_re(&"7".into(), a_single_digit));
assert!(! re.str_in_re(&"7".into(), not_a_digit));
// any string of more than 2 characters is not a single digit!
assert!(re.str_in_re(&"94".into(), not_a_digit))Sourcepub fn concat(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn concat(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Concatenation of two languages
Concatenate languages e1 and e2 in that order.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a_letter = re.range('a' as u32, 'z' as u32);
let a_digit = re.range('0' as u32, '9' as u32);
let e = re.concat(a_letter, a_digit);
assert!(re.str_in_re(&"h4".into(), e));Sourcepub fn concat_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
pub fn concat_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
Concatenation of multiple languages
Build the concatenation of a[0], a[1], … in this order.
- return epsilon is
ais empty. - return
a[0]isa.len() == 1
See concat
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a_letter = re.range('a' as u32, 'z' as u32);
let a_digit = re.range('0' as u32, '9' as u32);
let e = re.concat_list([a_letter, a_letter, a_digit]);
assert!(re.str_in_re(&"ab3".into(), e));Sourcepub fn mk_loop(&mut self, e: RegLan, range: LoopRange) -> RegLan
pub fn mk_loop(&mut self, e: RegLan, range: LoopRange) -> RegLan
Generalized loop
Parameter range defines an interval of natural numbers [i, j] where j may be infinity.
This method returns the regular language equal to the union of ek for k in the interval [i, j].
See loop_ranges.
§Example
Regular expression that recognizes sequences of 3 to 5 digits.
use aws_smt_strings::{regular_expressions::*, loop_ranges::*};
let mut re = ReManager::new();
let digits = re.range('0' as u32, '9' as u32);
let e = re.mk_loop(digits, LoopRange::finite(3, 5));
assert!(re.str_in_re(&"12345".into(), e));
assert!(! re.str_in_re(&"123456".into(), e));Sourcepub fn inter(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn inter(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Intersection of two languages
Return the intersection of e1 and e2
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let sigma = re.all_chars();
let b = re.exp(sigma, 4); // all strings of length 4
let digits = re.range('0' as u32, '9' as u32);
let c = re.star(digits); // all sequences of digits
let e = re.inter(b, c); // all sequences of four digits
assert!(re.str_in_re(&"0000".into(), e));Sourcepub fn inter_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
pub fn inter_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
Sourcepub fn union(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn union(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Union of two languages
Return the union of e1 and e2.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let abc = re.str(&"abc".into());
let de = re.str(&"de".into());
let u = re.union(abc, de);
// u contains two strings: "abc" and "de"
assert!(re.str_in_re(&"de".into(), u));
assert!(re.str_in_re(&"abc".into(), u));Sourcepub fn union_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
pub fn union_list(&mut self, a: impl IntoIterator<Item = RegLan>) -> RegLan
Sourcepub fn diff(&mut self, e1: RegLan, e2: RegLan) -> RegLan
pub fn diff(&mut self, e1: RegLan, e2: RegLan) -> RegLan
Difference of two languages
Return the difference of e1 and e2. This is the same as
the intersection of e1 and the complement of e2.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let sigma = re.all_chars();
let b = re.exp(sigma, 4); // all strings of length 4
let digits = re.range('0' as u32, '9' as u32);
let c = re.star(digits); // all sequences of digits
let e = re.diff(c, b); // sequences of digits of length other than four
assert!(re.str_in_re(&"".into(), e)); // the empty sequence is included
assert!(! re.str_in_re(&"0000".into(), e));
assert!(re.str_in_re(&"123456".into(), e));Sourcepub fn diff_list(
&mut self,
e1: RegLan,
a: impl IntoIterator<Item = RegLan>,
) -> RegLan
pub fn diff_list( &mut self, e1: RegLan, a: impl IntoIterator<Item = RegLan>, ) -> RegLan
Difference of several languages
Return the difference of e1 and all regular expressions of a
- return
e1ifais empty.
Sourcepub fn all_chars(&mut self) -> RegLan
pub fn all_chars(&mut self) -> RegLan
All one-character strings
This is the same as range(CharSet::all_chars). See range and CharSet::all_chars.
See diff for an example.
Sourcepub fn char(&mut self, x: u32) -> RegLan
pub fn char(&mut self, x: u32) -> RegLan
A character as a regular expression
Return the language that contains the one-character string x and nothing else.
- this is the same as
char_set(CharSet::singleton(x)). See char_set and CharSet::singleton.
§Panics
If x is not a valid SMT character (i.e., x > MAX_CHAR).
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.char('Z' as u32);
assert!(re.str_in_re(&"Z".into(), e));Sourcepub fn range(&mut self, start: u32, end: u32) -> RegLan
pub fn range(&mut self, start: u32, end: u32) -> RegLan
Range of characters
Return the language that contains all one-character strings that contains character in range [start, end]
- this is the same as
char_set(CharSet::range(start, end)). See char_set and CharSet::range.
§Panics
If the range is empty (i.e., start > end) or if end > MAX_CHAR.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.range('0' as u32, '9' as u32);
assert!(re.str_in_re(&"4".into(), e));Sourcepub fn star(&mut self, e: RegLan) -> RegLan
pub fn star(&mut self, e: RegLan) -> RegLan
Kleene star closure
Return the star closure of language e (i.e., the concatenations of an arbitrary number
of strings of e).
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let letters = re.range('a' as u32, 'z' as u32);
let letter_sequences = re.star(letters);
assert!(re.str_in_re(&"abcd".into(), letter_sequences));
assert!(re.str_in_re(&"".into(), letter_sequences));
assert!(! re.str_in_re(&"abc-def".into(), letter_sequences));Sourcepub fn plus(&mut self, e: RegLan) -> RegLan
pub fn plus(&mut self, e: RegLan) -> RegLan
Kleene plus
Return the closure of e (i.e., the concatenation of one or more strings of e)
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let letters = re.range('a' as u32, 'z' as u32);
let letter_sequences = re.plus(letters);
assert!(re.str_in_re(&"abcd".into(), letter_sequences));
assert!(! re.str_in_re(&"".into(), letter_sequences));
assert!(! re.str_in_re(&"abc-def".into(), letter_sequences));Sourcepub fn opt(&mut self, e: RegLan) -> RegLan
pub fn opt(&mut self, e: RegLan) -> RegLan
Option
Returns the union of epsilon and e.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let e = re.char('A' as u32);
let opt_e = re.opt(e);
// Both "A" and the empty strings are in `opt_e`
assert!(re.str_in_re(&"A".into(), opt_e));
assert!(re.str_in_re(&"".into(), opt_e));Sourcepub fn exp(&mut self, e: RegLan, k: u32) -> RegLan
pub fn exp(&mut self, e: RegLan, k: u32) -> RegLan
Exponentiation
Concatenates e with itself k times.
- return epsilon if
k==0.
§Example
All strings of length 5.
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a = re.all_chars();
let b = re.exp(a, 5);
assert!(re.str_in_re(&"ABCDE".into(), b));
assert!(! re.str_in_re(&"ABCD".into(), b));Sourcepub fn smt_loop(&mut self, e: RegLan, i: u32, j: u32) -> RegLan
pub fn smt_loop(&mut self, e: RegLan, i: u32, j: u32) -> RegLan
Finite loop as defined in SMT-LIB
- if
i <= j, return the regular expressionLoop(e, [i, j]). See mk_loop - if
i > j, return the empty language. See empty.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let a = re.all_chars();
let b = re.smt_loop(a, 3, 7); // strings of length 3 to 7
assert!(re.str_in_re(&"abcdef".into(), b));Sourcepub fn smt_range(&mut self, s1: &SmtString, s2: &SmtString) -> RegLan
pub fn smt_range(&mut self, s1: &SmtString, s2: &SmtString) -> RegLan
Character range as defined in SMT-LIB
- if
s1ands2are both singleton strings, ands1 <= s2in the lexicographic ordering, return self.range(CharSet::range(c1, c2)) where c1 = unique character of s1 and c2 = unique character of s2. - otherwise, return the empty language
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let b = re.smt_range(&"a".into(), &"z".into());
assert!(re.str_in_re(&"h".into(), b));Sourcepub fn str(&mut self, s: &SmtString) -> RegLan
pub fn str(&mut self, s: &SmtString) -> RegLan
Language that contains a single string
- Return the language that contains string
sand nothing else. - This is the same as the SMT-LIB ‘str.to_re’ function.
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let s = re.str(&"alpha".into());
assert!(re.str_in_re(&"alpha".into(), s));
assert!(! re.str_in_re(&"beta".into(), s));Sourcepub fn start_char(&mut self, e: RegLan, c: u32) -> bool
pub fn start_char(&mut self, e: RegLan, c: u32) -> bool
Check whether character c can start expression e
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let digits = re.range('0' as u32, '9' as u32);
let s = re.star(digits);
assert!(re.start_char(s, '1' as u32));
assert!(! re.start_char(s, 'a' as u32));Sourcepub fn start_class(&mut self, e: RegLan, cid: ClassId) -> Result<bool, Error>
pub fn start_class(&mut self, e: RegLan, cid: ClassId) -> Result<bool, Error>
Check whether all characters in class cid can start e
- return Error if cid is not a valid class for e
Sourcepub fn class_derivative(
&mut self,
e: RegLan,
cid: ClassId,
) -> Result<RegLan, Error>
pub fn class_derivative( &mut self, e: RegLan, cid: ClassId, ) -> Result<RegLan, Error>
Derivative with respect to a class id
Compute the derivative of e with respect to a class defined by cid
- if
cidisInterval(i): class = i-th derivative class ofe - if
cidisComplement: class = complementary derivative class ofe
Derivative classes of e are indexed from 0 to n-1 where n is the
number of classes.
§Errors
Return Err(Error::BadClassId) if the class id is invalid.
- Class id
Interval(i)is invalid if i is larger than or equal to the number of derivative classes ofe. - Class if
Complementis invalid if the complementary class of e is empty.
§Example
use aws_smt_strings::{regular_expressions::*, character_sets::*};
let mut re = ReManager::new();
let abc = re.str(&"abc".into());
let efg = re.str(&"efg".into());
let r = re.union(abc, efg); // 'abc' + 'efg': two derivative classes
let n = r.num_deriv_classes();
assert_eq!(n, 2);
let test1 = re.class_derivative(r, ClassId::Interval(0))?;
let test2 = re.class_derivative(r, ClassId::Interval(1))?;
let test3 = re.class_derivative(r, ClassId::Complement)?;
assert_eq!(test1, re.str(&"bc".into()));
assert_eq!(test2, re.str(&"fg".into()));
assert_eq!(test3, re.empty());Sourcepub fn class_derivative_unchecked(&mut self, e: RegLan, cid: ClassId) -> RegLan
pub fn class_derivative_unchecked(&mut self, e: RegLan, cid: ClassId) -> RegLan
Unchecked derivative with respect to a class id
Compute the derivative of e with respect to a class defined by cid. Does not check whether cid is a valid class id for e. See class_derivative
§Panics
If cid is not a valid class id for e.
Sourcepub fn set_derivative(
&mut self,
e: RegLan,
c: &CharSet,
) -> Result<RegLan, Error>
pub fn set_derivative( &mut self, e: RegLan, c: &CharSet, ) -> Result<RegLan, Error>
Derivative with respect to a character set
Return the derivative of e with respect to c provided this is well defined.
§Errors
The derivative with respect to c is well defined either if c is included in a derivative class of e or if c is included in the complementary class. If these conditions do not hold, the method return Err(Error::UndefinedDerivative). See Error.
§Example
use aws_smt_strings::{regular_expressions::*, character_sets::*};
let mut re = ReManager::new();
let a_to_z = re.range('a' as u32, 'z' as u32);
let e = re.plus(a_to_z); // non-empty sequences of lower-case ascii letters
// the derivative of e w.r.t. ['c', 't'] is defined.
let test = re.set_derivative(e, &CharSet::range('c' as u32, 't' as u32))?;
assert_eq!(test, re.star(a_to_z));Sourcepub fn set_derivative_unchecked(&mut self, e: RegLan, c: &CharSet) -> RegLan
pub fn set_derivative_unchecked(&mut self, e: RegLan, c: &CharSet) -> RegLan
Unchecked derivative with respect to a character set.
See derivative.
§Panics
If the derivative is not defined for this character set.
Sourcepub fn char_derivative(&mut self, e: RegLan, c: u32) -> RegLan
pub fn char_derivative(&mut self, e: RegLan, c: u32) -> RegLan
Derivative with respect to a character
The derivative of e with respect to c is a regular expression e1 such every string of e that starts with c is formed by concatenating c and a string of e1. So the language of e1 is L(e1) = { w | c.w is in L(e) }
§Example
use aws_smt_strings::regular_expressions::*;
fn sum_of_str(re: &mut ReManager, s1: &str, s2: &str) -> RegLan {
let s1 = re.str(&s1.into());
let s2 = re.str(&s2.into());
re.union(s1, s2)
}
let re = &mut ReManager::new();
let e = sum_of_str(re, "abc", "acc");
// e is 'abc + acc'
// the derivative of e w.r.t. 'a' is 'bc + cc'
let d = re.char_derivative(e, 'a' as u32);
assert_eq!(d, sum_of_str(re, "bc", "cc"));Sourcepub fn str_derivative(&mut self, e: RegLan, s: &SmtString) -> RegLan
pub fn str_derivative(&mut self, e: RegLan, s: &SmtString) -> RegLan
Derivative with respect to a string
The derivative with respect to s is defined by induction on the length of s:
- if s is empty, deriv(e, s) = e
- if s is of the form a.w, then deriv(e, s) = deriv(w, deriv(a, s))
§Example
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
let abc = re.str(&"abc".into());
let acc = re.str(&"acc".into());
let e = re.union(abc, acc);
// e is 'abc + acc'
// the derivative of e with respect to "ab" is 'c'
let d1 = re.str_derivative(e, &"ab".into());
assert_eq!(d1, re.char('c' as u32));
// the derivative of e with respect to the empty string is e
let d2 = re.str_derivative(e, &"".into());
assert_eq!(d2, e);Sourcepub fn iter_derivatives(&mut self, e: RegLan) -> DerivativeIterator<'_> ⓘ
pub fn iter_derivatives(&mut self, e: RegLan) -> DerivativeIterator<'_> ⓘ
Construct an iterator to list the derivatives of a regular expression
The iterator produces e, then the derivatives of e, then the derivatives
of these derivatives, and so forth. There are finitely many such derivatives.
The iterator produces them without duplicates.
Sourcepub fn str_in_re(&mut self, s: &SmtString, e: RegLan) -> bool
pub fn str_in_re(&mut self, s: &SmtString, e: RegLan) -> bool
Check whether a string belongs to the language defined by a regular expression
§Example
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
// Build regular expression (ac + bc)*
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);
// Check that "acacbc" is in the language
assert!(re.str_in_re(&"acacbc".into(), e))Sourcepub fn is_empty_re(&mut self, e: RegLan) -> bool
pub fn is_empty_re(&mut self, e: RegLan) -> bool
Check whether a regular expression is empty
§Example
use aws_smt_strings::regular_expressions::*;
let re = &mut ReManager::new();
let full = re.full();
let abcd = re.str(&"abcd".into());
let bc = re.str(&"bc".into());
let a = re.concat(abcd, full); // strings that start with 'abcd'
let b = re.concat_list([full, bc, full]); // strings that contain 'bc'
let test = re.diff(a, b); // strings that start with 'abcd' but don't contain 'bc'
assert!(re.is_empty_re(test));Sourcepub fn is_empty_re_bounded(
&mut self,
e: RegLan,
max_derivatives: usize,
) -> Option<bool>
pub fn is_empty_re_bounded( &mut self, e: RegLan, max_derivatives: usize, ) -> Option<bool>
Check whether a regular expression is empty, with bounded derivative exploration
This is a resource-bounded version of is_empty_re that limits the
number of derivatives computed to avoid potentially expensive operations.
§Returns
Noneif the derivative limit was reached before determining emptiness, otherwiseSome(true)if the language is emptySome(false)if the language is non-empty
§Example
use aws_smt_strings::regular_expressions::*;
let mut re = ReManager::new();
let abc = re.str(&"abc".into());
let def = re.str(&"def".into());
let intersection = re.inter(abc, def);
let small_bound_result = re.is_empty_re_bounded(intersection, 1);
assert_eq!(small_bound_result, None);
let result = re.is_empty_re_bounded(intersection, 3);
assert_eq!(result, Some(true));Sourcepub fn get_string(&mut self, e: RegLan) -> Option<SmtString>
pub fn get_string(&mut self, e: RegLan) -> Option<SmtString>
Get a string that belongs to a regular expression
Return None if the regular expression e is empty.
§Example
use aws_smt_strings::{regular_expressions::*, smt_strings::*};
let re = &mut ReManager::new();
let str1 = SmtString::from("abc");
let str2 = SmtString::from("bcd");
let abc = re.str(&str1);
let bcd = re.str(&str2);
let u = re.union(abc, bcd);
let str = re.get_string(u);
assert!(str == Some(str1) || str == Some(str2));Sourcepub fn compile(&mut self, e: RegLan) -> Automaton
pub fn compile(&mut self, e: RegLan) -> Automaton
Compile a regular expression to a deterministic finite state automaton
§Example
use aws_smt_strings::{regular_expressions::*, automata::*};
let re = &mut ReManager::new();
// (ac + bc)*
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.star(sum);
// convert e to an automaton
let auto = re.compile(e);
// string accepted by the automaton
assert!(auto.accepts(&"acbcbc".into()))Sourcepub fn try_compile(&mut self, e: RegLan, max_states: usize) -> Option<Automaton>
pub fn try_compile(&mut self, e: RegLan, max_states: usize) -> Option<Automaton>
Compile a regular expression to a DFA of bounded size
Try to compile a regular expression e to a deterministic finite-state automaton
of size no more than max_states.
- e: regular expression
- max_states: bound
Return None if the DFA has more than max_states
Return Some(a) otherwise where a is the automaton
§Example
use aws_smt_strings::{regular_expressions::*, automata::*};
let re = &mut ReManager::new();
// (ac + bc)+
let ac = re.str(&"ac".into());
let bc = re.str(&"bc".into());
let sum = re.union(ac, bc);
let e = re.plus(sum);
// the smallest DFA that recognizes e has four states
let test1 = re.try_compile(e, 3);
assert!(test1.is_none());
let test2 = re.try_compile(e, 4);
assert!(test2.is_some());