aws_smt_strings/
smt_regular_expressions.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Regular expressions as defined in the SMT-LIB QF_S Theory
6//! The alphabet is the set of u32 between 0 and 0x2FFFF.
7//!
8
9use crate::matcher::{naive_re_search, SearchResult};
10use crate::regular_expressions::*;
11use crate::smt_strings::*;
12use std::cell::RefCell;
13
14//
15// RegLan and ReManager are not thread safe (because we use Rc not Arc).
16// We keep a reference to a global ReManager as a thread-local object.
17//
18thread_local!(static MANAGER : RefCell<ReManager> = RefCell::new(ReManager::new()));
19
20///
21/// Singleton language
22///
23/// # Example
24/// ```
25/// use aws_smt_strings::{smt_strings::*, smt_regular_expressions::*};
26///
27/// let test = SmtString::from("abcde");
28/// let re = str_to_re(&test);
29/// assert!(str_in_re(&test, re));
30/// ```
31pub fn str_to_re(s: &SmtString) -> RegLan {
32    MANAGER.with(|m| m.borrow_mut().str(s))
33}
34
35///
36/// Empty language
37///
38/// # Example
39///
40/// ```
41/// use aws_smt_strings::{smt_strings::*, smt_regular_expressions::*};
42///
43/// let test = EMPTY;
44/// assert!(! str_in_re(&test, re_none()));
45/// ```
46pub fn re_none() -> RegLan {
47    MANAGER.with(|m| m.borrow().empty())
48}
49
50///
51/// Full language
52///
53/// # Example
54///
55/// ```
56/// use aws_smt_strings::{smt_strings::*, smt_regular_expressions::*};
57///
58/// assert!(str_in_re(&EMPTY, re_all()));
59/// assert!(str_in_re(&"18938".into(), re_all()));
60/// ```
61pub fn re_all() -> RegLan {
62    MANAGER.with(|m| m.borrow().full())
63}
64
65///
66/// All strings of length 1
67///
68/// # Example
69///
70/// ```
71/// use aws_smt_strings::smt_regular_expressions::*;
72///
73/// let r = re_allchar();
74/// assert!(str_in_re(&"A".into(), r));
75/// assert!(! str_in_re(&"AA".into(), r));
76/// ```
77///
78pub fn re_allchar() -> RegLan {
79    MANAGER.with(|m| m.borrow_mut().all_chars())
80}
81
82///
83/// Concatenation of two regular expressions
84///
85/// # Example
86///
87/// ```
88/// use aws_smt_strings::smt_regular_expressions::*;
89///
90/// let aaa = str_to_re(&"aaa".into());
91/// let bbb = str_to_re(&"bbb".into());
92/// assert!(str_in_re(&"aaabbb".into(), re_concat(aaa, bbb)));
93/// ```
94pub fn re_concat(r1: RegLan, r2: RegLan) -> RegLan {
95    MANAGER.with(|m| m.borrow_mut().concat(r1, r2))
96}
97
98///
99/// Concatenation of several regular expressions
100///
101/// # Example
102///
103/// ```
104/// use aws_smt_strings::smt_regular_expressions::*;
105///
106/// let r1 = str_to_re(&"abc".into());
107/// let r2 = re_allchar();
108/// let c = re_concat_list([r1, r2, r2]);
109/// assert!(str_in_re(&"abcXY".into(), c));
110/// ```
111pub fn re_concat_list(a: impl IntoIterator<Item = RegLan>) -> RegLan {
112    MANAGER.with(|m| m.borrow_mut().concat_list(a))
113}
114
115///
116/// Union of two regular expressions
117///
118/// # Example
119///
120/// ```
121/// use aws_smt_strings::smt_regular_expressions::*;
122///
123/// let r1 = re_star(str_to_re(&"a".into()));
124/// let r2 = re_star(str_to_re(&"b".into()));
125/// let u = re_union(r1, r2);
126/// assert!(str_in_re(&"aaaa".into(), u));
127/// assert!(str_in_re(&"bbb".into(), u));
128/// assert!(str_in_re(&"".into(), u));
129/// ```
130pub fn re_union(r1: RegLan, r2: RegLan) -> RegLan {
131    MANAGER.with(|m| m.borrow_mut().union(r1, r2))
132}
133
134///
135/// Union of several regular expressions
136///
137/// # Example
138///
139/// ```
140/// use aws_smt_strings::smt_regular_expressions::*;
141///
142/// let r1 = re_plus(str_to_re(&"a".into()));
143/// let r2 = re_plus(str_to_re(&"b".into()));
144/// let r3 = re_plus(str_to_re(&"c".into()));
145/// let u = re_union_list([r1, r2, r3]);
146/// assert!(str_in_re(&"cccc".into(), u));
147/// ```
148pub fn re_union_list(a: impl IntoIterator<Item = RegLan>) -> RegLan {
149    MANAGER.with(|m| m.borrow_mut().union_list(a))
150}
151
152///
153/// Intersection of two regular expressions
154///
155/// # Example
156///
157/// ```
158/// use aws_smt_strings::smt_regular_expressions::*;
159///
160/// let r1 = re_star(str_to_re(&"a".into()));
161/// let r2 = re_star(str_to_re(&"b".into()));
162/// let u = re_inter(r1, r2);
163/// assert!(str_in_re(&"".into(), u));
164/// assert!(! str_in_re(&"aaa".into(), u));
165/// ```
166pub fn re_inter(r1: RegLan, r2: RegLan) -> RegLan {
167    MANAGER.with(|m| m.borrow_mut().inter(r1, r2))
168}
169
170///
171/// Intersection of several regular expressions
172///
173/// # Example
174///
175/// ```
176/// use aws_smt_strings::smt_regular_expressions::*;
177///
178/// let r1 = re_concat(str_to_re(&"aaaa".into()), re_all());
179/// let r2 = re_concat(str_to_re(&"aaa".into()), re_all());
180/// let r3 = re_concat(str_to_re(&"aa".into()), re_all());
181/// let r = re_inter_list([r1, r2, r3]);
182/// assert!(str_in_re(&"aaaabb".into(), r));
183/// ```
184pub fn re_inter_list(a: impl IntoIterator<Item = RegLan>) -> RegLan {
185    MANAGER.with(|m| m.borrow_mut().inter_list(a))
186}
187
188///
189/// Kleene closure
190///
191/// # Example
192///
193/// ```
194/// use aws_smt_strings::smt_regular_expressions::*;
195///
196/// let r = re_star(re_union(str_to_re(&"aba".into()), str_to_re(&"cc".into())));
197/// assert!(str_in_re(&"ccccabacc".into(), r));
198/// ```
199pub fn re_star(r: RegLan) -> RegLan {
200    MANAGER.with(|m| m.borrow_mut().star(r))
201}
202
203///
204/// Complement
205///
206/// # Example
207///
208/// ```
209/// use aws_smt_strings::smt_regular_expressions::*;
210///
211/// let r = re_comp(str_to_re(&"abcd".into()));
212/// assert!(! str_in_re(&"abcd".into(), r));
213/// assert!(str_in_re(&"abcdef".into(), r));
214/// ```
215pub fn re_comp(r: RegLan) -> RegLan {
216    MANAGER.with(|m| m.borrow_mut().complement(r))
217}
218
219///
220/// Difference of two regular expressions
221///
222/// # Example
223///
224/// ```
225/// use aws_smt_strings::smt_regular_expressions::*;
226///
227/// let r1 = re_all();
228/// let r2 = re_concat(re_allchar(), re_allchar());
229/// let r = re_diff(r1, r2);
230/// assert!(str_in_re(&"X".into(), r));
231/// assert!(str_in_re(&"XYZ".into(), r));
232/// ```
233///
234pub fn re_diff(r1: RegLan, r2: RegLan) -> RegLan {
235    MANAGER.with(|m| m.borrow_mut().diff(r1, r2))
236}
237
238///
239/// Difference of several languages
240///
241/// `re_diff_list(r, a)` is the same as `re_diff(r, re_inter_list(a))`
242///
243pub fn re_diff_list(r: RegLan, a: impl IntoIterator<Item = RegLan>) -> RegLan {
244    MANAGER.with(|m| m.borrow_mut().diff_list(r, a))
245}
246
247///
248/// Kleene cross
249///
250/// # Example
251///
252/// ```
253/// use aws_smt_strings::smt_regular_expressions::*;
254///
255/// let r = re_union(str_to_re(&"ab".into()), str_to_re(&"cde".into()));
256/// assert!(str_in_re(&"abcdeab".into(), re_plus(r)));
257/// ```
258pub fn re_plus(r: RegLan) -> RegLan {
259    MANAGER.with(|m| m.borrow_mut().plus(r))
260}
261
262///
263/// Option
264///
265/// # Example
266///
267/// ```
268/// use aws_smt_strings::smt_regular_expressions::*;
269///
270/// let r = str_to_re(&"abcd".into());
271/// let opt_r = re_opt(r);
272/// assert!(str_in_re(&"".into(), opt_r));
273/// assert!(str_in_re(&"abcd".into(), opt_r));
274/// ```
275pub fn re_opt(r: RegLan) -> RegLan {
276    MANAGER.with(|m| m.borrow_mut().opt(r))
277}
278
279///
280/// Range
281///
282/// # Example
283///
284/// ```
285/// use aws_smt_strings::smt_regular_expressions::*;
286///
287/// let digit = re_range(&"0".into(), &"9".into());
288/// assert!(str_in_re(&"6".into(), digit));
289/// ```
290///
291pub fn re_range(s1: &SmtString, s2: &SmtString) -> RegLan {
292    MANAGER.with(|m| m.borrow_mut().smt_range(s1, s2))
293}
294
295///
296/// Power
297///
298/// # Example
299///
300/// ```
301/// use aws_smt_strings::smt_regular_expressions::*;
302///
303/// let digit = re_range(&"0".into(), &"9".into());
304/// let three_digits = re_power(digit, 3);
305/// assert!(str_in_re(&"124".into(), three_digits));
306/// ```
307pub fn re_power(r: RegLan, k: u32) -> RegLan {
308    MANAGER.with(|m| m.borrow_mut().exp(r, k))
309}
310
311///
312/// Loop
313///
314/// # Example
315///
316/// ```
317/// use aws_smt_strings::smt_regular_expressions::*;
318///
319/// let digit = re_range(&"0".into(), &"9".into());
320/// let two_to_five_digits = re_loop(digit, 2, 5);
321/// assert!(str_in_re(&"98".into(), two_to_five_digits));
322/// assert!(str_in_re(&"98765".into(), two_to_five_digits));
323/// ```
324///
325pub fn re_loop(r: RegLan, i: u32, j: u32) -> RegLan {
326    MANAGER.with(|m| m.borrow_mut().smt_loop(r, i, j))
327}
328
329///
330/// Check whether a string is in a regular expression
331///
332/// # Example
333///
334/// ```
335/// use aws_smt_strings::smt_regular_expressions::*;
336///
337/// let any = re_allchar();
338/// let all = re_all();
339/// let sep = str_to_re(&":".into());
340/// let aaa = str_to_re(&"aaa".into());
341/// let t = re_concat_list([any, any, sep, aaa, sep, all, sep, aaa, sep, all]);
342///
343/// assert!(str_in_re(&"ij:aaa::aaa:cdef".into(), t));
344/// ```
345///
346pub fn str_in_re(s: &SmtString, r: RegLan) -> bool {
347    MANAGER.with(|m| m.borrow_mut().str_in_re(s, r))
348}
349
350///
351/// Search for a match of r in s starting at index k
352///
353fn find_match(r: RegLan, s: &[u32], k: usize, allow_empty: bool) -> SearchResult {
354    MANAGER.with(|m| naive_re_search(&mut m.borrow_mut(), r, s, k, allow_empty))
355}
356
357///
358/// Replace the first and shortest occurrence of r by s2 in s1
359///
360/// # Example
361///
362/// ```
363/// use aws_smt_strings::smt_regular_expressions::*;
364///
365/// let a_star = re_star(str_to_re(&"a".into()));
366/// assert_eq!(str_replace_re(&"baab".into(), a_star, &"cc".into()), "ccbaab".into());
367///
368/// let a_plus = re_plus(str_to_re(&"a".into()));
369/// assert_eq!(str_replace_re(&"baab".into(), a_plus, &"cc".into()), "bccab".into());
370///
371/// assert_eq!(str_replace_re(&"nomtch".into(), a_plus, &"cc".into()), "nomtch".into());
372/// ```
373///
374pub fn str_replace_re(s1: &SmtString, r: RegLan, s2: &SmtString) -> SmtString {
375    let s1 = s1.as_ref();
376    match find_match(r, s1, 0, true) {
377        SearchResult::NotFound => s1.into(),
378        SearchResult::Found(i, j) => {
379            let mut x = Vec::new();
380            x.extend_from_slice(&s1[..i]);
381            x.extend_from_slice(s2.as_ref());
382            x.extend_from_slice(&s1[j..]);
383            x.into()
384        }
385    }
386}
387
388///
389/// Replace all non-empty matches of r by s2 in s1
390///
391/// # Example
392///
393/// ```
394/// use aws_smt_strings::smt_regular_expressions::*;
395///
396/// let a_star = re_star(str_to_re(&"a".into()));
397/// assert_eq!(str_replace_re_all(&"baab".into(), a_star, &"cd".into()), "bcdcdb".into());
398///
399/// let digits = re_plus(re_range(&"0".into(), &"9".into()));
400/// let pattern = re_concat(str_to_re(&"pre".into()), digits);
401/// assert_eq!(str_replace_re_all(&"10pre129prepre0xx".into(), pattern, &"Z".into()),
402///            "10Z29preZxx".into());
403/// ```
404pub fn str_replace_re_all(s1: &SmtString, r: RegLan, s2: &SmtString) -> SmtString {
405    let s1 = s1.as_ref();
406    let s2 = s2.as_ref();
407    let mut x = Vec::new();
408    let mut i = 0;
409    while let SearchResult::Found(j, k) = find_match(r, s1, i, false) {
410        // s[j .. k] == p
411        x.extend_from_slice(&s1[i..j]);
412        x.extend_from_slice(s2);
413        i = k;
414    }
415    x.extend_from_slice(&s1[i..]);
416    x.into()
417}