smt_str/re/
deriv.rs

1//! Brzozowski derivatives for regular expression.
2use std::collections::HashMap;
3
4use smallvec::{smallvec, SmallVec};
5
6use crate::{SmtChar, SmtString};
7
8use super::{union_of_chars, ReBuilder, ReOp, Regex};
9
10/// A builder for calculating derivatives of regular expressions.
11/// The builder caches the derivatives of regular expressions w.r.t. to symbols.
12/// This can be useful when calculating the derivative of a regular expression w.r.t. to the same symbol multiple times.
13#[derive(Debug, Clone, Default)]
14pub struct DerivativeBuilder {
15    cache: HashMap<(Regex, SmtChar), Regex>,
16}
17
18impl DerivativeBuilder {
19    pub fn deriv(&mut self, r: &Regex, c: SmtChar, builder: &mut ReBuilder) -> Regex {
20        if let Some(derived) = self.cache.get(&(r.clone(), c)) {
21            derived.clone()
22        } else if let Some(chrs) = union_of_chars(r) {
23            if chrs.iter().any(|r| r.contains(c)) {
24                return builder.epsilon();
25            } else {
26                return builder.none();
27            }
28        } else {
29            let deriv = match r.op() {
30                ReOp::Literal(w) => {
31                    if w.first() == Some(c) {
32                        builder.to_re(w.drop(1))
33                    } else {
34                        builder.none()
35                    }
36                }
37                ReOp::None => builder.none(),
38                ReOp::All => builder.all(),
39                ReOp::Any => builder.epsilon(),
40                ReOp::Union(rs) => {
41                    let ch: SmallVec<[Regex; 2]> =
42                        rs.iter().map(|r| self.deriv(r, c, builder)).collect();
43                    builder.union(ch)
44                }
45                ReOp::Concat(rs) => {
46                    let mut union = smallvec![];
47                    for (n, r) in rs.iter().enumerate() {
48                        let current = self.deriv(r, c, builder);
49                        let remaining = rs.iter().skip(n + 1);
50                        // Concatenate the current deriv with the remaining children
51                        let mut v = SmallVec::with_capacity(remaining.len() + 1);
52                        v.push(current);
53                        v.extend(remaining.cloned());
54                        let deriv = builder.concat(v);
55                        union.push(deriv);
56                        // Continue deriving the next child if the current child is nullable
57                        if !r.nullable() {
58                            break;
59                        }
60                    }
61
62                    if union.len() == 1 {
63                        union.pop().unwrap()
64                    } else if !union.is_empty() {
65                        builder.union(union)
66                    } else {
67                        // That only happens if the concatenation is empty, at which point the derivative is none
68                        builder.none()
69                    }
70                }
71                ReOp::Inter(rs) => {
72                    let ch = rs.iter().map(|r| self.deriv(r, c, builder)).collect();
73                    builder.inter(ch)
74                }
75                ReOp::Star(r) | ReOp::Plus(r) => {
76                    let rderiv = self.deriv(r, c, builder);
77                    let star = builder.star(r.clone());
78                    builder.concat(smallvec![rderiv, star])
79                }
80                ReOp::Opt(r) => self.deriv(r, c, builder),
81                ReOp::Range(r) => {
82                    let l = r.start();
83                    let u = r.end();
84                    if l <= c && c <= u {
85                        builder.epsilon()
86                    } else {
87                        builder.none()
88                    }
89                }
90                ReOp::Comp(r) => {
91                    let r = self.deriv(r, c, builder);
92                    builder.comp(r)
93                }
94                ReOp::Diff(d1, d2) => {
95                    // rewrite as intersection with complement
96                    let d2c = builder.comp(d2.clone());
97                    let inter = builder.inter(smallvec![d1.clone(), d2c.clone()]);
98                    self.deriv(&inter, c, builder)
99                }
100                ReOp::Pow(r, e) => {
101                    if *e == 0 {
102                        builder.none()
103                    } else {
104                        let derivr = self.deriv(r, c, builder);
105                        let pow = builder.pow(r.clone(), e - 1);
106                        builder.concat(smallvec![derivr, pow])
107                    }
108                }
109                ReOp::Loop(r, l, u) => {
110                    if l > u || *u == 0 {
111                        // is either none or epsilon, in both cases the derivative is none
112                        return builder.none();
113                    } else {
114                        let rderiv = self.deriv(r, c, builder);
115                        let loop_ =
116                            builder.loop_(r.clone(), l.saturating_sub(1), u.saturating_sub(1));
117                        builder.concat(smallvec![rderiv, loop_])
118                    }
119                }
120            };
121            self.cache.insert((r.clone(), c), deriv.clone());
122            deriv
123        }
124    }
125}
126
127/// Calculates the derivative of a regex w.r.t. to a symbol.
128/// The derivative of a regex `r` w.r.t. to a symbol `c` is a regex that matches the suffix of all words that `r` matches when `c` is removed from the beginning.
129/// New expressions are built using the given [RegexBuilder].
130/// It is a precondition that the given regex is managed by the same [RegexBuilder] as the one passed to this function.
131pub fn deriv(r: &Regex, c: impl Into<SmtChar>, builder: &mut ReBuilder) -> Regex {
132    let mut deriver = DerivativeBuilder::default();
133    deriver.deriv(r, c.into(), builder)
134}
135
136/// Calculates the derivative of a regex w.r.t. to a word by repeatedly calculating the derivative of the regex w.r.t. to each symbol in the word.
137/// All derivatives are built using the given [RegexBuilder].
138/// It is a precondition that the given regex is managed by the same [RegexBuilder] as the one passed to this function.
139pub fn deriv_word(r: &Regex, w: impl Into<SmtString>, builder: &mut ReBuilder) -> Regex {
140    let mut result = r.clone();
141    let w = w.into();
142    for c in w.iter() {
143        result = deriv(&result, *c, builder);
144    }
145    result
146}
147
148#[cfg(test)]
149mod test {
150
151    use std::rc::Rc;
152
153    use quickcheck_macros::quickcheck;
154
155    use super::*;
156    #[test]
157    fn deriv_const() {
158        let mut builder = ReBuilder::non_optimizing();
159        let r = builder.to_re("foo".into());
160        let expected = builder.to_re("oo".into());
161        let derived = deriv(&r, 'f', &mut builder);
162        assert_eq!(derived, expected);
163    }
164
165    #[test]
166    fn deriv_const_builder() {
167        let mut builder = ReBuilder::non_optimizing();
168        let r = builder.to_re("foo".into());
169        let expected = builder.to_re("oo".into());
170        let derived = deriv(&r, 'f', &mut builder);
171        assert!(Rc::ptr_eq(&derived, &expected));
172    }
173
174    #[quickcheck]
175    fn deriv_none(c: SmtChar) {
176        let mut builder = ReBuilder::non_optimizing();
177        let r = builder.none();
178        let expected = builder.none();
179        assert_eq!(deriv(&r, c, &mut builder), expected);
180    }
181
182    #[quickcheck]
183    fn deriv_all(c: SmtChar) {
184        let mut builder = ReBuilder::non_optimizing();
185        let r = builder.all();
186        let expected = builder.all();
187        assert_eq!(deriv(&r, c, &mut builder), expected);
188    }
189
190    #[test]
191    fn deriv_all_char_builder() {
192        let mut builder = ReBuilder::non_optimizing();
193        let r = builder.allchar();
194        let expected = builder.epsilon();
195        assert_eq!(deriv(&r, 'f', &mut builder), expected);
196    }
197
198    #[test]
199    fn deriv_range() {
200        let mut builder = ReBuilder::non_optimizing();
201        let r = builder.range_from_to('a', 'z');
202        assert_eq!(deriv(&r, 'f', &mut builder), builder.epsilon());
203        assert_eq!(deriv(&r, '1', &mut builder), builder.none());
204    }
205
206    #[test]
207    fn deriv_concat() {
208        let mut builder = ReBuilder::non_optimizing();
209
210        let chs = smallvec![builder.to_re("foo".into()), builder.to_re("bar".into())];
211        let r = builder.concat(chs);
212
213        let chs = smallvec![builder.to_re("oo".into()), builder.to_re("bar".into())];
214        let expected = builder.concat(chs);
215
216        assert_eq!(deriv(&r, 'f', &mut builder), expected);
217        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
218        assert!(deriv(&r, 'o', &mut builder).none().unwrap());
219    }
220
221    #[test]
222    fn deriv_concat_nullable() {
223        let mut builder = ReBuilder::default();
224        let foo = builder.to_re("foo".into());
225
226        // foo?
227        let opt = builder.opt(foo.clone());
228
229        let r = smallvec![opt.clone(), builder.to_re("far".into())];
230        // (foo)?far
231        let r = builder.concat(r);
232
233        // oofar|ar
234        let expected = smallvec![builder.to_re("oo".into()), builder.to_re("far".into())];
235        let concat = builder.concat(expected);
236        let expected = smallvec![concat, builder.to_re("ar".into())];
237        let expected = builder.union(expected);
238
239        let derived = deriv(&r, 'f', &mut builder);
240        assert_eq!(
241            expected.op(),
242            derived.op(),
243            "Expected: {} but was {}",
244            expected,
245            derived
246        );
247        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
248        assert!(deriv(&r, 'o', &mut builder).none().unwrap());
249    }
250
251    #[test]
252    fn deriv_union() {
253        let mut builder = ReBuilder::non_optimizing();
254        let foo = builder.to_re("foo".into());
255        let bar = builder.to_re("bar".into());
256        let baz = builder.to_re("baz".into());
257        let r = builder.union(smallvec![foo, bar, baz]);
258
259        let ar = builder.to_re("ar".into());
260        let az = builder.to_re("az".into());
261        let none = builder.none();
262        let expected = builder.union(smallvec![none.clone(), ar, az]);
263        assert_eq!(deriv(&r, 'b', &mut builder), expected);
264
265        let oo = builder.to_re("oo".into());
266        let expected = builder.union(smallvec![oo, none.clone(), none.clone()]);
267        assert_eq!(deriv(&r, 'f', &mut builder), expected);
268
269        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
270    }
271
272    #[test]
273    fn deriv_star() {
274        let mut builder = ReBuilder::non_optimizing();
275        let foo = builder.to_re("foo".into());
276        let r = builder.star(foo.clone());
277
278        let oo = builder.to_re("oo".into());
279        let expected = builder.concat(smallvec![oo, r.clone()]);
280        assert_eq!(deriv(&r, 'f', &mut builder), expected);
281        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
282    }
283
284    #[test]
285    fn deriv_plus() {
286        let mut builder = ReBuilder::non_optimizing();
287        let foo = builder.to_re("foo".into());
288        let r = builder.plus(foo.clone());
289
290        let expected = smallvec![builder.to_re("oo".into()), builder.star(foo.clone())];
291        let expected = builder.concat(expected);
292        assert_eq!(deriv(&r, 'f', &mut builder), expected);
293        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
294    }
295
296    #[test]
297    fn deriv_opt() {
298        let mut builder = ReBuilder::non_optimizing();
299        let foo = builder.to_re("foo".into());
300        let r = builder.opt(foo.clone());
301
302        let expected = builder.to_re("oo".into());
303        assert_eq!(deriv(&r, 'f', &mut builder), expected);
304        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
305    }
306
307    #[test]
308    fn deriv_pow() {
309        let mut builder = ReBuilder::non_optimizing();
310        let foo = builder.to_re("foo".into());
311        let r = builder.pow(foo.clone(), 3);
312
313        let oo = builder.to_re("oo".into());
314        let pow2 = builder.pow(foo.clone(), 2);
315        let expected = builder.concat(smallvec![oo, pow2]);
316        assert_eq!(deriv(&r, 'f', &mut builder), expected);
317        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
318    }
319
320    #[test]
321    fn deriv_inter() {
322        let mut builder = ReBuilder::non_optimizing();
323        let foo = builder.to_re("foo".into());
324        let far = builder.to_re("far".into());
325        let r = builder.inter(smallvec![foo.clone(), far.clone()]);
326        let oo = builder.to_re("oo".into());
327        let ar = builder.to_re("ar".into());
328        let expected = builder.inter(smallvec![oo, ar]);
329        assert_eq!(deriv(&r, 'f', &mut builder), expected);
330        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
331    }
332
333    #[test]
334    fn deriv_diff() {
335        let mut builder = ReBuilder::non_optimizing();
336        let foo = builder.to_re("foo".into());
337        let far = builder.to_re("far".into());
338        let r = builder.diff(foo.clone(), far.clone());
339        let oo = builder.to_re("oo".into());
340        let ar = builder.to_re("ar".into());
341        let ar_comp = builder.comp(ar.clone());
342        let expected = builder.inter(smallvec![oo, ar_comp]);
343        assert_eq!(deriv(&r, 'f', &mut builder), expected);
344        assert!(deriv(&r, 'g', &mut builder).none().unwrap());
345    }
346
347    #[test]
348    fn deriv_pow_0() {
349        let mut builder = ReBuilder::non_optimizing();
350        let foo = builder.to_re("foo".into());
351        let r = builder.pow(foo.clone(), 0);
352        assert_eq!(deriv(&r, 'f', &mut builder), builder.none());
353    }
354
355    #[test]
356    fn deriv_loop() {
357        let mut builder = ReBuilder::non_optimizing();
358        let foo = builder.to_re("foo".into());
359        let r = builder.loop_(foo.clone(), 1, 2);
360
361        let loop2 = builder.loop_(foo.clone(), 0, 1);
362        let oo = builder.to_re("oo".into());
363        let expected = builder.concat(smallvec![oo, loop2]);
364        assert_eq!(deriv(&r, 'f', &mut builder), expected);
365        assert!(deriv(&r, 'b', &mut builder).none().unwrap());
366    }
367
368    #[test]
369    fn deriv_loop_empty() {
370        let mut builder = ReBuilder::non_optimizing();
371        let foo = builder.to_re("foo".into());
372        let r = builder.loop_(foo.clone(), 0, 0);
373        assert_eq!(deriv(&r, 'f', &mut builder), builder.none());
374    }
375
376    #[test]
377    fn deriv_loop_empty2() {
378        let mut builder = ReBuilder::non_optimizing();
379        let foo = builder.to_re("foo".into());
380        let r = builder.loop_(foo.clone(), 1, 0);
381        assert_eq!(deriv(&r, 'f', &mut builder), builder.none());
382    }
383
384    #[quickcheck]
385    fn deriv_loops(l: u32, u: u32) {
386        let l = l % 100;
387        let u = u % 100;
388        let mut builder = ReBuilder::non_optimizing();
389        let foo = builder.to_re("foo".into());
390        let r = builder.loop_(foo.clone(), l, u);
391
392        if l > u || u == 0 {
393            assert_eq!(deriv(&r, 'f', &mut builder), builder.none());
394        } else {
395            let loop_ = builder.loop_(foo.clone(), l.saturating_sub(1), u.saturating_sub(1));
396            let oo = builder.to_re("oo".into());
397            let expected = builder.concat(smallvec![oo, loop_]);
398            assert_eq!(deriv(&r, 'f', &mut builder), expected);
399        }
400    }
401}