aws_smt_strings/
loop_ranges.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Loop ranges in regular expressions.
6//!
7//! SMT-LIB has a loop construct for regular expressions:
8//! Given a language L, `(loop i j L)` means repetition of L for some number
9//! between i and j. So we have:
10//!
11//!   `(loop i j L)` = union for k=i to j of L<sup>k</sup> (provided i<=j).
12//!
13//! We generalize this construct by allowing j to be +infinity. Then we
14//! use loop to reduce the number of regular expression primitives that we need:
15//! 1) L? is `(loop 0 1 L)`
16//! 2) L<sup>+</sup> is `(loop 1 +infinity L)`
17//! 3) L<sup>*</sup> is `(loop 0 +infinity L)`
18//! 4) L<sup>k</sup> is `(loop k k L)`
19//!
20//! This module implements operations on loop ranges. Each range is
21//! a pair (i, j) where 0 <= i <= j and j may be infinite.
22//!
23//! We limit i and j to u32. The code will panic in case of arithmetic overflow.
24//!
25
26use std::fmt::Display;
27
28///
29/// Loop range
30///
31/// A loop range is either a finite interval [i, j] or an infinite interval [i, ..]
32/// - A finite interval is represented as `LoopRange(i, Some(j))` where 0 <= i <= j
33/// - An infinite interval is represented as `LoopRange(i, None)`.
34///
35/// The integers i and j are stored as u32. Operations on loop ranges will panic if
36/// results cannot be stored using 32bit unsigned integers.
37///
38#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
39pub struct LoopRange(u32, Option<u32>);
40
41/// Wrapper for addition with overflow detection
42///
43/// Panics if there's an overflow.
44/// Returns x + y otherwise.
45fn add32(x: u32, y: u32) -> u32 {
46    x.checked_add(y).expect("Arithmetic overflow (add u32)")
47}
48
49/// Wrapper for multiplication with overflow detection
50///
51/// Panics is there's an overflow.
52/// Returns x * y otherwise.
53fn mul32(x: u32, y: u32) -> u32 {
54    x.checked_mul(y).expect("Arithmetic overflow (mul 32)")
55}
56
57impl Display for LoopRange {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        match self {
60            LoopRange(0, Some(1)) => write!(f, "?"),
61            LoopRange(0, None) => write!(f, "*"),
62            LoopRange(1, None) => write!(f, "+"),
63            LoopRange(i, Some(j)) => {
64                if i == j {
65                    write!(f, "{i}")
66                } else {
67                    write!(f, "[{i}..{j}]")
68                }
69            }
70            LoopRange(i, None) => write!(f, "[{i}..inf)"),
71        }
72    }
73}
74
75impl LoopRange {
76    /// Construct the finite range [i, j]
77    pub fn finite(i: u32, j: u32) -> LoopRange {
78        debug_assert!(i <= j);
79        LoopRange(i, Some(j))
80    }
81
82    /// Construct the infinite range [i, +infinity]
83    pub fn infinite(i: u32) -> LoopRange {
84        LoopRange(i, None)
85    }
86
87    /// Construct the range [0, 1]
88    pub fn opt() -> LoopRange {
89        LoopRange::finite(0, 1)
90    }
91
92    /// Construct the range [0, +infinity]
93    pub fn star() -> LoopRange {
94        LoopRange::infinite(0)
95    }
96
97    /// Construct the range [1, +infinity]
98    pub fn plus() -> LoopRange {
99        LoopRange::infinite(1)
100    }
101
102    /// Construct the range [k, k]
103    pub fn point(k: u32) -> LoopRange {
104        LoopRange::finite(k, k)
105    }
106
107    /// Check whether this range is finite
108    pub fn is_finite(&self) -> bool {
109        matches!(self, LoopRange(_, Some(_)))
110    }
111
112    /// Check whether this range is infinite
113    pub fn is_infinite(&self) -> bool {
114        matches!(self, LoopRange(_, None))
115    }
116
117    /// Check whether this range is a singleton
118    pub fn is_point(&self) -> bool {
119        match self {
120            LoopRange(i, Some(j)) => *i == *j,
121            _ => false,
122        }
123    }
124
125    /// Check whether this range is the singleton 0
126    pub fn is_zero(&self) -> bool {
127        matches!(self, LoopRange(0, Some(0)))
128    }
129
130    /// Check whether this range is the singleton 1
131    pub fn is_one(&self) -> bool {
132        matches!(self, LoopRange(1, Some(1)))
133    }
134
135    /// Check whether this range is [0, +infinity]
136    pub fn is_all(&self) -> bool {
137        matches!(self, LoopRange(0, None))
138    }
139
140    /// Start of the range
141    pub fn start(&self) -> u32 {
142        self.0
143    }
144
145    /// End of the range
146    ///
147    /// # Panics
148    ///
149    /// If the range is infinite, the end of the range is not an integer.
150    /// This method will panic.
151    fn end(&self) -> u32 {
152        debug_assert!(self.is_finite());
153        self.1.unwrap()
154    }
155
156    ///
157    /// Check whether an index is in a loop range
158    ///
159    pub fn contains(&self, i: u32) -> bool {
160        match self {
161            LoopRange(j, Some(k)) => *j <= i && i <= *k,
162            LoopRange(j, None) => *j <= i,
163        }
164    }
165
166    ///
167    /// Check inclusion
168    ///
169    /// - return true if other is included in self.
170    pub fn includes(&self, other: &LoopRange) -> bool {
171        match (&self, other) {
172            (LoopRange(i1, None), LoopRange(i2, _)) => i1 <= i2,
173            (LoopRange(i1, Some(j1)), LoopRange(i2, Some(j2))) => i1 <= i2 && j2 <= j1,
174            _ => false,
175        }
176    }
177
178    ///
179    /// Add two ranges
180    ///
181    /// The result is the union of both ranges.
182    ///
183    /// This is used to rewrite `(concat (loop L a b) (loop L c d))`
184    /// to `(loop L (add [a, b], [c, d]))`.
185    ///
186    /// # Panics
187    ///
188    /// If an arithmetic overflow occurs, that is, the union of the two ranges
189    /// cannot be represented using 32bit unsigned integers, this method will panic.
190    ///
191    pub fn add(&self, other: &LoopRange) -> LoopRange {
192        let i = add32(self.start(), other.start());
193        if self.is_infinite() || other.is_infinite() {
194            LoopRange::infinite(i)
195        } else {
196            let j = add32(self.end(), other.end());
197            LoopRange::finite(i, j)
198        }
199    }
200
201    ///
202    /// Add a point interval
203    ///
204    /// Add the point interval [x, x] to self.
205    ///
206    /// # Panics
207    ///
208    /// If there's an arithmetic overflow. See [add][Self::add].
209    pub fn add_point(&self, x: u32) -> LoopRange {
210        self.add(&Self::point(x))
211    }
212
213    ///
214    /// Multiply by a constant k
215    ///
216    /// - If the current range is a finite interval [i, j], return [k * i, k * j]
217    /// - If the current range is an infinite interval [i, +infinity] and
218    ///   k is not zero, return [k * i, +infinity]
219    /// - If k is zero, return [0, 0] even if the current range is infinite
220    ///
221    /// This corresponds to adding [i, j] to itself k times.
222    ///
223    /// This can be used to rewrite
224    ///
225    ///   `(loop L i j)^k` = `(loop L (k * i) (k * j))`
226    ///
227    /// # Panics
228    ///
229    /// If there's an arithmetic overflow in k * i or k * j.
230    ///
231    pub fn scale(&self, k: u32) -> LoopRange {
232        if k == 0 {
233            LoopRange::point(0)
234        } else if self.is_infinite() {
235            let i = mul32(self.start(), k);
236            LoopRange::infinite(i)
237        } else {
238            let i = mul32(self.start(), k);
239            let j = mul32(self.end(), k);
240            LoopRange::finite(i, j)
241        }
242    }
243
244    ///
245    /// Multiply two ranges
246    ///
247    /// The product of the ranges [a, b] and [c, d] (where b or d may be +infinity)
248    /// is the range [a * c, b * d]. The following rules handle multiplication with
249    /// infinity:
250    /// - 0 * infinity = infinity * 0 = 0
251    /// - k * infinity = infinity * k = infinity if k is finite and non-zero
252    /// - infinity * infinity = infinity
253    ///
254    /// **Note:**
255    ///
256    /// The actual product of [a, b] and [c, d] is the set of integers
257    /// P = { x * y | a <= x <= b and c <= y <= d }.
258    ///
259    /// The interval [a * c, b * d] is an over approximation of P,
260    /// since P may not be an interval.
261    ///
262    /// For example: if [a, b] = [0, 1] and [c, d] = [3, 4] then
263    /// P is { 0, 3, 4 } but [0, 1] * [3, 4] is [0, 4].
264    ///
265    /// Method [right_mul_is_exact][Self::right_mul_is_exact] can be used to check
266    /// whether the product is exact.
267    ///
268    /// # Panics
269    ///
270    /// If there's an arithmetic overflow in the product computation,
271    /// that is, the result cannot be stored using u32 integers, this
272    /// method will panic.
273    ///
274    pub fn mul(&self, other: &LoopRange) -> LoopRange {
275        if self.is_zero() || other.is_zero() {
276            LoopRange::point(0)
277        } else if self.is_infinite() || other.is_infinite() {
278            let i = mul32(self.start(), other.start());
279            LoopRange::infinite(i)
280        } else {
281            let i = mul32(self.start(), other.start());
282            let j = mul32(self.end(), other.end());
283            LoopRange::finite(i, j)
284        }
285    }
286
287    ///
288    /// Check whether the product of two ranges is an interval.
289    ///
290    /// Given a range r = [a, b] and s = [c, d], then
291    ///
292    ///    `r.right_mul_is_exact(s)`
293    ///
294    /// returns true if the Union(k * [a, b]) for k in s is equal to [a, b] * [c, d].
295    ///
296    /// If it is, we can rewrite `(loop (loop L a b) c d)` to `(loop L a * c b * d)`.
297    ///
298    /// **Note**:
299    ///
300    /// The product of two ranges is commutative but this method is not.
301    /// For example:
302    ///
303    /// 1) Union(k * [0, +infinity], k in [2, 2]) = 2 * [0, +infinity] = [0, +infinity]
304    ///    so
305    ///
306    ///      `(loop (loop L 0 +infinity) 2 2)` = (L*)^2 = L*
307    ///
308    /// 2) Union(k * [2, 2], k in [0, +infinity]) = { 2 * k | k in [0, +infinity] }
309    ///    so
310    ///
311    ///      `(loop (loop L 2 2) 0 +infinity)` = (L^2)*  (not the same as L*)
312    ///
313    /// # Example
314    ///
315    /// ```
316    /// use aws_smt_strings::loop_ranges::*;
317    ///
318    /// let r = LoopRange::point(2); // range [2, 2]
319    /// let s = LoopRange::star();   // range [0, +infinity]
320    ///
321    /// assert!(s.right_mul_is_exact(&r));  // [0, +infinity] * [2, 2] is [0, +infinity] (exact)
322    /// assert!(!r.right_mul_is_exact(&s)); // [2, 2] * [0, +infinity] is not an interval
323    /// ```
324    pub fn right_mul_is_exact(&self, other: &LoopRange) -> bool {
325        //
326        // Explanation:
327        // let self = [a, b] and other = [c, d].
328        // let K = Union( y * [a, b] for y = c to d ).
329        //
330        // In this, y * [a, b] is the sum of y integers, all taken in the
331        // interval [a, b]. So y * [a, b] is equal to [y * a, y * b].
332        //
333        // So K is the union of the intervals [y * a, y * b] for y = c to d.
334        //
335        // If c == d then K is an interval since then K=[c * a, c * b].
336        //
337        // If b == +infinity then there are two cases:
338        // 1) c == 0, then K = is [0, 0] \union [a, +infinity]
339        //    K is an interval if a <= 1
340        // 2) c > 0 then K is the interval [c * a, +infinity]
341        //
342        // Otherwise, K is an interval if there's no gap between
343        // subsequent intervals [y*a, y*b] and [(y+1)*a, (y+1)*b].
344        // There's a gap iff (y+1)*a - y*b > 1, which is equivalent
345        //   to y * (b - a) + 1 < a.
346        // So no gap for y is y * (b - a) >= a - 1.
347        //
348        // This condition holds for all y in [c, d] iff it holds when y = c.
349        //
350        other.is_point()
351            || if self.is_infinite() {
352                other.start() > 0 || self.start() <= 1
353            } else {
354                // c = other.start()
355                // b = self.end()
356                // a = self.start()
357                mul32(other.start(), self.end() - self.start()) >= self.start().saturating_sub(1)
358            }
359    }
360
361    ///
362    /// Shift by one.
363    ///
364    /// If self = [a, b], return [dec(a), dec(b)] where dec(x) *decrements* x by 1:
365    /// - dec(0) = 0
366    /// - dec(+infinity) = +infinity
367    /// - dec(x) = x-1 otherwise
368    ///
369    pub fn shift(&self) -> LoopRange {
370        match self {
371            LoopRange(0, None) => LoopRange::infinite(0),
372            LoopRange(0, Some(0)) => LoopRange::point(0),
373            LoopRange(0, Some(j)) => LoopRange::finite(0, *j - 1),
374            LoopRange(i, None) => LoopRange::infinite(*i - 1),
375            LoopRange(i, Some(j)) => LoopRange::finite(*i - 1, *j - 1),
376        }
377    }
378}
379
380#[allow(clippy::uninlined_format_args)]
381#[cfg(test)]
382mod test {
383    use super::LoopRange;
384
385    fn make_examples() -> Vec<LoopRange> {
386        vec![
387            LoopRange::finite(2, 3),
388            LoopRange::finite(4, 9),
389            LoopRange::finite(1, 3),
390            LoopRange::infinite(3),
391            LoopRange::infinite(20),
392            LoopRange::point(0),
393            LoopRange::point(1),
394            LoopRange::point(2),
395            LoopRange::point(5),
396            LoopRange::opt(),
397            LoopRange::star(),
398            LoopRange::plus(),
399        ]
400    }
401
402    #[test]
403    fn basic() {
404        println!("Test examples");
405        for rng in make_examples() {
406            println!(
407                "range {}: finite: {}, infinite: {}, point: {}",
408                rng,
409                rng.is_finite(),
410                rng.is_infinite(),
411                rng.is_point()
412            );
413        }
414        println!()
415    }
416
417    #[test]
418    fn test_shift() {
419        println!("Shift tests");
420        for rng in make_examples() {
421            println!("shift({}) = {}", rng, rng.shift());
422        }
423        println!()
424    }
425
426    #[test]
427    fn test_add() {
428        println!("Add tests");
429        let v = make_examples();
430        for r in &v {
431            for s in &v {
432                println!("add({}, {}) = {}", r, s, r.add(s))
433            }
434        }
435        println!()
436    }
437
438    #[test]
439    fn test_scale() {
440        println!("Scaling tests");
441        for r in make_examples() {
442            for k in 0..5 {
443                let s = r.scale(k);
444                println!("scale({}, {}) = {}", r, k, &s);
445                match k {
446                    0 => assert!(s.is_zero()),
447                    1 => assert_eq!(s, r),
448                    _ => assert_eq!(s.is_finite(), r.is_finite()),
449                }
450            }
451        }
452        println!();
453    }
454
455    #[test]
456    fn test_mul() {
457        println!("Product tests");
458        let v = make_examples();
459        for r in &v {
460            for s in &v {
461                let product = r.mul(s);
462                let exact = r.right_mul_is_exact(s);
463                println!("mul({}, {}) = {} (exact: {})", r, s, product, exact);
464
465                if r.is_zero() || s.is_zero() {
466                    assert!(product.is_zero());
467                }
468                if product.is_point() {
469                    assert!(exact);
470                }
471            }
472        }
473        println!()
474    }
475
476    #[test]
477    fn test_mul_exact() {
478        let v = [
479            LoopRange::point(2),
480            LoopRange::finite(2, 3),
481            LoopRange::infinite(2),
482        ];
483
484        // r^3 is an interval for any r
485        for r in &v {
486            assert!(r.right_mul_is_exact(&LoopRange::point(3)));
487        }
488
489        let opt = LoopRange::opt();
490        let star = LoopRange::star();
491        let plus = LoopRange::plus();
492
493        // all example r contain integers >= 2
494        // r^opt and r^* contain 0 and integers >= 2
495        // so r^opt and r^* are not intervals
496        for r in &v {
497            assert!(!r.right_mul_is_exact(&opt));
498            assert!(!r.right_mul_is_exact(&star));
499        }
500
501        // 2^+ = { 2, 4, ...} : not an interval
502        assert!(!v[0].right_mul_is_exact(&plus));
503        // [2,3]^+ = [2, inf): interval
504        assert!(v[1].right_mul_is_exact(&plus));
505        // [2, inf^)+ = [2, inf): interval
506        assert!(v[2].right_mul_is_exact(&plus));
507
508        // plus^opt is star
509        assert!(plus.right_mul_is_exact(&opt));
510        assert_eq!(plus.mul(&opt), star);
511        // star^opt is star
512        assert!(star.right_mul_is_exact(&opt));
513        assert_eq!(star.mul(&opt), star);
514        // opt^opt is opt
515        assert!(opt.right_mul_is_exact(&opt));
516        assert_eq!(opt.mul(&opt), opt);
517    }
518}