aws_smt_strings/loop_ranges.rs
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Loop ranges in regular expressions.
6//!
7//! SMT-LIB has a loop construct for regular expressions:
8//! Given a language L, `(loop i j L)` means repetition of L for some number
9//! between i and j. So we have:
10//!
11//! `(loop i j L)` = union for k=i to j of L<sup>k</sup> (provided i<=j).
12//!
13//! We generalize this construct by allowing j to be +infinity. Then we
14//! use loop to reduce the number of regular expression primitives that we need:
15//! 1) L? is `(loop 0 1 L)`
16//! 2) L<sup>+</sup> is `(loop 1 +infinity L)`
17//! 3) L<sup>*</sup> is `(loop 0 +infinity L)`
18//! 4) L<sup>k</sup> is `(loop k k L)`
19//!
20//! This module implements operations on loop ranges. Each range is
21//! a pair (i, j) where 0 <= i <= j and j may be infinite.
22//!
23//! We limit i and j to u32. The code will panic in case of arithmetic overflow.
24//!
25
26use std::fmt::Display;
27
28///
29/// Loop range
30///
31/// A loop range is either a finite interval [i, j] or an infinite interval [i, ..]
32/// - A finite interval is represented as `LoopRange(i, Some(j))` where 0 <= i <= j
33/// - An infinite interval is represented as `LoopRange(i, None)`.
34///
35/// The integers i and j are stored as u32. Operations on loop ranges will panic if
36/// results cannot be stored using 32bit unsigned integers.
37///
38#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
39pub struct LoopRange(u32, Option<u32>);
40
41/// Wrapper for addition with overflow detection
42///
43/// Panics if there's an overflow.
44/// Returns x + y otherwise.
45fn add32(x: u32, y: u32) -> u32 {
46 x.checked_add(y).expect("Arithmetic overflow (add u32)")
47}
48
49/// Wrapper for multiplication with overflow detection
50///
51/// Panics is there's an overflow.
52/// Returns x * y otherwise.
53fn mul32(x: u32, y: u32) -> u32 {
54 x.checked_mul(y).expect("Arithmetic overflow (mul 32)")
55}
56
57impl Display for LoopRange {
58 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59 match self {
60 LoopRange(0, Some(1)) => write!(f, "?"),
61 LoopRange(0, None) => write!(f, "*"),
62 LoopRange(1, None) => write!(f, "+"),
63 LoopRange(i, Some(j)) => {
64 if i == j {
65 write!(f, "{i}")
66 } else {
67 write!(f, "[{i}..{j}]")
68 }
69 }
70 LoopRange(i, None) => write!(f, "[{i}..inf)"),
71 }
72 }
73}
74
75impl LoopRange {
76 /// Construct the finite range [i, j]
77 pub fn finite(i: u32, j: u32) -> LoopRange {
78 debug_assert!(i <= j);
79 LoopRange(i, Some(j))
80 }
81
82 /// Construct the infinite range [i, +infinity]
83 pub fn infinite(i: u32) -> LoopRange {
84 LoopRange(i, None)
85 }
86
87 /// Construct the range [0, 1]
88 pub fn opt() -> LoopRange {
89 LoopRange::finite(0, 1)
90 }
91
92 /// Construct the range [0, +infinity]
93 pub fn star() -> LoopRange {
94 LoopRange::infinite(0)
95 }
96
97 /// Construct the range [1, +infinity]
98 pub fn plus() -> LoopRange {
99 LoopRange::infinite(1)
100 }
101
102 /// Construct the range [k, k]
103 pub fn point(k: u32) -> LoopRange {
104 LoopRange::finite(k, k)
105 }
106
107 /// Check whether this range is finite
108 pub fn is_finite(&self) -> bool {
109 matches!(self, LoopRange(_, Some(_)))
110 }
111
112 /// Check whether this range is infinite
113 pub fn is_infinite(&self) -> bool {
114 matches!(self, LoopRange(_, None))
115 }
116
117 /// Check whether this range is a singleton
118 pub fn is_point(&self) -> bool {
119 match self {
120 LoopRange(i, Some(j)) => *i == *j,
121 _ => false,
122 }
123 }
124
125 /// Check whether this range is the singleton 0
126 pub fn is_zero(&self) -> bool {
127 matches!(self, LoopRange(0, Some(0)))
128 }
129
130 /// Check whether this range is the singleton 1
131 pub fn is_one(&self) -> bool {
132 matches!(self, LoopRange(1, Some(1)))
133 }
134
135 /// Check whether this range is [0, +infinity]
136 pub fn is_all(&self) -> bool {
137 matches!(self, LoopRange(0, None))
138 }
139
140 /// Start of the range
141 pub fn start(&self) -> u32 {
142 self.0
143 }
144
145 /// End of the range
146 ///
147 /// # Panics
148 ///
149 /// If the range is infinite, the end of the range is not an integer.
150 /// This method will panic.
151 fn end(&self) -> u32 {
152 debug_assert!(self.is_finite());
153 self.1.unwrap()
154 }
155
156 ///
157 /// Check whether an index is in a loop range
158 ///
159 pub fn contains(&self, i: u32) -> bool {
160 match self {
161 LoopRange(j, Some(k)) => *j <= i && i <= *k,
162 LoopRange(j, None) => *j <= i,
163 }
164 }
165
166 ///
167 /// Check inclusion
168 ///
169 /// - return true if other is included in self.
170 pub fn includes(&self, other: &LoopRange) -> bool {
171 match (&self, other) {
172 (LoopRange(i1, None), LoopRange(i2, _)) => i1 <= i2,
173 (LoopRange(i1, Some(j1)), LoopRange(i2, Some(j2))) => i1 <= i2 && j2 <= j1,
174 _ => false,
175 }
176 }
177
178 ///
179 /// Add two ranges
180 ///
181 /// The result is the union of both ranges.
182 ///
183 /// This is used to rewrite `(concat (loop L a b) (loop L c d))`
184 /// to `(loop L (add [a, b], [c, d]))`.
185 ///
186 /// # Panics
187 ///
188 /// If an arithmetic overflow occurs, that is, the union of the two ranges
189 /// cannot be represented using 32bit unsigned integers, this method will panic.
190 ///
191 pub fn add(&self, other: &LoopRange) -> LoopRange {
192 let i = add32(self.start(), other.start());
193 if self.is_infinite() || other.is_infinite() {
194 LoopRange::infinite(i)
195 } else {
196 let j = add32(self.end(), other.end());
197 LoopRange::finite(i, j)
198 }
199 }
200
201 ///
202 /// Add a point interval
203 ///
204 /// Add the point interval [x, x] to self.
205 ///
206 /// # Panics
207 ///
208 /// If there's an arithmetic overflow. See [add][Self::add].
209 pub fn add_point(&self, x: u32) -> LoopRange {
210 self.add(&Self::point(x))
211 }
212
213 ///
214 /// Multiply by a constant k
215 ///
216 /// - If the current range is a finite interval [i, j], return [k * i, k * j]
217 /// - If the current range is an infinite interval [i, +infinity] and
218 /// k is not zero, return [k * i, +infinity]
219 /// - If k is zero, return [0, 0] even if the current range is infinite
220 ///
221 /// This corresponds to adding [i, j] to itself k times.
222 ///
223 /// This can be used to rewrite
224 ///
225 /// `(loop L i j)^k` = `(loop L (k * i) (k * j))`
226 ///
227 /// # Panics
228 ///
229 /// If there's an arithmetic overflow in k * i or k * j.
230 ///
231 pub fn scale(&self, k: u32) -> LoopRange {
232 if k == 0 {
233 LoopRange::point(0)
234 } else if self.is_infinite() {
235 let i = mul32(self.start(), k);
236 LoopRange::infinite(i)
237 } else {
238 let i = mul32(self.start(), k);
239 let j = mul32(self.end(), k);
240 LoopRange::finite(i, j)
241 }
242 }
243
244 ///
245 /// Multiply two ranges
246 ///
247 /// The product of the ranges [a, b] and [c, d] (where b or d may be +infinity)
248 /// is the range [a * c, b * d]. The following rules handle multiplication with
249 /// infinity:
250 /// - 0 * infinity = infinity * 0 = 0
251 /// - k * infinity = infinity * k = infinity if k is finite and non-zero
252 /// - infinity * infinity = infinity
253 ///
254 /// **Note:**
255 ///
256 /// The actual product of [a, b] and [c, d] is the set of integers
257 /// P = { x * y | a <= x <= b and c <= y <= d }.
258 ///
259 /// The interval [a * c, b * d] is an over approximation of P,
260 /// since P may not be an interval.
261 ///
262 /// For example: if [a, b] = [0, 1] and [c, d] = [3, 4] then
263 /// P is { 0, 3, 4 } but [0, 1] * [3, 4] is [0, 4].
264 ///
265 /// Method [right_mul_is_exact][Self::right_mul_is_exact] can be used to check
266 /// whether the product is exact.
267 ///
268 /// # Panics
269 ///
270 /// If there's an arithmetic overflow in the product computation,
271 /// that is, the result cannot be stored using u32 integers, this
272 /// method will panic.
273 ///
274 pub fn mul(&self, other: &LoopRange) -> LoopRange {
275 if self.is_zero() || other.is_zero() {
276 LoopRange::point(0)
277 } else if self.is_infinite() || other.is_infinite() {
278 let i = mul32(self.start(), other.start());
279 LoopRange::infinite(i)
280 } else {
281 let i = mul32(self.start(), other.start());
282 let j = mul32(self.end(), other.end());
283 LoopRange::finite(i, j)
284 }
285 }
286
287 ///
288 /// Check whether the product of two ranges is an interval.
289 ///
290 /// Given a range r = [a, b] and s = [c, d], then
291 ///
292 /// `r.right_mul_is_exact(s)`
293 ///
294 /// returns true if the Union(k * [a, b]) for k in s is equal to [a, b] * [c, d].
295 ///
296 /// If it is, we can rewrite `(loop (loop L a b) c d)` to `(loop L a * c b * d)`.
297 ///
298 /// **Note**:
299 ///
300 /// The product of two ranges is commutative but this method is not.
301 /// For example:
302 ///
303 /// 1) Union(k * [0, +infinity], k in [2, 2]) = 2 * [0, +infinity] = [0, +infinity]
304 /// so
305 ///
306 /// `(loop (loop L 0 +infinity) 2 2)` = (L*)^2 = L*
307 ///
308 /// 2) Union(k * [2, 2], k in [0, +infinity]) = { 2 * k | k in [0, +infinity] }
309 /// so
310 ///
311 /// `(loop (loop L 2 2) 0 +infinity)` = (L^2)* (not the same as L*)
312 ///
313 /// # Example
314 ///
315 /// ```
316 /// use aws_smt_strings::loop_ranges::*;
317 ///
318 /// let r = LoopRange::point(2); // range [2, 2]
319 /// let s = LoopRange::star(); // range [0, +infinity]
320 ///
321 /// assert!(s.right_mul_is_exact(&r)); // [0, +infinity] * [2, 2] is [0, +infinity] (exact)
322 /// assert!(!r.right_mul_is_exact(&s)); // [2, 2] * [0, +infinity] is not an interval
323 /// ```
324 pub fn right_mul_is_exact(&self, other: &LoopRange) -> bool {
325 //
326 // Explanation:
327 // let self = [a, b] and other = [c, d].
328 // let K = Union( y * [a, b] for y = c to d ).
329 //
330 // In this, y * [a, b] is the sum of y integers, all taken in the
331 // interval [a, b]. So y * [a, b] is equal to [y * a, y * b].
332 //
333 // So K is the union of the intervals [y * a, y * b] for y = c to d.
334 //
335 // If c == d then K is an interval since then K=[c * a, c * b].
336 //
337 // If b == +infinity then there are two cases:
338 // 1) c == 0, then K = is [0, 0] \union [a, +infinity]
339 // K is an interval if a <= 1
340 // 2) c > 0 then K is the interval [c * a, +infinity]
341 //
342 // Otherwise, K is an interval if there's no gap between
343 // subsequent intervals [y*a, y*b] and [(y+1)*a, (y+1)*b].
344 // There's a gap iff (y+1)*a - y*b > 1, which is equivalent
345 // to y * (b - a) + 1 < a.
346 // So no gap for y is y * (b - a) >= a - 1.
347 //
348 // This condition holds for all y in [c, d] iff it holds when y = c.
349 //
350 other.is_point()
351 || if self.is_infinite() {
352 other.start() > 0 || self.start() <= 1
353 } else {
354 // c = other.start()
355 // b = self.end()
356 // a = self.start()
357 mul32(other.start(), self.end() - self.start()) >= self.start().saturating_sub(1)
358 }
359 }
360
361 ///
362 /// Shift by one.
363 ///
364 /// If self = [a, b], return [dec(a), dec(b)] where dec(x) *decrements* x by 1:
365 /// - dec(0) = 0
366 /// - dec(+infinity) = +infinity
367 /// - dec(x) = x-1 otherwise
368 ///
369 pub fn shift(&self) -> LoopRange {
370 match self {
371 LoopRange(0, None) => LoopRange::infinite(0),
372 LoopRange(0, Some(0)) => LoopRange::point(0),
373 LoopRange(0, Some(j)) => LoopRange::finite(0, *j - 1),
374 LoopRange(i, None) => LoopRange::infinite(*i - 1),
375 LoopRange(i, Some(j)) => LoopRange::finite(*i - 1, *j - 1),
376 }
377 }
378}
379
380#[allow(clippy::uninlined_format_args)]
381#[cfg(test)]
382mod test {
383 use super::LoopRange;
384
385 fn make_examples() -> Vec<LoopRange> {
386 vec![
387 LoopRange::finite(2, 3),
388 LoopRange::finite(4, 9),
389 LoopRange::finite(1, 3),
390 LoopRange::infinite(3),
391 LoopRange::infinite(20),
392 LoopRange::point(0),
393 LoopRange::point(1),
394 LoopRange::point(2),
395 LoopRange::point(5),
396 LoopRange::opt(),
397 LoopRange::star(),
398 LoopRange::plus(),
399 ]
400 }
401
402 #[test]
403 fn basic() {
404 println!("Test examples");
405 for rng in make_examples() {
406 println!(
407 "range {}: finite: {}, infinite: {}, point: {}",
408 rng,
409 rng.is_finite(),
410 rng.is_infinite(),
411 rng.is_point()
412 );
413 }
414 println!()
415 }
416
417 #[test]
418 fn test_shift() {
419 println!("Shift tests");
420 for rng in make_examples() {
421 println!("shift({}) = {}", rng, rng.shift());
422 }
423 println!()
424 }
425
426 #[test]
427 fn test_add() {
428 println!("Add tests");
429 let v = make_examples();
430 for r in &v {
431 for s in &v {
432 println!("add({}, {}) = {}", r, s, r.add(s))
433 }
434 }
435 println!()
436 }
437
438 #[test]
439 fn test_scale() {
440 println!("Scaling tests");
441 for r in make_examples() {
442 for k in 0..5 {
443 let s = r.scale(k);
444 println!("scale({}, {}) = {}", r, k, &s);
445 match k {
446 0 => assert!(s.is_zero()),
447 1 => assert_eq!(s, r),
448 _ => assert_eq!(s.is_finite(), r.is_finite()),
449 }
450 }
451 }
452 println!();
453 }
454
455 #[test]
456 fn test_mul() {
457 println!("Product tests");
458 let v = make_examples();
459 for r in &v {
460 for s in &v {
461 let product = r.mul(s);
462 let exact = r.right_mul_is_exact(s);
463 println!("mul({}, {}) = {} (exact: {})", r, s, product, exact);
464
465 if r.is_zero() || s.is_zero() {
466 assert!(product.is_zero());
467 }
468 if product.is_point() {
469 assert!(exact);
470 }
471 }
472 }
473 println!()
474 }
475
476 #[test]
477 fn test_mul_exact() {
478 let v = [
479 LoopRange::point(2),
480 LoopRange::finite(2, 3),
481 LoopRange::infinite(2),
482 ];
483
484 // r^3 is an interval for any r
485 for r in &v {
486 assert!(r.right_mul_is_exact(&LoopRange::point(3)));
487 }
488
489 let opt = LoopRange::opt();
490 let star = LoopRange::star();
491 let plus = LoopRange::plus();
492
493 // all example r contain integers >= 2
494 // r^opt and r^* contain 0 and integers >= 2
495 // so r^opt and r^* are not intervals
496 for r in &v {
497 assert!(!r.right_mul_is_exact(&opt));
498 assert!(!r.right_mul_is_exact(&star));
499 }
500
501 // 2^+ = { 2, 4, ...} : not an interval
502 assert!(!v[0].right_mul_is_exact(&plus));
503 // [2,3]^+ = [2, inf): interval
504 assert!(v[1].right_mul_is_exact(&plus));
505 // [2, inf^)+ = [2, inf): interval
506 assert!(v[2].right_mul_is_exact(&plus));
507
508 // plus^opt is star
509 assert!(plus.right_mul_is_exact(&opt));
510 assert_eq!(plus.mul(&opt), star);
511 // star^opt is star
512 assert!(star.right_mul_is_exact(&opt));
513 assert_eq!(star.mul(&opt), star);
514 // opt^opt is opt
515 assert!(opt.right_mul_is_exact(&opt));
516 assert_eq!(opt.mul(&opt), opt);
517 }
518}