aws_smt_strings/character_sets.rs
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Character sets and alphabet partitions
6//!
7//! An SMT-LIB character is an unsigned integer in the range `[0..MAX_CHAR]`. We represent characters
8//! by `u32` integers in this range. The constant [MAX_CHAR] is defined in [smt_strings][crate::smt_strings].
9//!
10//! A character set is either a single character (e.g., `'A'`) or a range of characters (e.g., `['0'..'9']`).
11//! We represent both as `CharSet`s. A `CharSet` is a pair of two integers, `start` and `end`, where
12//! `start <= end` and `end <= MAX_CHAR`. This denotes the character interval `[start, end]`.
13//!
14//! A partition is a collection of disjoint character sets, sorted in increasing order.
15//! A partition is then a list of intervals:
16//!
17//! [a<sub>0</sub>, b<sub>0</sub>], [a<sub>1</sub>, b<sub>1</sub>], ..., [a<sub>k</sub>, b<sub>k</sub>]
18//! where a<sub>i</sub> <= b<sub>i</sub> and b<sub>i</sub> < a<sub>i+1</sub>.
19//!
20//! A partition defines an equivalence relation over characters: two characters are
21//! equivalent either if they belong to the same class [a<sub>i</sub>, b<sub>i</sub>] or
22//! if they're outside of all the intervals.
23//!
24//! A partition with n intervals defines then (n+1) classes:
25//! C<sub>0</sub>, C<sub>1</sub>, ..., C<sub>n-1</sub> and D.
26//! - For i=0,..., n-1, class C<sub>i</sub> is the interval [a<sub>i</sub>, b<sub>i</sub>].
27//! - Class D is the complementary class, that is, the complement of Union(C<sub>0</sub>, ..., C<sub>n-1</sub>).
28//!
29//! Note: the complementary class D may be empty.
30//!
31//! Each class in a partition can be identified by its [ClassId]:
32//! - `ClassId::Interval(i)` denotes the class C<sub>i</sub>, that is, the interval [a<sub>i</sub>, b<sub>i</sub>].
33//! - `ClassId::Complement` denotes the complementary class D.
34//!
35//! Partitions are used to decompose the SMT-LIB alphabet (of 196608 characters) into a typically much smaller number
36//! of equivalent classes.
37//! - For regular expressions, a partition divides the alphabet into derivative classes.
38//! See [ReManager](crate::regular_expressions::ReManager).
39//! - For a finite-state automaton, a partition divides the alphabet into classes that have the
40//! same transitions. A character partition is attached to every state `s` in an automaton and the successors
41//! of `s` are defined for every class in this partition. See [Automaton](crate::automata::Automaton).
42//!
43
44use std::{
45 cmp::{max, min, Ordering},
46 fmt::Display,
47};
48
49use crate::{
50 errors::Error,
51 smt_strings::{char_to_smt, MAX_CHAR},
52};
53
54///
55/// Interval [start, end] where start <= end <= [MAX_CHAR].
56///
57/// This represents a range of characters.
58///
59#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
60pub struct CharSet {
61 start: u32,
62 end: u32,
63}
64
65impl Display for CharSet {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 let a = self.start;
68 let b = self.end;
69 if a == b {
70 write!(f, "{}", char_to_smt(a))
71 } else if a == 0 && b == MAX_CHAR {
72 write!(f, "\u{03a3}") // Sigma
73 } else {
74 write!(f, "[{}..{}]", char_to_smt(a), char_to_smt(b))
75 }
76 }
77}
78
79// partial order:
80// [a, b] < [c, d] iff b < c
81impl PartialOrd for CharSet {
82 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
83 if self == other {
84 Some(Ordering::Equal)
85 } else if self.end < other.start {
86 Some(Ordering::Less)
87 } else if self.start > other.end {
88 Some(Ordering::Greater)
89 } else {
90 None
91 }
92 }
93}
94
95impl CharSet {
96 /// Construct the singleton interval [x, x]
97 ///
98 /// - the integer x must be a valid SMT-LIB character (i.e., 0 <= x <= [MAX_CHAR])
99 pub fn singleton(x: u32) -> CharSet {
100 debug_assert!(x <= MAX_CHAR);
101 CharSet { start: x, end: x }
102 }
103
104 /// Construct the interval [x, y]
105 ///
106 /// - requires x <= y <= [MAX_CHAR]
107 pub fn range(x: u32, y: u32) -> CharSet {
108 debug_assert!(x <= y && y <= MAX_CHAR);
109 CharSet { start: x, end: y }
110 }
111
112 /// Construct the interval [0, [MAX_CHAR]]
113 pub fn all_chars() -> CharSet {
114 CharSet {
115 start: 0,
116 end: MAX_CHAR,
117 }
118 }
119
120 /// Check whether x is in this interval
121 ///
122 /// # Example
123 ///
124 /// ```
125 /// use aws_smt_strings::character_sets::CharSet;
126 ///
127 /// let c = CharSet::range('a' as u32, 'z' as u32);
128 /// assert!(c.contains('g' as u32));
129 /// assert!(!c.contains('0' as u32));
130 /// ```
131 pub fn contains(&self, x: u32) -> bool {
132 self.start <= x && x <= self.end
133 }
134
135 /// Check whether other is a subset of this set
136 ///
137 /// # Example
138 ///
139 /// ```
140 /// use aws_smt_strings::character_sets::CharSet;
141 ///
142 /// let c = CharSet::range('a' as u32, 'z' as u32);
143 /// assert!(c.covers(&CharSet::singleton('g' as u32)));
144 /// assert!(c.covers(&CharSet::range('t' as u32, 'z' as u32)));
145 /// assert!(! c.covers(&CharSet::range(0, 'k' as u32)));
146 /// ```
147 pub fn covers(&self, other: &CharSet) -> bool {
148 debug_assert!(other.start <= other.end);
149 self.start <= other.start && other.end <= self.end
150 }
151
152 /// Check whether this set is before x
153 ///
154 /// If set is [a, b] this checks whether b < x.
155 ///
156 /// # Example
157 ///
158 /// ```
159 /// use aws_smt_strings::character_sets::CharSet;
160 ///
161 /// let c = CharSet::range('a' as u32, 'z' as u32);
162 /// assert!(c.is_before(127));
163 /// assert!(! c.is_before('c' as u32));
164 /// ```
165 pub fn is_before(&self, x: u32) -> bool {
166 self.end < x
167 }
168
169 /// Check whether this set is after x
170 ///
171 /// If set is the interval [a, b], this checks whether x < a.
172 ///
173 /// # Example
174 ///
175 /// ```
176 /// use aws_smt_strings::character_sets::CharSet;
177 ///
178 /// let c = CharSet::range('a' as u32, 'z' as u32);
179 /// assert!(! c.is_after(127));
180 /// assert!(c.is_after('Z' as u32));
181 /// ```
182 pub fn is_after(&self, x: u32) -> bool {
183 x < self.start
184 }
185
186 /// Get the size of this set
187 ///
188 /// Return the number of characters in the interval
189 ///
190 /// # Example
191 ///
192 /// ```
193 /// use aws_smt_strings::character_sets::CharSet;
194 ///
195 /// let c = CharSet::range('a' as u32, 'z' as u32);
196 /// assert_eq!(c.size(), 26);
197 /// ```
198 pub fn size(&self) -> u32 {
199 self.end - self.start + 1
200 }
201
202 ///
203 /// Check whether the set is a singleton
204 ///
205 /// # Example
206 ///
207 /// ```
208 /// use aws_smt_strings::character_sets::CharSet;
209 ///
210 /// let c = CharSet::range('a' as u32, 'z' as u32);
211 /// let d = CharSet::singleton('K' as u32);
212 /// assert!(d.is_singleton());
213 /// assert!(! c.is_singleton());
214 /// ```
215 pub fn is_singleton(&self) -> bool {
216 self.start == self.end
217 }
218
219 /// Check whether this set is the full alphabet
220 ///
221 /// # Example
222 ///
223 /// ```
224 /// use aws_smt_strings::{character_sets::CharSet, smt_strings::MAX_CHAR};
225 ///
226 /// let c = CharSet::range(0, MAX_CHAR);
227 /// let d = CharSet::range('a' as u32, 'z' as u32);
228 /// assert!(c.is_alphabet());
229 /// assert!(! d.is_alphabet());
230 /// ```
231 pub fn is_alphabet(&self) -> bool {
232 self.start == 0 && self.end == MAX_CHAR
233 }
234
235 /// Pick a character in the set
236 ///
237 /// # Example
238 ///
239 /// ```
240 /// use aws_smt_strings::character_sets::CharSet;
241 ///
242 /// let c = CharSet::range('a' as u32, 'z' as u32);
243 /// let d = c.pick();
244 /// assert!('a' as u32 <= d && d <= 'z' as u32);
245 /// ```
246 pub fn pick(&self) -> u32 {
247 self.start
248 }
249
250 /// Intersection of two intervals
251 ///
252 /// - return None if the intersection is empty
253 /// - return Some(charset) otherwise.
254 ///
255 /// # Example
256 ///
257 /// ```
258 /// use aws_smt_strings::character_sets::CharSet;
259 ///
260 /// let c = CharSet::range('a' as u32, 'z' as u32); // ['a', 'z']
261 /// let d = CharSet::range('t' as u32, 127); // ['t', 127]
262 ///
263 /// // the intersection of c and d is ['t', 'z']
264 /// assert_eq!(c.inter(&d), Some(CharSet::range('t' as u32, 'z' as u32)));
265 /// ```
266 pub fn inter(&self, other: &CharSet) -> Option<CharSet> {
267 let max_start = max(self.start, other.start);
268 let min_end = min(self.end, other.end);
269 if max_start <= min_end {
270 Some(Self::range(max_start, min_end))
271 } else {
272 None
273 }
274 }
275
276 /// Intersection of an array of intervals
277 ///
278 /// See [inter][Self::inter]
279 /// - return None if the intersection is empty
280 /// - return Some(c) otherwise
281 pub fn inter_list(a: &[CharSet]) -> Option<CharSet> {
282 if a.is_empty() {
283 Some(Self::all_chars())
284 } else {
285 let mut result = a[0];
286 for s in &a[1..] {
287 match result.inter(s) {
288 None => return None,
289 Some(x) => result = x,
290 }
291 }
292 Some(result)
293 }
294 }
295
296 /// Union of two intervals
297 ///
298 /// - return None if the union is not an interval
299 /// - return Some(c) where c is a CharSet otherwise.
300 ///
301 /// # Example
302 ///
303 /// ```
304 /// use aws_smt_strings::character_sets::CharSet;
305 ///
306 /// let c = CharSet::range('a' as u32, 'z' as u32);
307 /// let d = CharSet::range('t' as u32, 127);
308 /// let e = CharSet::range('0' as u32, '9' as u32);
309 ///
310 /// assert_eq!(c.union(&d), Some(CharSet::range('a' as u32, 127)));
311 /// assert_eq!(c.union(&e), None);
312 /// ```
313 pub fn union(&self, other: &CharSet) -> Option<CharSet> {
314 let max_end = max(self.end, other.end);
315 // union([a, b], [c, d]) is an interval if a <= c-1 <= b or c <= a-1 <= d.
316 // we treat the case a==c separately to avoid underflow (when a=0 or c=0)
317 if self.start == other.start || (self.start < other.start && self.end >= other.start - 1) {
318 Some(Self::range(self.start, max_end))
319 } else if other.start < self.start && other.end >= self.start - 1 {
320 Some(Self::range(other.start, max_end))
321 } else {
322 None
323 }
324 }
325}
326
327///
328/// Class that covers an interval in a [CharPartition]
329///
330/// A CoverResult identifies the [CharPartition]'s class that contains
331/// an interval [a, b] if any.
332///
333/// For an interval [a, b] and a partition
334/// [a<sub>0</sub>, b<sub>0</sub>], ..., [a<sub>n</sub>, b<sub>n</sub>],
335/// CoverResult describes three possible outcomes:
336/// - `CoveredBy(i)` means that [a, b] is included in class C<sub>i</sub> = [a<sub>i</sub>, b<sub>i</sub>]
337/// - `DisjointFromAll` means that [a, b] does not intersect with any [a<sub>i</sub>, b<sub>i</sub>] so
338/// [a, b] is included in the complementary class.
339/// - `Overlaps` means that [a, b] and some interval [a<sub>i</sub>, b<sub>i</sub>] intersect,
340/// but [a, b] is not contained in this internal.
341///
342#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
343pub enum CoverResult {
344 /// CoveredBy(i) denotes the i-th interval [a<sub>i</sub>, b<sub>i</sub>] in the partition
345 CoveredBy(usize),
346 /// DisjointFromAll denotes the partition's complementary class
347 DisjointFromAll,
348 /// Overlaps means that [a, b] intersects some interval [a<sub>i</sub>, b<sub>i</sub>]
349 /// but is not fully included in this interval.
350 Overlaps,
351}
352
353impl Display for CoverResult {
354 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
355 match self {
356 CoverResult::CoveredBy(i) => write!(f, "CoveredBy({i})"),
357 CoverResult::DisjointFromAll => write!(f, "DisjointFromAll"),
358 CoverResult::Overlaps => write!(f, "Overlaps"),
359 }
360 }
361}
362
363/// ClassId
364///
365/// A class id identifies a character class defined by a partition.
366/// It can either be Interval(i) where i is an index between 0 and the partition length-1 or
367/// Complement.
368/// - Interval(i) denotes the i-th interval in a partition
369/// - Complement denotes the complementary class (not an interval in general)
370#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
371pub enum ClassId {
372 /// Id of a partition interval
373 Interval(usize),
374 /// Id of the complementary partition
375 Complement,
376}
377
378impl Display for ClassId {
379 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
380 match self {
381 ClassId::Interval(i) => write!(f, "Interval({i})"),
382 ClassId::Complement => write!(f, "Complement"),
383 }
384 }
385}
386
387///
388/// Collection of disjoint character sets.
389///
390/// Each character set is an interval [a, b] where a <= b.
391///
392/// The character sets are sorted in increasing order so a
393/// partition of length n is a sequence
394///
395/// [a<sub>0</sub>, b<sub>0</sub>], [a<sub>1</sub>, b<sub>1</sub>], ..., [a<sub>n-1</sub>, b<sub>n-1</sub>]
396/// where a<sub>i</sub> <= b<sub>i</sub> and b<sub>i</sub> < a<sub>i+1</sub>.
397///
398/// This divides the alphabet into n+1 classes C<sub>0</sub>, ..., C<sub>n-1</sub>, and D:
399/// - C<sub>i</sub> = { x | a<sub>i</sub> <= x <= b<sub>i</sub> }
400/// - D = complement of Union(C<sub>0</sub>, ..., C<sub>n-1</sub>)
401///
402/// Each class in a partition can be identified by its [ClassId]:
403/// - `ClassId::Interval(i)` denotes the class C<sub>i</sub>, that is, the interval [a<sub>i</sub>, b<sub>i</sub>].
404/// - `ClassId::Complement` denotes the complementary class D.
405//
406// The partition structure includes an integer comp_witness, which is the smallest integer that doesn't
407// belong to any of the intervals C<sub>i</sub>. So if comp_witness <= [MAX_CHAR], it's an element of
408// the complementary class. Otherwise, comp_witness = [MAX_CHAR]+1 and the complementary class is empty.
409//
410#[derive(Debug, PartialEq, Eq, Default, Clone)]
411pub struct CharPartition {
412 list: Vec<CharSet>,
413 comp_witness: u32,
414}
415
416impl CharPartition {
417 /// Number of intervals in the partition
418 pub fn len(&self) -> usize {
419 self.list.len()
420 }
421
422 /// Check whether the partition is empty (i.e., no intervals)
423 pub fn is_empty(&self) -> bool {
424 self.list.is_empty()
425 }
426
427 /// Create an empty partition
428 pub fn new() -> Self {
429 CharPartition::default()
430 }
431
432 /// Partition with a single character set
433 ///
434 /// # Example
435 ///
436 /// ```
437 /// use aws_smt_strings::character_sets::*;
438 ///
439 /// let p = CharPartition::from_set(&CharSet::range('a' as u32, 'b' as u32));
440 /// assert_eq!(p.len(), 1);
441 /// assert_eq!(p.get(0), ('a' as u32, 'b' as u32))
442 /// ```
443 pub fn from_set(c: &CharSet) -> Self {
444 let witness = if c.start > 0 { 0 } else { c.end + 1 };
445 CharPartition {
446 list: vec![*c],
447 comp_witness: witness,
448 }
449 }
450
451 /// Build a partition from a CharSet iterator
452 ///
453 /// Succeeds if the CharSets are pairwise disjoint.
454 /// Fails otherwise.
455 ///
456 /// # Errors
457 ///
458 /// If some CharSets have a non-empty intersection,
459 /// return Err([Error::NonDisjointCharSets]).
460 ///
461 /// # Example
462 ///
463 /// ```
464 /// use aws_smt_strings::character_sets::*;
465 ///
466 /// # use std::error::Error;
467 /// #
468 /// # fn main() -> Result<(), Box<dyn Error>> {
469 /// let v = [
470 /// CharSet::range(120, 400),
471 /// CharSet::range(0, 10),
472 /// CharSet::range(1000, 2000)];
473 ///
474 /// let p = CharPartition::try_from_iter(v.into_iter().copied())?;
475 /// assert_eq!(p.len(), 3);
476 /// assert_eq!(p.get(0), (0, 10));
477 /// assert_eq!(p.get(1), (120, 400));
478 /// assert_eq!(p.get(2), (1000, 2000));
479 /// # Ok(())
480 /// # }
481 /// ```
482 pub fn try_from_iter(iter: impl Iterator<Item = CharSet>) -> Result<Self, Error> {
483 let mut v: Vec<CharSet> = iter.collect();
484 let mut comp_witness = 0;
485 if !v.is_empty() {
486 v.sort_by_key(|c| c.start);
487 let mut prev = &v[0];
488 if prev.start <= comp_witness {
489 comp_witness = prev.end + 1;
490 }
491 for c in &v[1..] {
492 if c.start <= prev.end {
493 return Err(Error::NonDisjointCharSets);
494 }
495 if c.start <= comp_witness {
496 comp_witness = c.end + 1;
497 }
498 prev = c;
499 }
500 }
501 Ok(CharPartition {
502 list: v,
503 comp_witness,
504 })
505 }
506
507 /// Build a partition from a list of CharSets
508 ///
509 /// Succeeds if the CharSets in `a` are pairwise disjoint.
510 /// Fails otherwise.
511 ///
512 /// # Errors
513 ///
514 /// If some elements of `a` have a non-empty intersection,
515 /// return Err([Error::NonDisjointCharSets]).
516 ///
517 /// # Example
518 ///
519 /// ```
520 /// use aws_smt_strings::character_sets::*;
521 ///
522 /// # use std::error::Error;
523 /// #
524 /// # fn main() -> Result<(), Box<dyn Error>> {
525 /// let v = [
526 /// CharSet::range(120, 400),
527 /// CharSet::range(0, 10),
528 /// CharSet::range(1000, 2000)];
529 ///
530 /// let p = CharPartition::try_from_list(&v)?;
531 /// assert_eq!(p.len(), 3);
532 /// assert_eq!(p.get(0), (0, 10));
533 /// assert_eq!(p.get(1), (120, 400));
534 /// assert_eq!(p.get(2), (1000, 2000));
535 ///
536 /// # Ok(())
537 /// # }
538 /// ```
539 ///
540 pub fn try_from_list(a: &[CharSet]) -> Result<Self, Error> {
541 Self::try_from_iter(a.iter().copied())
542 }
543
544 /// Add the interval [start, end] at the end of the partition.
545 ///
546 /// Requires `start <= end` and `end <= MAX_CHAR`.
547 /// If the partition is not empty, `start` must also be larger than the end of the
548 /// last interval in the partition.
549 ///
550 /// # Example
551 /// This constructs the two-interval partition `['0', '9'] ['Z', 'Z']`.
552 ///
553 /// ```
554 /// use aws_smt_strings::character_sets::*;
555 ///
556 /// let mut p = CharPartition::new();
557 /// p.push('0' as u32, '9' as u32);
558 /// p.push('Z' as u32, 'Z' as u32);
559 ///
560 /// assert_eq!(p.len(), 2);
561 /// assert_eq!(p.get(0), ('0' as u32, '9' as u32));
562 /// assert_eq!(p.get(1), ('Z' as u32, 'Z' as u32));
563 /// ```
564 pub fn push(&mut self, start: u32, end: u32) {
565 debug_assert!(start <= end && end <= MAX_CHAR);
566 debug_assert!(self.list.is_empty() || start > self.list.last().unwrap().end);
567 self.list.push(CharSet { start, end });
568 if start <= self.comp_witness {
569 self.comp_witness = end + 1;
570 }
571 }
572
573 /// Get the pair (start, end) of the i-the interval in the partition
574 ///
575 /// Intervals are indexed from 0 to `p.len() - 1` (inclusive).
576 /// - if i < p.len(), return the start and end of the i-th interval.
577 /// - if i >= p.len(), return the pair `(MAX_CHAR+1, MAX_CHAR+1)`
578 ///
579 /// # Example
580 ///
581 /// ```
582 /// use aws_smt_strings::character_sets::*;
583 /// use aws_smt_strings::smt_strings::MAX_CHAR;
584 ///
585 /// // partition ['0', '9'] ['Z', 'Z']
586 /// let mut p = CharPartition::new();
587 /// p.push('0' as u32, '9' as u32);
588 /// p.push('Z' as u32, 'Z' as u32);
589 ///
590 /// assert_eq!(p.get(0), ('0' as u32, '9' as u32)); // first interval
591 /// assert_eq!(p.get(3), (MAX_CHAR + 1, MAX_CHAR + 1)); // out-of-range index
592 /// ```
593 pub fn get(&self, i: usize) -> (u32, u32) {
594 if i < self.list.len() {
595 let r = &self.list[i];
596 (r.start, r.end)
597 } else {
598 (MAX_CHAR + 1, MAX_CHAR + 1)
599 }
600 }
601
602 /// Get the i-the interval in the partition as a CharSet.
603 ///
604 /// # Panics
605 ///
606 /// If i is out of bound, that is, if i >= number of intervals in the partition.
607 ///
608 /// # Example
609 ///
610 /// ```
611 /// use aws_smt_strings::character_sets::*;
612 ///
613 /// let mut p = CharPartition::new();
614 /// p.push('a' as u32, 'z' as u32);
615 ///
616 /// assert_eq!(p.interval(0), CharSet::range('a' as u32, 'z' as u32));
617 /// ```
618 pub fn interval(&self, i: usize) -> CharSet {
619 self.list[i]
620 }
621
622 /// Get the start of the i-th interval
623 ///
624 /// - return `MAX_CHAR+1` if i is out of bound.
625 ///
626 /// # Example
627 ///
628 /// ```
629 /// use aws_smt_strings::character_sets::*;
630 /// use aws_smt_strings::smt_strings::MAX_CHAR;
631 ///
632 /// let mut p = CharPartition::new();
633 /// p.push('a' as u32, 'z' as u32);
634 ///
635 /// assert_eq!(p.start(0), 'a' as u32);
636 /// assert_eq!(p.start(1), MAX_CHAR+1);
637 /// ```
638 pub fn start(&self, i: usize) -> u32 {
639 if i < self.len() {
640 self.list[i].start
641 } else {
642 MAX_CHAR + 1
643 }
644 }
645
646 /// Get the end of the i-th interval
647 ///
648 /// - return `MAX_CHAR+1` if i is out of bound
649 ///
650 /// # Example
651 ///
652 /// ```
653 /// use aws_smt_strings::character_sets::*;
654 /// use aws_smt_strings::smt_strings::MAX_CHAR;
655 ///
656 /// let mut p = CharPartition::new();
657 /// p.push('a' as u32, 'z' as u32);
658 ///
659 /// assert_eq!(p.end(0), 'z' as u32);
660 /// assert_eq!(p.end(1), MAX_CHAR+1);
661 /// ```
662 pub fn end(&self, index: usize) -> u32 {
663 if index < self.len() {
664 self.list[index].end
665 } else {
666 MAX_CHAR + 1
667 }
668 }
669
670 /// Pick an element in interval i
671 ///
672 /// # Panics
673 ///
674 /// if i >= number of intervals in the partition
675 ///
676 /// # Example
677 ///
678 /// ```
679 /// use aws_smt_strings::character_sets::*;
680 ///
681 /// let mut p = CharPartition::new();
682 /// p.push('a' as u32, 'z' as u32);
683 ///
684 /// assert!('a' as u32 <= p.pick(0) && p.pick(0) <= 'z' as u32);
685 /// ```
686 pub fn pick(&self, i: usize) -> u32 {
687 self.list[i].start
688 }
689
690 /// Check whether the complementary class is empty
691 ///
692 /// # Example
693 ///
694 /// ```
695 /// use aws_smt_strings::character_sets::*;
696 /// use aws_smt_strings::smt_strings::MAX_CHAR;
697 ///
698 /// let mut p = CharPartition::new();
699 /// p.push(0, 127);
700 /// assert!(! p.empty_complement());
701 /// p.push(128, MAX_CHAR);
702 /// assert!(p.empty_complement());
703 /// ```
704 pub fn empty_complement(&self) -> bool {
705 self.comp_witness > MAX_CHAR
706 }
707
708 /// Pick an element in the complementary class
709 ///
710 /// - return `MAX_CHAR+1` if the complementary class is empty
711 ///
712 /// # Example
713 ///
714 /// ```
715 /// use aws_smt_strings::character_sets::*;
716 /// use aws_smt_strings::smt_strings::MAX_CHAR;
717 ///
718 /// // partition with a single interval ['a', 'z']
719 /// let p = CharPartition::from_set(&CharSet::range('a' as u32, 'z' as u32));
720 ///
721 /// // the complementary class is the union of [0, 'a' - 1] and ['z' + 1, MAX_CHAR]
722 /// let x = p.pick_complement();
723 /// assert!(x < ('a' as u32) || ('z' as u32) < x && x <= MAX_CHAR);
724 /// ```
725 pub fn pick_complement(&self) -> u32 {
726 self.comp_witness
727 }
728
729 /// Check whether a class id is valid
730 ///
731 /// # Example
732 ///
733 /// ```
734 /// use aws_smt_strings::character_sets::*;
735 ///
736 /// // partition with two intervals and three classes
737 /// let mut p = CharPartition::new();
738 /// p.push('0' as u32, '9' as u32);
739 /// p.push('Z' as u32, 'Z' as u32);
740 ///
741 /// assert!(p.valid_class_id(ClassId::Interval(0)));
742 /// assert!(p.valid_class_id(ClassId::Interval(1)));
743 /// assert!(p.valid_class_id(ClassId::Complement));
744 ///
745 /// assert!(! p.valid_class_id(ClassId::Interval(2)));
746 /// ```
747 pub fn valid_class_id(&self, cid: ClassId) -> bool {
748 use ClassId::*;
749 match cid {
750 Interval(i) => i < self.len(),
751 Complement => !self.empty_complement(),
752 }
753 }
754
755 /// Number of classes
756 ///
757 /// # Example
758 ///
759 /// ```
760 /// use aws_smt_strings::character_sets::*;
761 ///
762 /// // partition with two intervals and three classes
763 /// let mut p = CharPartition::new();
764 /// p.push('0' as u32, '9' as u32);
765 /// p.push('Z' as u32, 'Z' as u32);
766 ///
767 /// assert_eq!(p.num_classes(), 3);
768 /// ```
769 pub fn num_classes(&self) -> usize {
770 let n = self.len();
771 if self.empty_complement() {
772 n
773 } else {
774 n + 1
775 }
776 }
777
778 /// Pick an element in a class
779 ///
780 /// - cid identifies the class: if cid = Interval(i) then pick in the i-th interval of p
781 /// (intervals are indexed from 0 to p.len() - 1)
782 /// - if cid is Complement then pick in the complementary class
783 ///
784 /// # Panics
785 ///
786 /// If cid is Interval(i) and i >= p.len() or cid is Complement and the complementary class
787 /// is empty.
788 ///
789 /// # Example
790 ///
791 /// ```
792 /// use aws_smt_strings::character_sets::{ClassId::*, *};
793 /// use aws_smt_strings::smt_strings::MAX_CHAR;
794 ///
795 /// let mut p = CharPartition::new();
796 /// p.push('0' as u32, '9' as u32);
797 /// p.push('Z' as u32, 'Z' as u32);
798 ///
799 /// let x = p.pick_in_class(Interval(1));
800 /// assert_eq!(x, 'Z' as u32); // since interval 1 is ['Z', 'Z']
801 ///
802 /// let y = p.pick_in_class(Complement);
803 /// assert!(0 <= y && y < ('0' as u32) ||
804 /// ('9' as u32) < y && y < ('Z' as u32) ||
805 /// ('Z' as u32) < y && y <= MAX_CHAR);
806 /// ```
807 pub fn pick_in_class(&self, cid: ClassId) -> u32 {
808 use ClassId::*;
809 match cid {
810 Interval(i) => self.pick(i),
811 Complement => {
812 assert!(!self.empty_complement());
813 self.pick_complement()
814 }
815 }
816 }
817
818 /// Iterator to go through all intervals in the partition
819 pub fn ranges(&self) -> impl Iterator<Item = &CharSet> {
820 self.list.iter()
821 }
822
823 /// Iterator to go through all valid class ids
824 pub fn class_ids(&self) -> ClassIdIterator<'_> {
825 ClassIdIterator {
826 partition: self,
827 counter: 0,
828 }
829 }
830
831 /// Iterator to pick one character in each class
832 /// (including the complementary class).
833 pub fn picks(&self) -> PickIterator<'_> {
834 PickIterator {
835 partition: self,
836 counter: 0,
837 }
838 }
839
840 ///
841 /// Search for the class that contains a character
842 ///
843 /// - Return `Interval(i)` if `a_i <= x <= b_i`
844 /// - Return `Complement` if x is not in any interval
845 ///
846 pub fn class_of_char(&self, x: u32) -> ClassId {
847 #[allow(clippy::many_single_char_names)]
848 fn binary_search(p: &[CharSet], x: u32) -> ClassId {
849 let mut i = 0;
850 let mut j = p.len();
851 while i < j {
852 let h = i + (j - i) / 2;
853 if p[h].contains(x) {
854 return ClassId::Interval(h);
855 }
856 if p[h].is_before(x) {
857 i = h + 1;
858 } else {
859 j = h;
860 }
861 }
862 ClassId::Complement
863 }
864
865 binary_search(&self.list, x)
866 }
867
868 ///
869 /// Search for an interval that covers a character set
870 ///
871 /// The character set is an interval [a, b] where `0 <= a <= b <= MAX_CHAR`.
872 /// - Return `CoverResult::CoveredBy(i)` if [a, b] is included in the i-th
873 /// interval of the partition.
874 /// - Return `CoverResult::DisjointFromAll` if [a, b] does not overlap any
875 /// interval in the partition.
876 /// - Return `CoverResult::Overlaps` if [a, b] overlaps some interval in
877 /// the partition but it not contained in this interval.
878 ///
879 /// # Example
880 ///
881 /// ```
882 /// use aws_smt_strings::character_sets::*;
883 ///
884 /// let p = CharPartition::from_set(&CharSet::range('0' as u32, '9' as u32));
885 /// let test1 = CharSet::range('4' as u32, '8' as u32);
886 /// let test2 = CharSet::range('a' as u32, 'z' as u32);
887 /// let test3 = CharSet::range('5' as u32, '?' as u32);
888 ///
889 /// assert_eq!(p.interval_cover(&test1), CoverResult::CoveredBy(0));
890 /// assert_eq!(p.interval_cover(&test2), CoverResult::DisjointFromAll);
891 /// assert_eq!(p.interval_cover(&test3), CoverResult::Overlaps);
892 /// ```
893 pub fn interval_cover(&self, set: &CharSet) -> CoverResult {
894 // search for the largest i such that a_i <= x
895 // return 0 if there's no such i (either because p is empty
896 // or because x < a_0)
897 #[allow(clippy::many_single_char_names)]
898 fn binary_search(p: &[CharSet], x: u32) -> usize {
899 let mut i = 0;
900 let mut j = p.len();
901 while i + 1 < j {
902 let h = i + (j - i) / 2;
903 if p[h].start <= x {
904 i = h
905 } else {
906 j = h
907 }
908 }
909 i
910 }
911
912 let a = set.start;
913 let b = set.end;
914 debug_assert!(a <= b && b <= MAX_CHAR);
915
916 let i = binary_search(&self.list, a);
917 let (a_i, b_i) = self.get(i);
918
919 if a < a_i {
920 debug_assert!(i == 0);
921 if b < a_i {
922 // a <= b < a_0
923 CoverResult::DisjointFromAll
924 } else {
925 // a < a_0 <= b
926 CoverResult::Overlaps
927 }
928 } else if a <= b_i {
929 if b <= b_i {
930 // a_i <= a <= b <= b_i
931 CoverResult::CoveredBy(i)
932 } else {
933 // a_i <= a <= b_i < b
934 CoverResult::Overlaps
935 }
936 } else {
937 // note: we know i < p.len() <= MAX_CHAR so i+1 can't overflow here
938 let next_ai = self.end(i + 1);
939 if b < next_ai {
940 // a_i <= b_i < a <= b < a_{i+1}
941 CoverResult::DisjointFromAll
942 } else {
943 // a_i <= b_i < a < a_{i+1} <= b
944 CoverResult::Overlaps
945 }
946 }
947 }
948
949 /// Get the class id for a character set
950 ///
951 /// - The class id is Interval(i) if the set is covered by interval
952 /// [a<sub>i</sub>, b<sub>i</sub>] of the partition
953 /// - The class id is Complement if the set is covered by the partition's
954 /// complementary class
955 /// - Otherwise, the class id is not defined.
956 ///
957 /// # Error
958 ///
959 /// If the class id is not defined for s, return Err([Error::AmbiguousCharSet])
960 ///
961 pub fn class_of_set(&self, s: &CharSet) -> Result<ClassId, Error> {
962 use ClassId::*;
963 use CoverResult::*;
964
965 match self.interval_cover(s) {
966 CoveredBy(i) => Ok(Interval(i)),
967 DisjointFromAll => Ok(Complement),
968 Overlaps => Err(Error::AmbiguousCharSet),
969 }
970 }
971
972 /// Check whether character set c is consistent with the partition.
973 ///
974 /// - return true if c is included in one partition's class or if
975 /// c is included in the partition's complementary class.
976 ///
977 /// # Example
978 ///
979 /// ```
980 /// use aws_smt_strings::character_sets::*;
981 ///
982 /// let p = CharPartition::from_set(&CharSet::range('0' as u32, '9' as u32));
983 /// let test1 = CharSet::range('4' as u32, '8' as u32);
984 /// let test2 = CharSet::range('a' as u32, 'z' as u32);
985 /// let test3 = CharSet::range('5' as u32, '?' as u32);
986 ///
987 /// assert!(p.good_char_set(&test1));
988 /// assert!(p.good_char_set(&test2));
989 /// assert!(! p.good_char_set(&test3));
990 /// ```
991 pub fn good_char_set(&self, c: &CharSet) -> bool {
992 !matches!(self.interval_cover(c), CoverResult::Overlaps)
993 }
994}
995
996impl Display for CharPartition {
997 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
998 write!(f, "{{ ")?;
999 for r in self.ranges() {
1000 write!(f, "{r} ")?;
1001 }
1002 write!(f, "}}")
1003 }
1004}
1005
1006/// Iterator to go through all valid ClassId's in a partition
1007#[derive(Debug)]
1008pub struct ClassIdIterator<'a> {
1009 partition: &'a CharPartition,
1010 counter: usize,
1011}
1012
1013impl<'a> Iterator for ClassIdIterator<'a> {
1014 type Item = ClassId;
1015
1016 fn next(&mut self) -> Option<Self::Item> {
1017 let i = self.counter;
1018 self.counter += 1;
1019 if i < self.partition.len() {
1020 Some(ClassId::Interval(i))
1021 } else if i == self.partition.len() && !self.partition.empty_complement() {
1022 Some(ClassId::Complement)
1023 } else {
1024 None
1025 }
1026 }
1027
1028 fn size_hint(&self) -> (usize, Option<usize>) {
1029 let mut size = self.partition.len();
1030 if !self.partition.empty_complement() {
1031 size += 1;
1032 }
1033 (size, Some(size))
1034 }
1035}
1036
1037/// Iterator to pick a character in each class of a partition
1038///
1039/// If the partition consists of `n` intervals, the iterator will
1040/// pick an element in each interval first, then it will pick an
1041/// element in the complementary partition if this complementary
1042/// partition is not empty.
1043///
1044#[derive(Debug)]
1045pub struct PickIterator<'a> {
1046 partition: &'a CharPartition,
1047 counter: usize,
1048}
1049
1050impl<'a> Iterator for PickIterator<'a> {
1051 type Item = u32;
1052
1053 fn next(&mut self) -> Option<u32> {
1054 let i = self.counter;
1055 self.counter += 1;
1056 if i < self.partition.len() {
1057 Some(self.partition.pick(i))
1058 } else if i == self.partition.len() && !self.partition.empty_complement() {
1059 Some(self.partition.pick_complement())
1060 } else {
1061 None
1062 }
1063 }
1064}
1065
1066///
1067/// Merge two partitions p1 and p2
1068///
1069/// The result is the coarsest partition p that is a refinement of both p1 and p2.
1070/// This means that every interval of p1 and every interval of p2 is the union
1071/// of one or more successive intervals of p.
1072///
1073/// More precisely:
1074/// - let p1 = [a<sub>0</sub>, b<sub>0</sub>], ..., [a<sub>n</sub>, b<sub>n</sub>]
1075/// - let p2 = [c<sub>0</sub>, d<sub>0</sub>], ..., [c<sub>m</sub>, d<sub>m</sub>]
1076/// - let D1 = complement(Union [a<sub>i</sub>, b<sub>i</sub>])
1077/// - let D2 = complement(Union [c<sub>j</sub>, d<sub>j</sub>])
1078///
1079/// Then every interval of p is of one of the following forms
1080/// 1) [a<sub>i</sub>, b<sub>i</sub>] inter D2
1081/// 2) D1 inter [c<sub>j</sub>, d<sub>j</sub>]
1082/// 3) [a<sub>i</sub>, b<sub>j</sub>] inter [c<sub>j</sub>, d<sub>j</sub>]
1083///
1084/// where 0 <= i <= n and 0 <= j <= m.
1085///
1086/// # Example
1087///
1088/// ```
1089/// use aws_smt_strings::character_sets::*;
1090///
1091/// // partition p with intervals ['0', '9'] and ['a', 'g']
1092/// let mut p = CharPartition::new();
1093/// p.push('0' as u32, '9' as u32);
1094/// p.push('a' as u32, 'g' as u32);
1095///
1096/// // partition q with intervals ['5', '5'] and ['c', 'z']
1097/// let mut q = CharPartition::new();
1098/// q.push('5' as u32, '5' as u32);
1099/// q.push('c' as u32, 'z' as u32);
1100///
1101/// // r = merge(p, q) contains six intervals
1102/// // r = ['0', '4'] ['5', '5'] ['6', '9'] ['a', 'b'] ['c', 'g'] ['h', 'z']
1103/// let r = merge_partitions(&p, &q);
1104///
1105/// assert_eq!(r.len(), 6);
1106/// assert_eq!(r.get(0), ('0' as u32, '4' as u32));
1107/// assert_eq!(r.get(1), ('5' as u32, '5' as u32));
1108/// assert_eq!(r.get(2), ('6' as u32, '9' as u32));
1109/// assert_eq!(r.get(3), ('a' as u32, 'b' as u32));
1110/// assert_eq!(r.get(4), ('c' as u32, 'g' as u32));
1111/// assert_eq!(r.get(5), ('h' as u32, 'z' as u32));
1112/// ```
1113#[allow(clippy::many_single_char_names)]
1114pub fn merge_partitions(p1: &CharPartition, p2: &CharPartition) -> CharPartition {
1115 // consume interval i of p as [a, b]
1116 fn next_interval(p: &CharPartition, i: usize) -> (usize, u32, u32) {
1117 let (x, y) = p.get(i);
1118 (i + 1, x, y)
1119 }
1120
1121 let mut triple1 = next_interval(p1, 0);
1122 let mut triple2 = next_interval(p2, 0);
1123
1124 let mut result = CharPartition::new();
1125 while triple1.2 <= MAX_CHAR || triple2.2 <= MAX_CHAR {
1126 let (i, a, b) = triple1;
1127 let (j, c, d) = triple2;
1128 // [a, b] is a sub-range of the i-th interval of p1
1129 // [c, d] is a sub-range of the j-th interval of p2
1130 if b < c {
1131 // [a, b] < [c, d]
1132 result.push(a, b);
1133 triple1 = next_interval(p1, i);
1134 } else if d < a {
1135 // [c, d] < [a, b]
1136 result.push(c, d);
1137 triple2 = next_interval(p2, j);
1138 } else if c < a {
1139 // [c, d] and [a, b] overlap and c<a
1140 result.push(c, a - 1);
1141 triple2.1 = a; // next interval in p2 is [a, d]
1142 } else if a < c {
1143 // [c, d] and [a, b] overlap and a<c
1144 result.push(a, c - 1);
1145 triple1.1 = c; // next interval in p1 is [c, b]
1146 } else if b < d {
1147 // a=c and b<d: [a, b] is a prefix of [c, d]
1148 result.push(a, b);
1149 triple1 = next_interval(p1, i);
1150 triple2.1 = b + 1;
1151 } else if d < b {
1152 // a=c and d<b: [c, d] is a prefix of [a, b]
1153 result.push(c, d);
1154 triple1.1 = d + 1;
1155 triple2 = next_interval(p2, j);
1156 } else {
1157 // a=c and b=d
1158 result.push(a, b);
1159 triple1 = next_interval(p1, i);
1160 triple2 = next_interval(p2, j);
1161 }
1162 }
1163 result
1164}
1165
1166///
1167/// Merge a list of partitions
1168///
1169/// See [merge_partitions]
1170pub fn merge_partition_list<'a>(list: impl Iterator<Item = &'a CharPartition>) -> CharPartition {
1171 let mut result = CharPartition::new();
1172 for p in list {
1173 result = merge_partitions(&result, p)
1174 }
1175 result
1176}
1177
1178#[cfg(test)]
1179mod test {
1180 use super::*;
1181
1182 fn good_partition(p: &CharPartition) -> bool {
1183 let mut prev_end = MAX_CHAR + 1;
1184 for s in p.ranges() {
1185 if s.start > s.end {
1186 return false;
1187 }
1188 if prev_end <= MAX_CHAR && s.start <= prev_end {
1189 return false;
1190 }
1191 if s.start <= p.comp_witness && p.comp_witness <= s.end {
1192 return false;
1193 }
1194 prev_end = s.end;
1195 }
1196 true
1197 }
1198
1199 fn example1() -> CharPartition {
1200 let mut p = CharPartition::new();
1201 p.push('0' as u32, '9' as u32);
1202 p.push('Z' as u32, 'Z' as u32);
1203 p.push('f' as u32, 'q' as u32);
1204 p
1205 }
1206
1207 fn example2() -> CharPartition {
1208 let mut p = CharPartition::new();
1209 p.push('0' as u32, '0' as u32);
1210 p.push('A' as u32, 'G' as u32);
1211 p.push('H' as u32, 'M' as u32);
1212 p.push('W' as u32, 'Z' as u32);
1213 p.push('a' as u32, 'n' as u32);
1214 p.push('q' as u32, 'r' as u32);
1215 p
1216 }
1217
1218 #[test]
1219 fn test_simple() {
1220 let p1 = CharPartition::new();
1221 let p2 = example1();
1222 let p3 = example1();
1223 let p4 = example2();
1224 let p5 = CharPartition::from_set(&CharSet::all_chars());
1225
1226 assert!(good_partition(&p1));
1227 assert!(good_partition(&p2));
1228 assert!(good_partition(&p4));
1229 assert!(good_partition(&p5));
1230
1231 assert!(!p1.empty_complement());
1232 assert!(p1.pick_complement() == 0);
1233 assert!(!p2.empty_complement());
1234 assert!(p2.pick_complement() == 0);
1235 assert!(!p4.empty_complement());
1236 assert!(p4.pick_complement() == 0);
1237 assert!(p5.empty_complement());
1238
1239 assert_eq!(&p2, &p3);
1240 assert_ne!(&p2, &p4);
1241 assert_ne!(&p1, &p2);
1242 assert_ne!(&p1, &p4);
1243 assert_ne!(&p1, &p5);
1244
1245 println!("Empty partition: {}", &p1);
1246 println!("Example1: {}", &p2);
1247 println!("Example2: {}", &p4);
1248 println!("All chars: {}", &p5);
1249 }
1250
1251 #[test]
1252 fn test_from_list() {
1253 let v = [
1254 CharSet::range(120, 400),
1255 CharSet::range(0, 10),
1256 CharSet::range(1000, 2000),
1257 ];
1258
1259 match CharPartition::try_from_list(&v) {
1260 Ok(p) => {
1261 println!("From list succeeded: {}", &p);
1262 assert_eq!(p.len(), 3);
1263 assert_eq!(p.get(0), (0, 10));
1264 assert_eq!(p.get(1), (120, 400));
1265 assert_eq!(p.get(2), (1000, 2000));
1266 assert!(good_partition(&p));
1267 }
1268 Err(e) => panic!("Partition::try_from_list failed with error {}", e),
1269 }
1270
1271 let w = [
1272 CharSet::range(120, 400),
1273 CharSet::range(1000, 2000),
1274 CharSet::range(0, 10),
1275 CharSet::range(100, 200),
1276 ];
1277
1278 match CharPartition::try_from_list(&w) {
1279 Ok(_) => panic!("Partition::try_from_list should have failed"),
1280 Err(e) => println!("Partition::try_from_list failed with error {e} as expected"),
1281 }
1282 }
1283
1284 #[test]
1285 fn test_search() {
1286 use super::ClassId::*;
1287
1288 let p = CharPartition::new();
1289
1290 assert_eq!(p.class_of_char('a' as u32), Complement);
1291 assert_eq!(p.class_of_char(0), Complement);
1292 assert_eq!(p.class_of_char(MAX_CHAR), Complement);
1293
1294 let p2 = example1();
1295
1296 assert_eq!(p2.class_of_char(10), Complement);
1297 assert_eq!(p2.class_of_char('0' as u32), Interval(0));
1298 assert_eq!(p2.class_of_char('5' as u32), Interval(0));
1299 assert_eq!(p2.class_of_char('9' as u32), Interval(0));
1300 assert_eq!(p2.class_of_char('A' as u32), Complement);
1301 assert_eq!(p2.class_of_char('Z' as u32), Interval(1));
1302 assert_eq!(p2.class_of_char('e' as u32), Complement);
1303 assert_eq!(p2.class_of_char('g' as u32), Interval(2));
1304 assert_eq!(p2.class_of_char('z' as u32), Complement);
1305
1306 let p3 = example2();
1307 assert_eq!(p3.class_of_char(10), Complement);
1308 assert_eq!(p3.class_of_char('0' as u32), Interval(0));
1309 assert_eq!(p3.class_of_char('5' as u32), Complement);
1310 assert_eq!(p3.class_of_char('9' as u32), Complement);
1311 assert_eq!(p3.class_of_char('A' as u32), Interval(1));
1312 assert_eq!(p3.class_of_char('F' as u32), Interval(1));
1313 assert_eq!(p3.class_of_char('G' as u32), Interval(1));
1314 assert_eq!(p3.class_of_char('H' as u32), Interval(2));
1315 assert_eq!(p3.class_of_char('L' as u32), Interval(2));
1316 assert_eq!(p3.class_of_char('O' as u32), Complement);
1317 assert_eq!(p3.class_of_char('W' as u32), Interval(3));
1318 assert_eq!(p3.class_of_char('Z' as u32), Interval(3));
1319 assert_eq!(p3.class_of_char('^' as u32), Complement);
1320 assert_eq!(p3.class_of_char('e' as u32), Interval(4));
1321 assert_eq!(p3.class_of_char('g' as u32), Interval(4));
1322 assert_eq!(p3.class_of_char('p' as u32), Complement);
1323 assert_eq!(p3.class_of_char('q' as u32), Interval(5));
1324 assert_eq!(p3.class_of_char('r' as u32), Interval(5));
1325 assert_eq!(p3.class_of_char('s' as u32), Complement);
1326 assert_eq!(p3.class_of_char('z' as u32), Complement);
1327
1328 let p4 = CharPartition::from_set(&CharSet::all_chars());
1329 assert_eq!(p4.class_of_char(0), Interval(0));
1330 assert_eq!(p4.class_of_char(MAX_CHAR), Interval(0));
1331 }
1332
1333 #[test]
1334 fn test_merge() {
1335 let v = vec![CharPartition::new(), example1(), example2()];
1336
1337 for p in &v {
1338 for q in &v {
1339 let m = merge_partitions(p, q);
1340 println!("Merge({}, {}) = {}", p, q, &m);
1341
1342 assert!(good_partition(&m));
1343
1344 if p.is_empty() {
1345 assert_eq!(&m, q);
1346 }
1347 if q.is_empty() {
1348 assert_eq!(&m, p);
1349 }
1350 if p == q {
1351 assert_eq!(&m, p);
1352 }
1353 }
1354 }
1355 }
1356
1357 #[test]
1358 fn test_inter() {
1359 let a = CharSet::singleton(0);
1360 let b = CharSet::range(1, 20);
1361 let c = CharSet::range(30, 60);
1362 let d = CharSet::range(0, 30);
1363
1364 assert_eq!(a.inter(&a), Some(a));
1365 assert_eq!(a.inter(&b), None);
1366 assert_eq!(a.inter(&c), None);
1367 assert_eq!(a.inter(&d), Some(a));
1368
1369 assert_eq!(b.inter(&a), None);
1370 assert_eq!(b.inter(&b), Some(b));
1371 assert_eq!(b.inter(&c), None);
1372 assert_eq!(b.inter(&d), Some(b));
1373
1374 assert_eq!(c.inter(&d), Some(CharSet::singleton(30)));
1375 }
1376
1377 #[test]
1378 fn test_union() {
1379 let a = CharSet::singleton(0);
1380 let b = CharSet::range(1, 20);
1381 let c = CharSet::range(30, 60);
1382 let d = CharSet::range(0, 30);
1383
1384 assert_eq!(a.union(&a), Some(a));
1385 assert_eq!(a.union(&b), Some(CharSet::range(0, 20)));
1386 assert_eq!(a.union(&c), None);
1387 assert_eq!(a.union(&d), Some(d));
1388
1389 assert_eq!(b.union(&a), Some(CharSet::range(0, 20)));
1390 assert_eq!(b.union(&b), Some(b));
1391 assert_eq!(b.union(&c), None);
1392 assert_eq!(b.union(&d), Some(d));
1393
1394 assert_eq!(c.union(&d), Some(CharSet::range(0, 60)));
1395 }
1396
1397 #[test]
1398 fn test_cover() {
1399 let v = vec![CharPartition::new(), example1(), example2()];
1400 let i = vec![
1401 CharSet::singleton('a' as u32),
1402 CharSet::range('0' as u32, '9' as u32),
1403 CharSet::range('a' as u32, 'z' as u32),
1404 CharSet::range('h' as u32, 'q' as u32),
1405 CharSet::singleton('f' as u32),
1406 CharSet::singleton('q' as u32),
1407 CharSet::all_chars(),
1408 CharSet::singleton(0),
1409 CharSet::singleton(MAX_CHAR),
1410 CharSet::range(0, 'z' as u32),
1411 CharSet::range('z' as u32, MAX_CHAR),
1412 ];
1413
1414 fn check_covered(p: &CharPartition, test: &CharSet, i: usize) -> bool {
1415 i < p.len() && p.start(i) <= test.start && test.end <= p.end(i)
1416 }
1417
1418 fn check_disjoint(p: &[CharSet], test: &CharSet) -> bool {
1419 p.iter()
1420 .all(|set| test.end < set.start || set.end < test.start)
1421 }
1422
1423 fn check_overlap(p: &[CharSet], test: &CharSet) -> bool {
1424 p.iter().any(|set| {
1425 (test.start < set.start && set.start <= test.end)
1426 || (test.start <= set.end && set.end < test.end)
1427 })
1428 }
1429
1430 for p in &v {
1431 println!("Partition: {p}");
1432 for set in &i {
1433 let c = p.interval_cover(set);
1434 println!("Cover for {set} = {c}");
1435
1436 match c {
1437 CoverResult::CoveredBy(i) => {
1438 assert!(check_covered(p, set, i))
1439 }
1440 CoverResult::DisjointFromAll => {
1441 assert!(check_disjoint(&p.list, set))
1442 }
1443 CoverResult::Overlaps => {
1444 assert!(check_overlap(&p.list, set))
1445 }
1446 }
1447 }
1448 println!();
1449 }
1450 }
1451}