aws_smt_strings/
character_sets.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Character sets and alphabet partitions
6//!
7//! An SMT-LIB character is an unsigned integer in the range `[0..MAX_CHAR]`. We represent characters
8//! by `u32` integers in this range. The constant [MAX_CHAR] is defined in [smt_strings][crate::smt_strings].
9//!
10//! A character set is either a single character (e.g., `'A'`) or a range of characters (e.g., `['0'..'9']`).
11//! We represent both as `CharSet`s. A `CharSet` is a pair of two integers, `start` and `end`, where
12//! `start <= end` and `end <= MAX_CHAR`. This denotes the character interval `[start, end]`.
13//!
14//! A partition is a collection of disjoint character sets, sorted in increasing order.
15//! A partition is then a list of intervals:
16//!
17//! [a<sub>0</sub>, b<sub>0</sub>], [a<sub>1</sub>, b<sub>1</sub>], ..., [a<sub>k</sub>, b<sub>k</sub>]
18//! where a<sub>i</sub> <= b<sub>i</sub> and b<sub>i</sub> < a<sub>i+1</sub>.
19//!
20//! A partition defines an equivalence relation over characters: two characters are
21//! equivalent either if they belong to the same class [a<sub>i</sub>, b<sub>i</sub>] or
22//! if they're outside of all the intervals.
23//!
24//! A partition with n intervals defines then (n+1) classes:
25//! C<sub>0</sub>, C<sub>1</sub>, ..., C<sub>n-1</sub> and D.
26//! - For i=0,..., n-1, class C<sub>i</sub> is the interval [a<sub>i</sub>, b<sub>i</sub>].
27//! - Class D is the complementary class, that is, the complement of Union(C<sub>0</sub>, ..., C<sub>n-1</sub>).
28//!
29//! Note: the complementary class D may be empty.
30//!
31//! Each class in a partition can be identified by its [ClassId]:
32//! - `ClassId::Interval(i)` denotes the class C<sub>i</sub>, that is, the interval [a<sub>i</sub>, b<sub>i</sub>].
33//! - `ClassId::Complement` denotes the complementary class D.
34//!
35//! Partitions are used to decompose the SMT-LIB alphabet (of 196608 characters) into a typically much smaller number
36//! of equivalent classes.
37//! - For regular expressions, a partition divides the alphabet into derivative classes.
38//!   See [ReManager](crate::regular_expressions::ReManager).
39//! - For a finite-state automaton, a partition divides the alphabet into classes that have the
40//!   same transitions. A character partition is attached to every state `s` in an automaton and the successors
41//!   of `s` are defined for every class in this partition. See [Automaton](crate::automata::Automaton).
42//!
43
44use std::{
45    cmp::{max, min, Ordering},
46    fmt::Display,
47};
48
49use crate::{
50    errors::Error,
51    smt_strings::{char_to_smt, MAX_CHAR},
52};
53
54///
55/// Interval [start, end] where start <= end <= [MAX_CHAR].
56///
57/// This represents a range of characters.
58///
59#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
60pub struct CharSet {
61    start: u32,
62    end: u32,
63}
64
65impl Display for CharSet {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        let a = self.start;
68        let b = self.end;
69        if a == b {
70            write!(f, "{}", char_to_smt(a))
71        } else if a == 0 && b == MAX_CHAR {
72            write!(f, "\u{03a3}") // Sigma
73        } else {
74            write!(f, "[{}..{}]", char_to_smt(a), char_to_smt(b))
75        }
76    }
77}
78
79// partial order:
80//  [a, b] < [c, d] iff b < c
81impl PartialOrd for CharSet {
82    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
83        if self == other {
84            Some(Ordering::Equal)
85        } else if self.end < other.start {
86            Some(Ordering::Less)
87        } else if self.start > other.end {
88            Some(Ordering::Greater)
89        } else {
90            None
91        }
92    }
93}
94
95impl CharSet {
96    /// Construct the singleton interval [x, x]
97    ///
98    /// - the integer x must be a valid SMT-LIB character (i.e., 0 <= x <= [MAX_CHAR])
99    pub fn singleton(x: u32) -> CharSet {
100        debug_assert!(x <= MAX_CHAR);
101        CharSet { start: x, end: x }
102    }
103
104    /// Construct the interval [x, y]
105    ///
106    /// - requires x <= y <= [MAX_CHAR]
107    pub fn range(x: u32, y: u32) -> CharSet {
108        debug_assert!(x <= y && y <= MAX_CHAR);
109        CharSet { start: x, end: y }
110    }
111
112    /// Construct the interval [0, [MAX_CHAR]]
113    pub fn all_chars() -> CharSet {
114        CharSet {
115            start: 0,
116            end: MAX_CHAR,
117        }
118    }
119
120    /// Check whether x is in this interval
121    ///
122    /// # Example
123    ///
124    /// ```
125    /// use aws_smt_strings::character_sets::CharSet;
126    ///
127    /// let c = CharSet::range('a' as u32, 'z' as u32);
128    /// assert!(c.contains('g' as u32));
129    /// assert!(!c.contains('0' as u32));
130    /// ```
131    pub fn contains(&self, x: u32) -> bool {
132        self.start <= x && x <= self.end
133    }
134
135    /// Check whether other is a subset of this set
136    ///
137    /// # Example
138    ///
139    /// ```
140    /// use aws_smt_strings::character_sets::CharSet;
141    ///
142    /// let c = CharSet::range('a' as u32, 'z' as u32);
143    /// assert!(c.covers(&CharSet::singleton('g' as u32)));
144    /// assert!(c.covers(&CharSet::range('t' as u32, 'z' as u32)));
145    /// assert!(! c.covers(&CharSet::range(0, 'k' as u32)));
146    /// ```
147    pub fn covers(&self, other: &CharSet) -> bool {
148        debug_assert!(other.start <= other.end);
149        self.start <= other.start && other.end <= self.end
150    }
151
152    /// Check whether this set is before x
153    ///
154    /// If set is [a, b] this checks whether b < x.
155    ///
156    /// # Example
157    ///
158    /// ```
159    /// use aws_smt_strings::character_sets::CharSet;
160    ///
161    /// let c = CharSet::range('a' as u32, 'z' as u32);
162    /// assert!(c.is_before(127));
163    /// assert!(! c.is_before('c' as u32));
164    /// ```
165    pub fn is_before(&self, x: u32) -> bool {
166        self.end < x
167    }
168
169    /// Check whether this set is after x
170    ///
171    /// If set is the interval [a, b], this checks whether x < a.
172    ///
173    /// # Example
174    ///
175    /// ```
176    /// use aws_smt_strings::character_sets::CharSet;
177    ///
178    /// let c = CharSet::range('a' as u32, 'z' as u32);
179    /// assert!(! c.is_after(127));
180    /// assert!(c.is_after('Z' as u32));
181    /// ```
182    pub fn is_after(&self, x: u32) -> bool {
183        x < self.start
184    }
185
186    /// Get the size of this set
187    ///
188    /// Return the number of characters in the interval
189    ///
190    /// # Example
191    ///
192    /// ```
193    /// use aws_smt_strings::character_sets::CharSet;
194    ///
195    /// let c = CharSet::range('a' as u32, 'z' as u32);
196    /// assert_eq!(c.size(), 26);
197    /// ```
198    pub fn size(&self) -> u32 {
199        self.end - self.start + 1
200    }
201
202    ///
203    /// Check whether the set is a singleton
204    ///
205    /// # Example
206    ///
207    /// ```
208    /// use aws_smt_strings::character_sets::CharSet;
209    ///
210    /// let c = CharSet::range('a' as u32, 'z' as u32);
211    /// let d = CharSet::singleton('K' as u32);
212    /// assert!(d.is_singleton());
213    /// assert!(! c.is_singleton());
214    /// ```
215    pub fn is_singleton(&self) -> bool {
216        self.start == self.end
217    }
218
219    /// Check whether this set is the full alphabet
220    ///
221    /// # Example
222    ///
223    /// ```
224    /// use aws_smt_strings::{character_sets::CharSet, smt_strings::MAX_CHAR};
225    ///
226    /// let c = CharSet::range(0, MAX_CHAR);
227    /// let d = CharSet::range('a' as u32, 'z' as u32);
228    /// assert!(c.is_alphabet());
229    /// assert!(! d.is_alphabet());
230    /// ```
231    pub fn is_alphabet(&self) -> bool {
232        self.start == 0 && self.end == MAX_CHAR
233    }
234
235    /// Pick a character in the set
236    ///
237    /// # Example
238    ///
239    /// ```
240    /// use aws_smt_strings::character_sets::CharSet;
241    ///
242    /// let c = CharSet::range('a' as u32, 'z' as u32);
243    /// let d = c.pick();
244    /// assert!('a' as u32 <= d && d <= 'z' as u32);
245    /// ```
246    pub fn pick(&self) -> u32 {
247        self.start
248    }
249
250    /// Intersection of two intervals
251    ///
252    /// - return None if the intersection is empty
253    /// - return Some(charset) otherwise.
254    ///
255    /// # Example
256    ///
257    /// ```
258    /// use aws_smt_strings::character_sets::CharSet;
259    ///
260    /// let c = CharSet::range('a' as u32, 'z' as u32); // ['a', 'z']
261    /// let d = CharSet::range('t' as u32, 127);  // ['t', 127]
262    ///
263    /// // the intersection of c and d is ['t', 'z']
264    /// assert_eq!(c.inter(&d), Some(CharSet::range('t' as u32, 'z' as u32)));
265    /// ```
266    pub fn inter(&self, other: &CharSet) -> Option<CharSet> {
267        let max_start = max(self.start, other.start);
268        let min_end = min(self.end, other.end);
269        if max_start <= min_end {
270            Some(Self::range(max_start, min_end))
271        } else {
272            None
273        }
274    }
275
276    /// Intersection of an array of intervals
277    ///
278    /// See [inter][Self::inter]
279    /// - return None if the intersection is empty
280    /// - return Some(c) otherwise
281    pub fn inter_list(a: &[CharSet]) -> Option<CharSet> {
282        if a.is_empty() {
283            Some(Self::all_chars())
284        } else {
285            let mut result = a[0];
286            for s in &a[1..] {
287                match result.inter(s) {
288                    None => return None,
289                    Some(x) => result = x,
290                }
291            }
292            Some(result)
293        }
294    }
295
296    /// Union of two intervals
297    ///
298    /// - return None if the union is not an interval
299    /// - return Some(c) where c is a CharSet otherwise.
300    ///
301    /// # Example
302    ///
303    /// ```
304    /// use aws_smt_strings::character_sets::CharSet;
305    ///
306    /// let c = CharSet::range('a' as u32, 'z' as u32);
307    /// let d = CharSet::range('t' as u32, 127);
308    /// let e = CharSet::range('0' as u32, '9' as u32);
309    ///
310    /// assert_eq!(c.union(&d), Some(CharSet::range('a' as u32, 127)));
311    /// assert_eq!(c.union(&e), None);
312    /// ```
313    pub fn union(&self, other: &CharSet) -> Option<CharSet> {
314        let max_end = max(self.end, other.end);
315        // union([a, b], [c, d]) is an interval if a <= c-1 <= b or c <= a-1 <= d.
316        // we treat the case a==c separately to avoid underflow (when a=0 or c=0)
317        if self.start == other.start || (self.start < other.start && self.end >= other.start - 1) {
318            Some(Self::range(self.start, max_end))
319        } else if other.start < self.start && other.end >= self.start - 1 {
320            Some(Self::range(other.start, max_end))
321        } else {
322            None
323        }
324    }
325}
326
327///
328/// Class that covers an interval in a [CharPartition]
329///
330/// A CoverResult identifies the [CharPartition]'s class that contains
331/// an interval [a, b] if any.
332///
333/// For an interval [a, b] and a partition
334///  [a<sub>0</sub>, b<sub>0</sub>], ..., [a<sub>n</sub>, b<sub>n</sub>],
335/// CoverResult describes three possible outcomes:
336/// - `CoveredBy(i)` means that [a, b] is included in class C<sub>i</sub> = [a<sub>i</sub>, b<sub>i</sub>]
337/// - `DisjointFromAll` means that [a, b] does not intersect with any [a<sub>i</sub>, b<sub>i</sub>] so
338///   [a, b] is included in the complementary class.
339/// - `Overlaps` means that [a, b] and some interval [a<sub>i</sub>, b<sub>i</sub>] intersect,
340///   but [a, b] is not contained in this internal.
341///
342#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
343pub enum CoverResult {
344    /// CoveredBy(i) denotes the i-th interval [a<sub>i</sub>, b<sub>i</sub>] in the partition
345    CoveredBy(usize),
346    /// DisjointFromAll denotes the partition's complementary class
347    DisjointFromAll,
348    /// Overlaps means that [a, b] intersects some interval [a<sub>i</sub>, b<sub>i</sub>]
349    /// but is not fully included in this interval.
350    Overlaps,
351}
352
353impl Display for CoverResult {
354    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
355        match self {
356            CoverResult::CoveredBy(i) => write!(f, "CoveredBy({i})"),
357            CoverResult::DisjointFromAll => write!(f, "DisjointFromAll"),
358            CoverResult::Overlaps => write!(f, "Overlaps"),
359        }
360    }
361}
362
363/// ClassId
364///
365/// A class id identifies a character class defined by a partition.
366/// It can either be Interval(i) where i is an index between 0 and the partition length-1 or
367/// Complement.
368/// - Interval(i) denotes the i-th interval in a partition
369/// - Complement denotes the complementary class (not an interval in general)
370#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
371pub enum ClassId {
372    /// Id of a partition interval
373    Interval(usize),
374    /// Id of the complementary partition
375    Complement,
376}
377
378impl Display for ClassId {
379    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
380        match self {
381            ClassId::Interval(i) => write!(f, "Interval({i})"),
382            ClassId::Complement => write!(f, "Complement"),
383        }
384    }
385}
386
387///
388/// Collection of disjoint character sets.
389///
390/// Each character set is an interval [a, b] where a <= b.
391///
392/// The character sets are sorted in increasing order so a
393/// partition of length n is a sequence
394///
395/// [a<sub>0</sub>, b<sub>0</sub>], [a<sub>1</sub>, b<sub>1</sub>], ..., [a<sub>n-1</sub>, b<sub>n-1</sub>]
396/// where a<sub>i</sub> <= b<sub>i</sub> and b<sub>i</sub> < a<sub>i+1</sub>.
397///
398/// This divides the alphabet into n+1 classes C<sub>0</sub>, ..., C<sub>n-1</sub>, and D:
399/// - C<sub>i</sub> = { x | a<sub>i</sub> <= x <= b<sub>i</sub> }
400/// - D = complement of Union(C<sub>0</sub>, ..., C<sub>n-1</sub>)
401///
402/// Each class in a partition can be identified by its [ClassId]:
403/// - `ClassId::Interval(i)` denotes the class C<sub>i</sub>, that is, the interval [a<sub>i</sub>, b<sub>i</sub>].
404/// - `ClassId::Complement` denotes the complementary class D.
405//
406// The partition structure includes an integer comp_witness, which is the smallest integer that doesn't
407// belong to any of the intervals C<sub>i</sub>. So if comp_witness <= [MAX_CHAR], it's an element of
408// the complementary class. Otherwise, comp_witness = [MAX_CHAR]+1 and the complementary class is empty.
409//
410#[derive(Debug, PartialEq, Eq, Default, Clone)]
411pub struct CharPartition {
412    list: Vec<CharSet>,
413    comp_witness: u32,
414}
415
416impl CharPartition {
417    /// Number of intervals in the partition
418    pub fn len(&self) -> usize {
419        self.list.len()
420    }
421
422    /// Check whether the partition is empty (i.e., no intervals)
423    pub fn is_empty(&self) -> bool {
424        self.list.is_empty()
425    }
426
427    /// Create an empty partition
428    pub fn new() -> Self {
429        CharPartition::default()
430    }
431
432    /// Partition with a single character set
433    ///
434    /// # Example
435    ///
436    /// ```
437    /// use aws_smt_strings::character_sets::*;
438    ///
439    /// let p = CharPartition::from_set(&CharSet::range('a' as u32, 'b' as u32));
440    /// assert_eq!(p.len(), 1);
441    /// assert_eq!(p.get(0), ('a' as u32, 'b' as u32))
442    /// ```
443    pub fn from_set(c: &CharSet) -> Self {
444        let witness = if c.start > 0 { 0 } else { c.end + 1 };
445        CharPartition {
446            list: vec![*c],
447            comp_witness: witness,
448        }
449    }
450
451    /// Build a partition from a CharSet iterator
452    ///
453    /// Succeeds if the CharSets are pairwise disjoint.
454    /// Fails otherwise.
455    ///
456    /// # Errors
457    ///
458    /// If some CharSets have a non-empty intersection,
459    /// return Err([Error::NonDisjointCharSets]).
460    ///
461    /// # Example
462    ///
463    /// ```
464    /// use aws_smt_strings::character_sets::*;
465    ///
466    /// # use std::error::Error;
467    /// #
468    /// # fn main() -> Result<(), Box<dyn Error>> {
469    /// let v = [
470    ///     CharSet::range(120, 400),
471    ///     CharSet::range(0, 10),
472    ///     CharSet::range(1000, 2000)];
473    ///
474    /// let p = CharPartition::try_from_iter(v.into_iter().copied())?;
475    /// assert_eq!(p.len(), 3);
476    /// assert_eq!(p.get(0), (0, 10));
477    /// assert_eq!(p.get(1), (120, 400));
478    /// assert_eq!(p.get(2), (1000, 2000));
479    /// # Ok(())
480    /// # }
481    /// ```
482    pub fn try_from_iter(iter: impl Iterator<Item = CharSet>) -> Result<Self, Error> {
483        let mut v: Vec<CharSet> = iter.collect();
484        let mut comp_witness = 0;
485        if !v.is_empty() {
486            v.sort_by_key(|c| c.start);
487            let mut prev = &v[0];
488            if prev.start <= comp_witness {
489                comp_witness = prev.end + 1;
490            }
491            for c in &v[1..] {
492                if c.start <= prev.end {
493                    return Err(Error::NonDisjointCharSets);
494                }
495                if c.start <= comp_witness {
496                    comp_witness = c.end + 1;
497                }
498                prev = c;
499            }
500        }
501        Ok(CharPartition {
502            list: v,
503            comp_witness,
504        })
505    }
506
507    /// Build a partition from a list of CharSets
508    ///
509    /// Succeeds if the CharSets in `a` are pairwise disjoint.
510    /// Fails otherwise.
511    ///
512    /// # Errors
513    ///
514    /// If some elements of `a` have a non-empty intersection,
515    /// return Err([Error::NonDisjointCharSets]).
516    ///
517    /// # Example
518    ///
519    /// ```
520    /// use aws_smt_strings::character_sets::*;
521    ///
522    /// # use std::error::Error;
523    /// #
524    /// # fn main() -> Result<(), Box<dyn Error>> {
525    /// let v = [
526    ///     CharSet::range(120, 400),
527    ///     CharSet::range(0, 10),
528    ///     CharSet::range(1000, 2000)];
529    ///
530    /// let p = CharPartition::try_from_list(&v)?;
531    /// assert_eq!(p.len(), 3);
532    /// assert_eq!(p.get(0), (0, 10));
533    /// assert_eq!(p.get(1), (120, 400));
534    /// assert_eq!(p.get(2), (1000, 2000));
535    ///
536    /// # Ok(())
537    /// # }
538    /// ```
539    ///
540    pub fn try_from_list(a: &[CharSet]) -> Result<Self, Error> {
541        Self::try_from_iter(a.iter().copied())
542    }
543
544    /// Add the interval [start, end] at the end of the partition.
545    ///
546    /// Requires `start <= end` and `end <= MAX_CHAR`.
547    /// If the partition is not empty, `start` must also be larger than the end of the
548    /// last interval in the partition.
549    ///
550    /// # Example
551    /// This constructs the two-interval partition `['0', '9'] ['Z', 'Z']`.
552    ///
553    /// ```
554    /// use aws_smt_strings::character_sets::*;
555    ///
556    /// let mut p = CharPartition::new();
557    /// p.push('0' as u32, '9' as u32);
558    /// p.push('Z' as u32, 'Z' as u32);
559    ///
560    /// assert_eq!(p.len(), 2);
561    /// assert_eq!(p.get(0), ('0' as u32, '9' as u32));
562    /// assert_eq!(p.get(1), ('Z' as u32, 'Z' as u32));
563    /// ```
564    pub fn push(&mut self, start: u32, end: u32) {
565        debug_assert!(start <= end && end <= MAX_CHAR);
566        debug_assert!(self.list.is_empty() || start > self.list.last().unwrap().end);
567        self.list.push(CharSet { start, end });
568        if start <= self.comp_witness {
569            self.comp_witness = end + 1;
570        }
571    }
572
573    /// Get the pair (start, end) of the i-the interval in the partition
574    ///
575    /// Intervals are indexed from 0 to `p.len() - 1` (inclusive).
576    /// - if i < p.len(), return the start and end of the i-th interval.
577    /// - if i >= p.len(), return the pair  `(MAX_CHAR+1, MAX_CHAR+1)`
578    ///
579    /// # Example
580    ///
581    /// ```
582    /// use aws_smt_strings::character_sets::*;
583    /// use aws_smt_strings::smt_strings::MAX_CHAR;
584    ///
585    /// // partition ['0', '9'] ['Z', 'Z']
586    /// let mut p = CharPartition::new();
587    /// p.push('0' as u32, '9' as u32);
588    /// p.push('Z' as u32, 'Z' as u32);
589    ///
590    /// assert_eq!(p.get(0), ('0' as u32, '9' as u32)); // first interval
591    /// assert_eq!(p.get(3), (MAX_CHAR + 1, MAX_CHAR + 1)); // out-of-range index
592    /// ```
593    pub fn get(&self, i: usize) -> (u32, u32) {
594        if i < self.list.len() {
595            let r = &self.list[i];
596            (r.start, r.end)
597        } else {
598            (MAX_CHAR + 1, MAX_CHAR + 1)
599        }
600    }
601
602    /// Get the i-the interval in the partition as a CharSet.
603    ///
604    /// # Panics
605    ///
606    /// If i is out of bound, that is, if i >= number of intervals in the partition.
607    ///
608    /// # Example
609    ///
610    /// ```
611    /// use aws_smt_strings::character_sets::*;
612    ///
613    /// let mut p = CharPartition::new();
614    /// p.push('a' as u32, 'z' as u32);
615    ///
616    /// assert_eq!(p.interval(0), CharSet::range('a' as u32, 'z' as u32));
617    /// ```
618    pub fn interval(&self, i: usize) -> CharSet {
619        self.list[i]
620    }
621
622    /// Get the start of the i-th interval
623    ///
624    /// - return `MAX_CHAR+1` if i is out of bound.
625    ///
626    /// # Example
627    ///
628    /// ```
629    /// use aws_smt_strings::character_sets::*;
630    /// use aws_smt_strings::smt_strings::MAX_CHAR;
631    ///
632    /// let mut p = CharPartition::new();
633    /// p.push('a' as u32, 'z' as u32);
634    ///
635    /// assert_eq!(p.start(0), 'a' as u32);
636    /// assert_eq!(p.start(1), MAX_CHAR+1);
637    /// ```
638    pub fn start(&self, i: usize) -> u32 {
639        if i < self.len() {
640            self.list[i].start
641        } else {
642            MAX_CHAR + 1
643        }
644    }
645
646    /// Get the end of the i-th interval
647    ///
648    /// - return `MAX_CHAR+1` if i is out of bound
649    ///
650    /// # Example
651    ///
652    /// ```
653    /// use aws_smt_strings::character_sets::*;
654    /// use aws_smt_strings::smt_strings::MAX_CHAR;
655    ///
656    /// let mut p = CharPartition::new();
657    /// p.push('a' as u32, 'z' as u32);
658    ///
659    /// assert_eq!(p.end(0), 'z' as u32);
660    /// assert_eq!(p.end(1), MAX_CHAR+1);
661    /// ```
662    pub fn end(&self, index: usize) -> u32 {
663        if index < self.len() {
664            self.list[index].end
665        } else {
666            MAX_CHAR + 1
667        }
668    }
669
670    /// Pick an element in interval i
671    ///
672    /// # Panics
673    ///
674    /// if i >= number of intervals in the partition
675    ///
676    /// # Example
677    ///
678    /// ```
679    /// use aws_smt_strings::character_sets::*;
680    ///
681    /// let mut p = CharPartition::new();
682    /// p.push('a' as u32, 'z' as u32);
683    ///
684    /// assert!('a' as u32 <= p.pick(0) && p.pick(0) <= 'z' as u32);
685    /// ```
686    pub fn pick(&self, i: usize) -> u32 {
687        self.list[i].start
688    }
689
690    /// Check whether the complementary class is empty
691    ///
692    /// # Example
693    ///
694    /// ```
695    /// use aws_smt_strings::character_sets::*;
696    /// use aws_smt_strings::smt_strings::MAX_CHAR;
697    ///
698    /// let mut p = CharPartition::new();
699    /// p.push(0, 127);
700    /// assert!(! p.empty_complement());
701    /// p.push(128, MAX_CHAR);
702    /// assert!(p.empty_complement());
703    /// ```
704    pub fn empty_complement(&self) -> bool {
705        self.comp_witness > MAX_CHAR
706    }
707
708    /// Pick an element in the complementary class
709    ///
710    /// - return `MAX_CHAR+1` if the complementary class is empty
711    ///
712    /// # Example
713    ///
714    /// ```
715    /// use aws_smt_strings::character_sets::*;
716    /// use aws_smt_strings::smt_strings::MAX_CHAR;
717    ///
718    /// // partition with a single interval ['a', 'z']
719    /// let p = CharPartition::from_set(&CharSet::range('a' as u32, 'z' as u32));
720    ///
721    /// // the complementary class is the union of [0, 'a' - 1] and ['z' + 1, MAX_CHAR]
722    /// let x = p.pick_complement();
723    /// assert!(x < ('a' as u32) || ('z' as u32) < x && x <= MAX_CHAR);
724    /// ```
725    pub fn pick_complement(&self) -> u32 {
726        self.comp_witness
727    }
728
729    /// Check whether a class id is valid
730    ///
731    /// # Example
732    ///
733    /// ```
734    /// use aws_smt_strings::character_sets::*;
735    ///
736    /// // partition with two intervals and three classes
737    /// let mut p = CharPartition::new();
738    /// p.push('0' as u32, '9' as u32);
739    /// p.push('Z' as u32, 'Z' as u32);
740    ///
741    /// assert!(p.valid_class_id(ClassId::Interval(0)));
742    /// assert!(p.valid_class_id(ClassId::Interval(1)));
743    /// assert!(p.valid_class_id(ClassId::Complement));
744    ///
745    /// assert!(! p.valid_class_id(ClassId::Interval(2)));
746    /// ```
747    pub fn valid_class_id(&self, cid: ClassId) -> bool {
748        use ClassId::*;
749        match cid {
750            Interval(i) => i < self.len(),
751            Complement => !self.empty_complement(),
752        }
753    }
754
755    /// Number of classes
756    ///
757    /// # Example
758    ///
759    /// ```
760    /// use aws_smt_strings::character_sets::*;
761    ///
762    /// // partition with two intervals and three classes
763    /// let mut p = CharPartition::new();
764    /// p.push('0' as u32, '9' as u32);
765    /// p.push('Z' as u32, 'Z' as u32);
766    ///
767    /// assert_eq!(p.num_classes(), 3);
768    /// ```
769    pub fn num_classes(&self) -> usize {
770        let n = self.len();
771        if self.empty_complement() {
772            n
773        } else {
774            n + 1
775        }
776    }
777
778    /// Pick an element in a class
779    ///
780    /// - cid identifies the class: if cid = Interval(i) then pick in the i-th interval of p
781    ///   (intervals are indexed from 0 to p.len() - 1)
782    /// - if cid is Complement then pick in the complementary class
783    ///
784    /// # Panics
785    ///
786    /// If cid is Interval(i) and i >= p.len() or cid is Complement and the complementary class
787    /// is empty.
788    ///
789    /// # Example
790    ///
791    /// ```
792    /// use aws_smt_strings::character_sets::{ClassId::*, *};
793    /// use aws_smt_strings::smt_strings::MAX_CHAR;
794    ///
795    /// let mut p = CharPartition::new();
796    /// p.push('0' as u32, '9' as u32);
797    /// p.push('Z' as u32, 'Z' as u32);
798    ///
799    /// let x = p.pick_in_class(Interval(1));
800    /// assert_eq!(x, 'Z' as u32); // since interval 1 is ['Z', 'Z']
801    ///
802    /// let y = p.pick_in_class(Complement);
803    /// assert!(0 <= y && y < ('0' as u32) ||
804    ///         ('9' as u32) < y && y < ('Z' as u32) ||
805    ///         ('Z' as u32) < y && y <= MAX_CHAR);
806    /// ```
807    pub fn pick_in_class(&self, cid: ClassId) -> u32 {
808        use ClassId::*;
809        match cid {
810            Interval(i) => self.pick(i),
811            Complement => {
812                assert!(!self.empty_complement());
813                self.pick_complement()
814            }
815        }
816    }
817
818    /// Iterator to go through all intervals in the partition
819    pub fn ranges(&self) -> impl Iterator<Item = &CharSet> {
820        self.list.iter()
821    }
822
823    /// Iterator to go through all valid class ids
824    pub fn class_ids(&self) -> ClassIdIterator<'_> {
825        ClassIdIterator {
826            partition: self,
827            counter: 0,
828        }
829    }
830
831    /// Iterator to pick one character in each class
832    /// (including the complementary class).
833    pub fn picks(&self) -> PickIterator<'_> {
834        PickIterator {
835            partition: self,
836            counter: 0,
837        }
838    }
839
840    ///
841    /// Search for the class that contains a character
842    ///
843    /// - Return `Interval(i)` if `a_i <= x <= b_i`
844    /// - Return `Complement` if x is not in any interval
845    ///
846    pub fn class_of_char(&self, x: u32) -> ClassId {
847        #[allow(clippy::many_single_char_names)]
848        fn binary_search(p: &[CharSet], x: u32) -> ClassId {
849            let mut i = 0;
850            let mut j = p.len();
851            while i < j {
852                let h = i + (j - i) / 2;
853                if p[h].contains(x) {
854                    return ClassId::Interval(h);
855                }
856                if p[h].is_before(x) {
857                    i = h + 1;
858                } else {
859                    j = h;
860                }
861            }
862            ClassId::Complement
863        }
864
865        binary_search(&self.list, x)
866    }
867
868    ///
869    /// Search for an interval that covers a character set
870    ///
871    /// The character set is an interval [a, b] where `0 <= a <= b <= MAX_CHAR`.
872    /// - Return `CoverResult::CoveredBy(i)` if [a, b] is included in the i-th
873    ///   interval of the partition.
874    /// - Return `CoverResult::DisjointFromAll` if [a, b] does not overlap any
875    ///   interval in the partition.
876    /// - Return `CoverResult::Overlaps` if [a, b] overlaps some interval in
877    ///   the partition but it not contained in this interval.
878    ///
879    /// # Example
880    ///
881    /// ```
882    /// use aws_smt_strings::character_sets::*;
883    ///
884    /// let p = CharPartition::from_set(&CharSet::range('0' as u32, '9' as u32));
885    /// let test1 = CharSet::range('4' as u32, '8' as u32);
886    /// let test2 = CharSet::range('a' as u32, 'z' as u32);
887    /// let test3 = CharSet::range('5' as u32, '?' as u32);
888    ///
889    /// assert_eq!(p.interval_cover(&test1), CoverResult::CoveredBy(0));
890    /// assert_eq!(p.interval_cover(&test2), CoverResult::DisjointFromAll);
891    /// assert_eq!(p.interval_cover(&test3), CoverResult::Overlaps);
892    /// ```
893    pub fn interval_cover(&self, set: &CharSet) -> CoverResult {
894        // search for the largest i such that a_i <= x
895        // return 0 if there's no such i (either because p is empty
896        // or because x < a_0)
897        #[allow(clippy::many_single_char_names)]
898        fn binary_search(p: &[CharSet], x: u32) -> usize {
899            let mut i = 0;
900            let mut j = p.len();
901            while i + 1 < j {
902                let h = i + (j - i) / 2;
903                if p[h].start <= x {
904                    i = h
905                } else {
906                    j = h
907                }
908            }
909            i
910        }
911
912        let a = set.start;
913        let b = set.end;
914        debug_assert!(a <= b && b <= MAX_CHAR);
915
916        let i = binary_search(&self.list, a);
917        let (a_i, b_i) = self.get(i);
918
919        if a < a_i {
920            debug_assert!(i == 0);
921            if b < a_i {
922                // a <= b < a_0
923                CoverResult::DisjointFromAll
924            } else {
925                // a < a_0 <= b
926                CoverResult::Overlaps
927            }
928        } else if a <= b_i {
929            if b <= b_i {
930                // a_i <= a <= b <= b_i
931                CoverResult::CoveredBy(i)
932            } else {
933                // a_i <= a <= b_i < b
934                CoverResult::Overlaps
935            }
936        } else {
937            // note: we know i < p.len() <= MAX_CHAR so i+1 can't overflow here
938            let next_ai = self.end(i + 1);
939            if b < next_ai {
940                // a_i <= b_i < a <= b < a_{i+1}
941                CoverResult::DisjointFromAll
942            } else {
943                // a_i <= b_i < a < a_{i+1} <= b
944                CoverResult::Overlaps
945            }
946        }
947    }
948
949    /// Get the class id for a character set
950    ///
951    /// - The class id is Interval(i) if the set is covered by interval
952    ///   [a<sub>i</sub>, b<sub>i</sub>] of the partition
953    /// - The class id is Complement if the set is covered by the partition's
954    ///   complementary class
955    /// - Otherwise, the class id is not defined.
956    ///
957    /// # Error
958    ///
959    /// If the class id is not defined for s, return Err([Error::AmbiguousCharSet])
960    ///
961    pub fn class_of_set(&self, s: &CharSet) -> Result<ClassId, Error> {
962        use ClassId::*;
963        use CoverResult::*;
964
965        match self.interval_cover(s) {
966            CoveredBy(i) => Ok(Interval(i)),
967            DisjointFromAll => Ok(Complement),
968            Overlaps => Err(Error::AmbiguousCharSet),
969        }
970    }
971
972    /// Check whether character set c is consistent with the partition.
973    ///
974    /// - return true if c is included in one partition's class or if
975    ///   c is included in the partition's complementary class.
976    ///
977    /// # Example
978    ///
979    /// ```
980    /// use aws_smt_strings::character_sets::*;
981    ///
982    /// let p = CharPartition::from_set(&CharSet::range('0' as u32, '9' as u32));
983    /// let test1 = CharSet::range('4' as u32, '8' as u32);
984    /// let test2 = CharSet::range('a' as u32, 'z' as u32);
985    /// let test3 = CharSet::range('5' as u32, '?' as u32);
986    ///
987    /// assert!(p.good_char_set(&test1));
988    /// assert!(p.good_char_set(&test2));
989    /// assert!(! p.good_char_set(&test3));
990    /// ```
991    pub fn good_char_set(&self, c: &CharSet) -> bool {
992        !matches!(self.interval_cover(c), CoverResult::Overlaps)
993    }
994}
995
996impl Display for CharPartition {
997    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
998        write!(f, "{{ ")?;
999        for r in self.ranges() {
1000            write!(f, "{r} ")?;
1001        }
1002        write!(f, "}}")
1003    }
1004}
1005
1006/// Iterator to go through all valid ClassId's in a partition
1007#[derive(Debug)]
1008pub struct ClassIdIterator<'a> {
1009    partition: &'a CharPartition,
1010    counter: usize,
1011}
1012
1013impl<'a> Iterator for ClassIdIterator<'a> {
1014    type Item = ClassId;
1015
1016    fn next(&mut self) -> Option<Self::Item> {
1017        let i = self.counter;
1018        self.counter += 1;
1019        if i < self.partition.len() {
1020            Some(ClassId::Interval(i))
1021        } else if i == self.partition.len() && !self.partition.empty_complement() {
1022            Some(ClassId::Complement)
1023        } else {
1024            None
1025        }
1026    }
1027
1028    fn size_hint(&self) -> (usize, Option<usize>) {
1029        let mut size = self.partition.len();
1030        if !self.partition.empty_complement() {
1031            size += 1;
1032        }
1033        (size, Some(size))
1034    }
1035}
1036
1037/// Iterator to pick a character in each class of a partition
1038///
1039/// If the partition consists of `n` intervals, the iterator will
1040/// pick an element in each interval first, then it will pick an
1041/// element in the complementary partition if this complementary
1042/// partition is not empty.
1043///
1044#[derive(Debug)]
1045pub struct PickIterator<'a> {
1046    partition: &'a CharPartition,
1047    counter: usize,
1048}
1049
1050impl<'a> Iterator for PickIterator<'a> {
1051    type Item = u32;
1052
1053    fn next(&mut self) -> Option<u32> {
1054        let i = self.counter;
1055        self.counter += 1;
1056        if i < self.partition.len() {
1057            Some(self.partition.pick(i))
1058        } else if i == self.partition.len() && !self.partition.empty_complement() {
1059            Some(self.partition.pick_complement())
1060        } else {
1061            None
1062        }
1063    }
1064}
1065
1066///
1067/// Merge two partitions p1 and p2
1068///
1069/// The result is the coarsest partition p that is a refinement of both p1 and p2.
1070/// This means that every interval of p1 and every interval of p2 is the union
1071/// of one or more successive intervals of p.
1072///
1073/// More precisely:
1074/// - let p1 = [a<sub>0</sub>, b<sub>0</sub>], ..., [a<sub>n</sub>, b<sub>n</sub>]
1075/// - let p2 = [c<sub>0</sub>, d<sub>0</sub>], ..., [c<sub>m</sub>, d<sub>m</sub>]
1076/// - let D1 = complement(Union [a<sub>i</sub>, b<sub>i</sub>])
1077/// - let D2 = complement(Union [c<sub>j</sub>, d<sub>j</sub>])
1078///
1079/// Then every interval of p is of one of the following forms
1080/// 1) [a<sub>i</sub>, b<sub>i</sub>] inter D2
1081/// 2) D1 inter [c<sub>j</sub>, d<sub>j</sub>]
1082/// 3) [a<sub>i</sub>, b<sub>j</sub>] inter [c<sub>j</sub>, d<sub>j</sub>]
1083///
1084/// where 0 <= i <= n and 0 <= j <= m.
1085///
1086/// # Example
1087///
1088/// ```
1089/// use aws_smt_strings::character_sets::*;
1090///
1091/// // partition p with intervals ['0', '9'] and ['a', 'g']
1092/// let mut p = CharPartition::new();
1093/// p.push('0' as u32, '9' as u32);
1094/// p.push('a' as u32, 'g' as u32);
1095///
1096/// // partition q with intervals ['5', '5'] and ['c', 'z']
1097/// let mut q = CharPartition::new();
1098/// q.push('5' as u32, '5' as u32);
1099/// q.push('c' as u32, 'z' as u32);
1100///
1101/// // r = merge(p, q) contains six intervals
1102/// // r = ['0', '4'] ['5', '5'] ['6', '9'] ['a', 'b'] ['c', 'g'] ['h', 'z']
1103/// let r = merge_partitions(&p, &q);
1104///
1105/// assert_eq!(r.len(), 6);
1106/// assert_eq!(r.get(0), ('0' as u32, '4' as u32));
1107/// assert_eq!(r.get(1), ('5' as u32, '5' as u32));
1108/// assert_eq!(r.get(2), ('6' as u32, '9' as u32));
1109/// assert_eq!(r.get(3), ('a' as u32, 'b' as u32));
1110/// assert_eq!(r.get(4), ('c' as u32, 'g' as u32));
1111/// assert_eq!(r.get(5), ('h' as u32, 'z' as u32));
1112/// ```
1113#[allow(clippy::many_single_char_names)]
1114pub fn merge_partitions(p1: &CharPartition, p2: &CharPartition) -> CharPartition {
1115    // consume interval i of p as [a, b]
1116    fn next_interval(p: &CharPartition, i: usize) -> (usize, u32, u32) {
1117        let (x, y) = p.get(i);
1118        (i + 1, x, y)
1119    }
1120
1121    let mut triple1 = next_interval(p1, 0);
1122    let mut triple2 = next_interval(p2, 0);
1123
1124    let mut result = CharPartition::new();
1125    while triple1.2 <= MAX_CHAR || triple2.2 <= MAX_CHAR {
1126        let (i, a, b) = triple1;
1127        let (j, c, d) = triple2;
1128        // [a, b] is a sub-range of the i-th interval of p1
1129        // [c, d] is a sub-range of the j-th interval of p2
1130        if b < c {
1131            // [a, b] < [c, d]
1132            result.push(a, b);
1133            triple1 = next_interval(p1, i);
1134        } else if d < a {
1135            // [c, d] < [a, b]
1136            result.push(c, d);
1137            triple2 = next_interval(p2, j);
1138        } else if c < a {
1139            // [c, d] and [a, b] overlap and c<a
1140            result.push(c, a - 1);
1141            triple2.1 = a; // next interval in p2 is [a, d]
1142        } else if a < c {
1143            // [c, d] and [a, b] overlap and a<c
1144            result.push(a, c - 1);
1145            triple1.1 = c; // next interval in p1 is [c, b]
1146        } else if b < d {
1147            // a=c and b<d: [a, b] is a prefix of [c, d]
1148            result.push(a, b);
1149            triple1 = next_interval(p1, i);
1150            triple2.1 = b + 1;
1151        } else if d < b {
1152            // a=c and d<b: [c, d] is a prefix of [a, b]
1153            result.push(c, d);
1154            triple1.1 = d + 1;
1155            triple2 = next_interval(p2, j);
1156        } else {
1157            // a=c and b=d
1158            result.push(a, b);
1159            triple1 = next_interval(p1, i);
1160            triple2 = next_interval(p2, j);
1161        }
1162    }
1163    result
1164}
1165
1166///
1167/// Merge a list of partitions
1168///
1169/// See [merge_partitions]
1170pub fn merge_partition_list<'a>(list: impl Iterator<Item = &'a CharPartition>) -> CharPartition {
1171    let mut result = CharPartition::new();
1172    for p in list {
1173        result = merge_partitions(&result, p)
1174    }
1175    result
1176}
1177
1178#[cfg(test)]
1179mod test {
1180    use super::*;
1181
1182    fn good_partition(p: &CharPartition) -> bool {
1183        let mut prev_end = MAX_CHAR + 1;
1184        for s in p.ranges() {
1185            if s.start > s.end {
1186                return false;
1187            }
1188            if prev_end <= MAX_CHAR && s.start <= prev_end {
1189                return false;
1190            }
1191            if s.start <= p.comp_witness && p.comp_witness <= s.end {
1192                return false;
1193            }
1194            prev_end = s.end;
1195        }
1196        true
1197    }
1198
1199    fn example1() -> CharPartition {
1200        let mut p = CharPartition::new();
1201        p.push('0' as u32, '9' as u32);
1202        p.push('Z' as u32, 'Z' as u32);
1203        p.push('f' as u32, 'q' as u32);
1204        p
1205    }
1206
1207    fn example2() -> CharPartition {
1208        let mut p = CharPartition::new();
1209        p.push('0' as u32, '0' as u32);
1210        p.push('A' as u32, 'G' as u32);
1211        p.push('H' as u32, 'M' as u32);
1212        p.push('W' as u32, 'Z' as u32);
1213        p.push('a' as u32, 'n' as u32);
1214        p.push('q' as u32, 'r' as u32);
1215        p
1216    }
1217
1218    #[test]
1219    fn test_simple() {
1220        let p1 = CharPartition::new();
1221        let p2 = example1();
1222        let p3 = example1();
1223        let p4 = example2();
1224        let p5 = CharPartition::from_set(&CharSet::all_chars());
1225
1226        assert!(good_partition(&p1));
1227        assert!(good_partition(&p2));
1228        assert!(good_partition(&p4));
1229        assert!(good_partition(&p5));
1230
1231        assert!(!p1.empty_complement());
1232        assert!(p1.pick_complement() == 0);
1233        assert!(!p2.empty_complement());
1234        assert!(p2.pick_complement() == 0);
1235        assert!(!p4.empty_complement());
1236        assert!(p4.pick_complement() == 0);
1237        assert!(p5.empty_complement());
1238
1239        assert_eq!(&p2, &p3);
1240        assert_ne!(&p2, &p4);
1241        assert_ne!(&p1, &p2);
1242        assert_ne!(&p1, &p4);
1243        assert_ne!(&p1, &p5);
1244
1245        println!("Empty partition: {}", &p1);
1246        println!("Example1: {}", &p2);
1247        println!("Example2: {}", &p4);
1248        println!("All chars: {}", &p5);
1249    }
1250
1251    #[test]
1252    fn test_from_list() {
1253        let v = [
1254            CharSet::range(120, 400),
1255            CharSet::range(0, 10),
1256            CharSet::range(1000, 2000),
1257        ];
1258
1259        match CharPartition::try_from_list(&v) {
1260            Ok(p) => {
1261                println!("From list succeeded: {}", &p);
1262                assert_eq!(p.len(), 3);
1263                assert_eq!(p.get(0), (0, 10));
1264                assert_eq!(p.get(1), (120, 400));
1265                assert_eq!(p.get(2), (1000, 2000));
1266                assert!(good_partition(&p));
1267            }
1268            Err(e) => panic!("Partition::try_from_list failed with error {}", e),
1269        }
1270
1271        let w = [
1272            CharSet::range(120, 400),
1273            CharSet::range(1000, 2000),
1274            CharSet::range(0, 10),
1275            CharSet::range(100, 200),
1276        ];
1277
1278        match CharPartition::try_from_list(&w) {
1279            Ok(_) => panic!("Partition::try_from_list should have failed"),
1280            Err(e) => println!("Partition::try_from_list failed with error {e} as expected"),
1281        }
1282    }
1283
1284    #[test]
1285    fn test_search() {
1286        use super::ClassId::*;
1287
1288        let p = CharPartition::new();
1289
1290        assert_eq!(p.class_of_char('a' as u32), Complement);
1291        assert_eq!(p.class_of_char(0), Complement);
1292        assert_eq!(p.class_of_char(MAX_CHAR), Complement);
1293
1294        let p2 = example1();
1295
1296        assert_eq!(p2.class_of_char(10), Complement);
1297        assert_eq!(p2.class_of_char('0' as u32), Interval(0));
1298        assert_eq!(p2.class_of_char('5' as u32), Interval(0));
1299        assert_eq!(p2.class_of_char('9' as u32), Interval(0));
1300        assert_eq!(p2.class_of_char('A' as u32), Complement);
1301        assert_eq!(p2.class_of_char('Z' as u32), Interval(1));
1302        assert_eq!(p2.class_of_char('e' as u32), Complement);
1303        assert_eq!(p2.class_of_char('g' as u32), Interval(2));
1304        assert_eq!(p2.class_of_char('z' as u32), Complement);
1305
1306        let p3 = example2();
1307        assert_eq!(p3.class_of_char(10), Complement);
1308        assert_eq!(p3.class_of_char('0' as u32), Interval(0));
1309        assert_eq!(p3.class_of_char('5' as u32), Complement);
1310        assert_eq!(p3.class_of_char('9' as u32), Complement);
1311        assert_eq!(p3.class_of_char('A' as u32), Interval(1));
1312        assert_eq!(p3.class_of_char('F' as u32), Interval(1));
1313        assert_eq!(p3.class_of_char('G' as u32), Interval(1));
1314        assert_eq!(p3.class_of_char('H' as u32), Interval(2));
1315        assert_eq!(p3.class_of_char('L' as u32), Interval(2));
1316        assert_eq!(p3.class_of_char('O' as u32), Complement);
1317        assert_eq!(p3.class_of_char('W' as u32), Interval(3));
1318        assert_eq!(p3.class_of_char('Z' as u32), Interval(3));
1319        assert_eq!(p3.class_of_char('^' as u32), Complement);
1320        assert_eq!(p3.class_of_char('e' as u32), Interval(4));
1321        assert_eq!(p3.class_of_char('g' as u32), Interval(4));
1322        assert_eq!(p3.class_of_char('p' as u32), Complement);
1323        assert_eq!(p3.class_of_char('q' as u32), Interval(5));
1324        assert_eq!(p3.class_of_char('r' as u32), Interval(5));
1325        assert_eq!(p3.class_of_char('s' as u32), Complement);
1326        assert_eq!(p3.class_of_char('z' as u32), Complement);
1327
1328        let p4 = CharPartition::from_set(&CharSet::all_chars());
1329        assert_eq!(p4.class_of_char(0), Interval(0));
1330        assert_eq!(p4.class_of_char(MAX_CHAR), Interval(0));
1331    }
1332
1333    #[test]
1334    fn test_merge() {
1335        let v = vec![CharPartition::new(), example1(), example2()];
1336
1337        for p in &v {
1338            for q in &v {
1339                let m = merge_partitions(p, q);
1340                println!("Merge({}, {}) = {}", p, q, &m);
1341
1342                assert!(good_partition(&m));
1343
1344                if p.is_empty() {
1345                    assert_eq!(&m, q);
1346                }
1347                if q.is_empty() {
1348                    assert_eq!(&m, p);
1349                }
1350                if p == q {
1351                    assert_eq!(&m, p);
1352                }
1353            }
1354        }
1355    }
1356
1357    #[test]
1358    fn test_inter() {
1359        let a = CharSet::singleton(0);
1360        let b = CharSet::range(1, 20);
1361        let c = CharSet::range(30, 60);
1362        let d = CharSet::range(0, 30);
1363
1364        assert_eq!(a.inter(&a), Some(a));
1365        assert_eq!(a.inter(&b), None);
1366        assert_eq!(a.inter(&c), None);
1367        assert_eq!(a.inter(&d), Some(a));
1368
1369        assert_eq!(b.inter(&a), None);
1370        assert_eq!(b.inter(&b), Some(b));
1371        assert_eq!(b.inter(&c), None);
1372        assert_eq!(b.inter(&d), Some(b));
1373
1374        assert_eq!(c.inter(&d), Some(CharSet::singleton(30)));
1375    }
1376
1377    #[test]
1378    fn test_union() {
1379        let a = CharSet::singleton(0);
1380        let b = CharSet::range(1, 20);
1381        let c = CharSet::range(30, 60);
1382        let d = CharSet::range(0, 30);
1383
1384        assert_eq!(a.union(&a), Some(a));
1385        assert_eq!(a.union(&b), Some(CharSet::range(0, 20)));
1386        assert_eq!(a.union(&c), None);
1387        assert_eq!(a.union(&d), Some(d));
1388
1389        assert_eq!(b.union(&a), Some(CharSet::range(0, 20)));
1390        assert_eq!(b.union(&b), Some(b));
1391        assert_eq!(b.union(&c), None);
1392        assert_eq!(b.union(&d), Some(d));
1393
1394        assert_eq!(c.union(&d), Some(CharSet::range(0, 60)));
1395    }
1396
1397    #[test]
1398    fn test_cover() {
1399        let v = vec![CharPartition::new(), example1(), example2()];
1400        let i = vec![
1401            CharSet::singleton('a' as u32),
1402            CharSet::range('0' as u32, '9' as u32),
1403            CharSet::range('a' as u32, 'z' as u32),
1404            CharSet::range('h' as u32, 'q' as u32),
1405            CharSet::singleton('f' as u32),
1406            CharSet::singleton('q' as u32),
1407            CharSet::all_chars(),
1408            CharSet::singleton(0),
1409            CharSet::singleton(MAX_CHAR),
1410            CharSet::range(0, 'z' as u32),
1411            CharSet::range('z' as u32, MAX_CHAR),
1412        ];
1413
1414        fn check_covered(p: &CharPartition, test: &CharSet, i: usize) -> bool {
1415            i < p.len() && p.start(i) <= test.start && test.end <= p.end(i)
1416        }
1417
1418        fn check_disjoint(p: &[CharSet], test: &CharSet) -> bool {
1419            p.iter()
1420                .all(|set| test.end < set.start || set.end < test.start)
1421        }
1422
1423        fn check_overlap(p: &[CharSet], test: &CharSet) -> bool {
1424            p.iter().any(|set| {
1425                (test.start < set.start && set.start <= test.end)
1426                    || (test.start <= set.end && set.end < test.end)
1427            })
1428        }
1429
1430        for p in &v {
1431            println!("Partition: {p}");
1432            for set in &i {
1433                let c = p.interval_cover(set);
1434                println!("Cover for {set} = {c}");
1435
1436                match c {
1437                    CoverResult::CoveredBy(i) => {
1438                        assert!(check_covered(p, set, i))
1439                    }
1440                    CoverResult::DisjointFromAll => {
1441                        assert!(check_disjoint(&p.list, set))
1442                    }
1443                    CoverResult::Overlaps => {
1444                        assert!(check_overlap(&p.list, set))
1445                    }
1446                }
1447            }
1448            println!();
1449        }
1450    }
1451}
aws_smt_strings/character_sets.rs

aws_smt_strings/
character_sets.rs