smt_str/alphabet/
mod.rs

1//! Character ranges and alphabets for SMT-LIB strings.
2//!
3//! This module provides types for describing and manipulating sets of characters.
4//! In SMT-LIB, a single character is represented as a Unicode code point in the range from `0` to `0x2FFFF` inclusive (defined in the `SmtChar` type).
5//! A [`CharRange`] is a contiguous inclusive range of characters, defined by a start and an end character.
6//! An [`Alphabet`] is a set of `CharRange`s, representing the union of all characters in the ranges.
7//!
8//! ```rust
9//! use smt_str::alphabet::{CharRange, Alphabet};
10//!
11//! // This range contains all characters from 'a' to 'z' inclusive
12//! let alpha = CharRange::new('a', 'z');
13//! assert_eq!(alpha.len(), 26);
14//! assert!(alpha.contains('a'));
15//! assert!(alpha.contains('z'));
16//! assert!(!alpha.contains('A'));
17//!
18//! // This range contains all character from '0' to '9' inclusive
19//! let digits = CharRange::new('0', '9');
20//! assert_eq!(digits.len(), 10);
21//! assert!(digits.contains('0'));
22//! assert!(digits.contains('9'));
23//! assert!(!digits.contains('a'));
24//!
25//! // The union of the two ranges is an alphabet that contains all lowercase letters and digits
26//!
27//! let mut alphabet = Alphabet::default();
28//! alphabet.insert(alpha);
29//! alphabet.insert(digits);
30//! assert_eq!(alphabet.len(), 36);
31//! assert!(alphabet.contains('a'));
32//! assert!(alphabet.contains('z'));
33//! assert!(alphabet.contains('0'));
34//! assert!(alphabet.contains('9'));
35//! ```
36//!
37//! The [`Alphabet`] provides operations supports operations such as union, intersection, and complement. The type always maintains the most compact representation of the alphabet, i.e., it
38//! merges overlapping and adjacent ranges into a single range.
39//! For example, the alphabet that contains all lowercase letters and digits is represented as a single range `[0-9a-z]`. See [`Alphabet`] for more details.
40//!
41//! A different representation of the alphabet is provided by the [`AlphabetPartition`] type, which divides the SMT-LIB alphabet into a set of disjoint partitions.
42//! Each partition is a contiguous range of characters that are treated. The partitioning can be refined by splitting partitions into smaller partitions.
43//! See [partition] module for more details.
44
45pub mod partition;
46
47use std::{cmp::Ordering, fmt::Display};
48
49use quickcheck::Arbitrary;
50
51use crate::{CharIterator, SmtChar};
52
53/// A range of characters [SmtChar]s defined by a start and an end character.
54///
55/// Every range covers all characters between the start and the end character, including the start and the end character themselves.
56/// If the start character is greater than the end character, the range is empty, otherwise it spans exactly `end - start + 1` characters.
57///
58/// # Ordering
59///
60/// Ranges are ordered lexicographically.
61/// That is, if `[s1, e1]` and `[s2, e2]` are two ranges, then `[s1, e1] < [s2, e2]` precisely if
62///
63/// - `s1 < s2`,
64/// - or `s1 == s2` and `e1 < e2`.
65///
66/// ## Example
67/// The range `['a', 'k']` is less than the range `['a', 'z']` which is less than the range `['b', 'z']`.
68/// ```rust
69/// use smt_str::alphabet::CharRange;
70/// assert!(CharRange::new('a', 'k') < CharRange::new('a', 'z'));
71/// assert!(CharRange::new('a', 'z') < CharRange::new('b', 'z'));
72/// ```
73///
74/// # Set operations
75///
76/// `CharRange` supports several common set-theoretic operations:
77///
78/// - **Intersection**: The intersection of two ranges is the range that contains all characters that are in both ranges (see [intersect](#method.intersect)).
79/// - **Complement**: The complement of a range is the range(s) containing all characters (in the SMT-LIB alphabet) that are not in the original range (see [complement](#method.complement)).
80/// - **Difference**: The difference of two ranges is the range(s) containing all characters that are in the first range but not in the second range (see [subtract](#method.subtract)).
81/// - **Subset check**: Check if a range is a subset of another range (see [covers](#method.covers)).
82///
83/// All above operations are performed in O(1) time.
84/// Unions of ranges are not directly supported by the CharRange type but are represented by the [Alphabet] type.
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
86pub struct CharRange {
87    start: SmtChar,
88    end: SmtChar,
89}
90
91impl CharRange {
92    /// Create a new range of characters between two characters `start` and `end` (inclusive), where `start` and `end` can be any type that can be converted into a [SmtChar].
93    /// If `start > end`, the range is empty.
94    /// If `start == end`, the range contains a single character.
95    ///
96    /// # Example
97    /// ```
98    /// use smt_str::alphabet::CharRange;
99    /// use smt_str::SmtChar;
100    ///
101    /// let range = CharRange::new('a', 'z');
102    /// assert_eq!(range.len(), 26);
103    ///
104    /// let empty = CharRange::new('z', 'a');
105    /// assert!(empty.is_empty());
106    ///
107    /// let single = CharRange::new('a', 'a');
108    /// assert_eq!(single.is_singleton(), Some('a'.into()));
109    /// ```
110    pub fn new(start: impl Into<SmtChar>, end: impl Into<SmtChar>) -> Self {
111        CharRange {
112            start: start.into(),
113            end: end.into(),
114        }
115    }
116
117    /// Creates an empty range that contains no characters.
118    ///
119    /// # Example
120    /// ```
121    /// use smt_str::alphabet::CharRange;
122    ///
123    /// let range = CharRange::empty();
124    /// assert!(range.is_empty());
125    /// ```
126    pub fn empty() -> Self {
127        CharRange {
128            start: SmtChar::new(1),
129            end: SmtChar::new(0),
130        }
131    }
132
133    /// Create a range that contains a single character.
134    ///
135    /// # Example
136    /// ```
137    /// use smt_str::alphabet::CharRange;
138    ///
139    /// let range = CharRange::singleton('a');
140    /// assert_eq!(range.len(), 1);
141    /// assert!(range.contains('a'));
142    /// ```
143    pub fn singleton(c: impl Into<SmtChar>) -> Self {
144        let c = c.into();
145        CharRange { start: c, end: c }
146    }
147
148    /// Create a range that covers all characters in the SMT-LIB alphabet, i.e., all characters in the range `[0, 0x2FFFF]`.
149    ///
150    /// # Example
151    /// ```
152    /// use smt_str::alphabet::CharRange;
153    /// use smt_str::SmtChar;
154    ///
155    /// let range = CharRange::all();
156    /// assert_eq!(range.len(), 0x2FFFF + 1);
157    /// assert!(range.contains(0));
158    /// assert!(range.contains(SmtChar::MAX));
159    /// ```
160    pub fn all() -> Self {
161        CharRange {
162            start: SmtChar::new(0),
163            end: SmtChar::new(0x2FFFF),
164        }
165    }
166
167    /// Return the number of characters in the range.
168    ///
169    /// # Example
170    /// ```
171    /// use smt_str::alphabet::CharRange;
172    /// use smt_str::SmtChar;
173    ///
174    /// assert_eq!(CharRange::new('a', 'z').len(), 26);
175    /// assert_eq!(CharRange::singleton('a').len(), 1);
176    /// assert_eq!(CharRange::new('z', 'a').len(), 0);
177    /// ```
178    pub fn len(&self) -> usize {
179        if self.start > self.end {
180            0
181        } else {
182            ((self.end.0 - self.start.0) + 1) as usize
183        }
184    }
185
186    /// Returns an iterator over all characters in the range.
187    /// The iterator returns all characters in the range, including the start and the end character.
188    /// If the range is empty, the iterator returns no characters.
189    ///
190    /// # Example
191    /// ```
192    /// use smt_str::alphabet::CharRange;
193    /// use smt_str::SmtChar;
194    ///
195    /// let range = CharRange::new('a', 'c');
196    /// let mut iter = range.iter();
197    /// assert_eq!(iter.next(), Some('a'.into()));
198    /// assert_eq!(iter.next(), Some('b'.into()));
199    /// assert_eq!(iter.next(), Some('c'.into()));
200    /// assert_eq!(iter.next(), None);
201    /// ```
202    pub fn iter(&self) -> impl Iterator<Item = SmtChar> {
203        CharIterator::new(self.start, self.end)
204    }
205
206    /// Check if the range is empty. The range is empty if the start character is greater than the end character.
207    ///
208    /// # Example
209    /// ```
210    /// use smt_str::alphabet::CharRange;
211    /// use smt_str::SmtChar;
212    ///
213    /// assert!(CharRange::empty().is_empty());
214    /// assert!(!CharRange::new('a', 'z').is_empty());
215    /// assert!(!CharRange::singleton('a').is_empty());
216    /// assert!(CharRange::new('z', 'a').is_empty());
217    /// ```
218    pub fn is_empty(&self) -> bool {
219        self.len() == 0
220    }
221
222    /// Returns the lower bound of the range.
223    ///
224    /// # Example
225    /// ```
226    /// use smt_str::alphabet::CharRange;
227    /// use smt_str::SmtChar;
228    ///
229    /// let range = CharRange::new('a', 'z');
230    /// assert_eq!(range.start(), 'a'.into());
231    ///
232    /// // Does not check for empty range
233    /// let empty = CharRange::new('z', 'a');
234    /// assert_eq!(empty.start(), 'z'.into());
235    /// ```
236    pub fn start(&self) -> SmtChar {
237        self.start
238    }
239
240    /// Returns the upper bound of the range.
241    ///
242    /// # Example
243    /// ```
244    /// use smt_str::alphabet::CharRange;
245    /// use smt_str::SmtChar;
246    ///
247    /// let range = CharRange::new('a', 'z');
248    /// assert_eq!(range.end(), 'z'.into());
249    ///
250    /// // Does not check for empty range
251    /// let empty = CharRange::new('z', 'a');
252    /// assert_eq!(empty.end(), 'a'.into());
253    /// ```
254    pub fn end(&self) -> SmtChar {
255        self.end
256    }
257
258    /// Returns a character from the range.
259    /// If the range is empty, returns `None`.
260    ///
261    /// # Example
262    /// ```
263    /// use smt_str::alphabet::CharRange;
264    /// use smt_str::SmtChar;
265    ///
266    /// let range = CharRange::new('a', 'z');
267    /// assert!(range.choose().is_some());
268    /// assert_eq!(CharRange::empty().choose(), None);
269    /// ```
270    pub fn choose(&self) -> Option<SmtChar> {
271        if self.is_empty() {
272            None
273        } else {
274            Some(self.start)
275        }
276    }
277
278    /// Check if the range contains a single character.
279    ///
280    ///
281    /// # Example
282    /// ```
283    /// use smt_str::alphabet::CharRange;
284    /// use smt_str::SmtChar;
285    ///
286    /// assert!(CharRange::singleton('a').is_singleton().is_some());
287    /// assert!(CharRange::new('a', 'z').is_singleton().is_none());
288    /// assert!(CharRange::empty().is_singleton().is_none());
289    /// ```
290    pub fn is_singleton(&self) -> Option<SmtChar> {
291        if self.start == self.end {
292            Some(self.start)
293        } else {
294            None
295        }
296    }
297
298    /// Check if the range contains all characters in the SMT-LIB alphabet.
299    ///
300    /// # Example
301    /// ```
302    /// use smt_str::alphabet::CharRange;
303    /// use smt_str::SmtChar;
304    ///
305    /// assert!(CharRange::all().is_full());
306    /// assert!(CharRange::new(SmtChar::MIN, SmtChar::MAX).is_full());
307    /// assert!(!CharRange::empty().is_full());
308    /// assert!(!CharRange::new('a', 'z').is_full());
309    /// ```
310    pub fn is_full(&self) -> bool {
311        self.start == SmtChar::MIN && self.end == SmtChar::MAX
312    }
313
314    /// Check if a character is in the range.
315    /// Returns true if the character is in the range, false otherwise.
316    ///
317    /// # Example
318    /// ```
319    /// use smt_str::alphabet::CharRange;
320    /// use smt_str::SmtChar;
321    ///
322    /// let range = CharRange::new('a', 'z');
323    /// assert!(range.contains('a'));
324    /// assert!(range.contains('z'));
325    /// assert!(range.contains('m'));
326    /// assert!(range.contains(98)); // 'a'
327    /// assert!(!range.contains('A'));
328    /// assert!(!range.contains('0'));
329    /// ``````
330    pub fn contains(&self, c: impl Into<SmtChar>) -> bool {
331        let c = c.into();
332        self.start <= c && c <= self.end
333    }
334
335    /// Checks if this range is a superset of another range.
336    /// Returns true if this range contains all characters in the other range.
337    ///
338    /// # Example
339    /// ```
340    /// use smt_str::alphabet::CharRange;
341    /// use smt_str::SmtChar;
342    ///
343    /// let r1 = CharRange::new('a', 'c');
344    /// let r2 = CharRange::new('a', 'b');
345    /// let r3 = CharRange::new('b', 'f');
346    ///
347    /// assert!(r1.covers(&r2));
348    /// assert!(!r1.covers(&r3));
349    /// assert!(r1.covers(&r1));
350    /// ```
351    pub fn covers(&self, other: &Self) -> bool {
352        self.start <= other.start && self.end >= other.end
353    }
354
355    /// Return the intersection of two ranges.
356    /// The intersection of two ranges is the range that contains all characters that are in both ranges.
357    /// If the two ranges do not overlap, the intersection is empty.
358    ///
359    /// # Example
360    /// ```
361    /// use smt_str::alphabet::CharRange;
362    /// use smt_str::SmtChar;
363    ///
364    /// let r1 = CharRange::new('a', 'm');
365    /// let r2 = CharRange::new('a', 'z');
366    /// let r3 = CharRange::singleton('a');
367    /// let r4 = CharRange::new('y', 'z');
368    ///
369    /// assert_eq!(r1.intersect(&r2), CharRange::new('a', 'm'));
370    /// assert_eq!(r1.intersect(&r3), CharRange::singleton('a'));
371    /// assert!(r1.intersect(&r4).is_empty());
372    /// ```
373    pub fn intersect(&self, other: &Self) -> Self {
374        let start = self.start.max(other.start);
375        let end = self.end.min(other.end);
376        CharRange::new(start, end)
377    }
378
379    /// Returns the complement of the SMT-LIB alphabet w.r.t. this range.
380    /// If this range is `[a, b]`, the complement is a union of ranges containing
381    ///
382    /// - `[0, a-1]`  if `a > 0`,
383    /// - and `[b+1, MAX]` if `b < MAX`.
384    ///
385    /// Thus, the complement of an empty range is the full alphabet and the complement of the full alphabet is empty.
386    ///
387    /// # Example
388    /// ```
389    /// use smt_str::alphabet::CharRange;
390    /// use smt_str::SmtChar;
391    ///
392    /// let range = CharRange::new('a', 'd');
393    /// let complement = range.complement();
394    /// let mut iter = complement.into_iter();
395    /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from(0), SmtChar::from('a').saturating_prev())));
396    /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from('d').saturating_next(), SmtChar::MAX)));
397    /// assert_eq!(iter.next(), None);
398    ///
399    /// assert_eq!(CharRange::empty().complement(), vec![CharRange::all()]);
400    /// assert_eq!(CharRange::all().complement(), vec![]);
401    /// ```
402    pub fn complement(&self) -> Vec<CharRange> {
403        if self.is_empty() {
404            return vec![CharRange::all()];
405        }
406
407        let mut result = Vec::new();
408        if self.start > SmtChar::new(0) {
409            result.push(Self::new(0u32, self.start.saturating_prev()));
410        }
411        if self.end < SmtChar::MAX {
412            result.push(Self::new(self.end.saturating_next(), SmtChar::MAX));
413        }
414        result
415    }
416
417    /// Subtracts the other range from this ranges.
418    /// Returns the difference of two ranges.
419    /// The difference of two ranges is the range that contains all characters that are in the first range but not in the second range.
420    /// If the two ranges do not overlap, the difference is the first range itself.
421    /// If the first range is a subset of the second range, the difference is empty.
422    /// If the second range is a subset of the first range, the difference is the two ranges that are not overlapping.
423    /// If the two ranges are equal, the difference is empty.
424    ///
425    /// # Example
426    /// ```
427    /// use smt_str::alphabet::CharRange;
428    /// use smt_str::SmtChar;
429    ///
430    /// let r1 = CharRange::new('a', 'z');
431    /// let r2 = CharRange::new('a', 'm');
432    /// let r3 = CharRange::new('m', 'z');
433    /// let r4 = CharRange::singleton('c');
434    ///
435    /// assert_eq!(r1.subtract(&r2), vec![CharRange::new('n', 'z')]);
436    /// assert_eq!(r1.subtract(&r3), vec![CharRange::new('a', 'l')]);
437    /// assert_eq!(r2.subtract(&r3), vec![CharRange::new('a', 'l')]);
438    /// assert_eq!(r1.subtract(&r4), vec![CharRange::new('a', 'b'), CharRange::new('d', 'z')]);
439    /// assert_eq!(r2.subtract(&r2), vec![]);
440    /// ```
441    pub fn subtract(&self, other: &Self) -> Vec<CharRange> {
442        if self.is_empty() {
443            return vec![];
444        } else if other.is_empty() {
445            return vec![*self];
446        }
447        // No overlap, return self
448        if self.end < other.start || self.start > other.end {
449            return vec![*self];
450        }
451        let mut result = Vec::new();
452        // Left part before `other`
453        if self.start < other.start {
454            result.push(Self::new(self.start, other.start.saturating_prev()));
455        }
456        // Right part after `other`
457        if self.end > other.end {
458            result.push(Self::new(other.end.saturating_next(), self.end));
459        }
460        result
461    }
462}
463
464impl PartialOrd for CharRange {
465    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
466        Some(self.cmp(other))
467    }
468}
469impl Ord for CharRange {
470    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
471        match self.start.cmp(&other.start) {
472            std::cmp::Ordering::Equal => self.end.cmp(&other.end),
473            o => o,
474        }
475    }
476}
477
478/// A finite set of characters from the SMT-LIB alphabet, represented as a normalized set of disjoint [`CharRange`]s.
479///
480/// An `Alphabet` models a subset of the SMT-LIB character domain `[0x0000, 0x2FFFF]`.
481/// Internally, it is represented as a sorted list of [`CharRange`]s, where each range is:
482/// - non-empty,
483/// - non-overlapping with other ranges,
484/// - and not adjacent to any other range.
485///
486/// ## Normalization
487///
488/// The internal representation is kept normalized at all times:
489/// - Ranges are kept in ascending order by start character.
490/// - Overlapping or adjacent ranges are merged automatically upon insertion.
491/// - The internal structure is stored in a [`Vec`] of [`CharRange`]s.
492///
493// Keeping this invariant requires `O(n)` time for insertion, where `n` is the number of existing ranges in the alphabet.
494///
495/// ### Example
496///
497/// ```rust
498/// use smt_str::alphabet::{Alphabet, CharRange};
499///
500/// let mut a = Alphabet::default();
501/// a.insert(CharRange::new('a', 'f'));
502///
503/// // Overlapping range is merged
504/// a.insert(CharRange::new('d', 'h'));
505/// assert_eq!(a.iter_ranges().count(), 1);
506///
507/// // Adjacent range is also merged
508/// a.insert(CharRange::new('h', 'k'));
509/// assert_eq!(a.iter_ranges().count(), 1);
510/// ```
511///
512/// ## Set Operations
513///
514/// This type supports the following set operations, each returning a new normalized [`Alphabet`]:
515///
516/// - [`union`](#method.union): characters in either alphabet.
517/// - [`intersect`](#method.intersect): characters common to both alphabets.
518/// - [`subtract`](#method.subtract): characters in `self` but not in the other alphabet.
519/// - [`complement`](#method.complement): all characters not in the current alphabet (with respect to the SMT-LIB character domain).
520///
521/// All set operations are performed in O(n + m) time, where n and m are the number of ranges in the two alphabets.
522#[derive(Debug, Clone, Default, PartialEq, Eq)]
523pub struct Alphabet {
524    ranges: Vec<CharRange>,
525}
526
527impl Alphabet {
528    /// Returns the full alphabet, containing all characters in the SMT-LIB alphabet.
529    pub fn full() -> Self {
530        let mut alphabet = Alphabet::default();
531        alphabet.insert(CharRange::all());
532        alphabet
533    }
534
535    /// Returns an empty alphabet.
536    pub fn empty() -> Self {
537        Alphabet::default()
538    }
539
540    /// Check if the alphabet is empty.
541    ///
542    /// # Example
543    /// ```
544    /// use smt_str::alphabet::Alphabet;
545    /// let alphabet = Alphabet::empty();
546    /// assert!(alphabet.is_empty());
547    /// ```
548    pub fn is_empty(&self) -> bool {
549        self.ranges.is_empty()
550    }
551
552    /// The number of characters in the alphabet.
553    /// This is the sum of the number of characters in each range in the alphabet.
554    /// If the alphabet is empty, the length is zero.
555    ///
556    /// # Example
557    /// ```
558    /// use smt_str::alphabet::{Alphabet, CharRange};
559    ///
560    /// let mut alphabet = Alphabet::default();
561    /// alphabet.insert(CharRange::new('a', 'd')); // 4 characters
562    /// alphabet.insert(CharRange::new('x', 'z')); // 3 characters
563    /// assert_eq!(alphabet.len(), 7);
564    /// ```
565    pub fn len(&self) -> usize {
566        self.ranges.iter().map(|r| r.len()).sum()
567    }
568
569    /// Check if a character is in the alphabet.
570    /// Requires O(log n) time where n is the number of ranges in the alphabet.
571    ///
572    /// # Example
573    /// ```
574    /// use smt_str::alphabet::{Alphabet, CharRange};
575    ///
576    /// let mut alphabet = Alphabet::default();
577    /// alphabet.insert(CharRange::new('a', 'd'));
578    /// assert!(alphabet.contains('a'));
579    /// assert!(alphabet.contains('d'));
580    /// assert!(alphabet.contains('c'));
581    /// assert!(!alphabet.contains('e'));
582    /// ```
583    pub fn contains(&self, c: impl Into<SmtChar>) -> bool {
584        let c = c.into();
585        self.ranges
586            .binary_search_by(|range| {
587                if c < range.start() {
588                    // c is before the range
589                    Ordering::Greater
590                } else if c > range.end() {
591                    // c is after the range
592                    Ordering::Less
593                } else {
594                    // c is in the range
595                    Ordering::Equal
596                }
597            })
598            .is_ok()
599    }
600
601    /// Inserts a new character range into the alphabet, merging it with any overlapping or adjacent ranges.
602    ///
603    /// The internal representation is kept normalized: all ranges are non-overlapping, non-adjacent, and sorted by starting character.
604    /// This means that if the inserted range touches or overlaps existing ranges, it will be merged into a single contiguous range.
605    ///
606    /// Keeping this invariant requires `O(n)` time for insertion, where `n` is the number of existing ranges in the alphabet.
607    ///
608    /// Inserting an empty range has no effect.
609    ///
610    ///
611    /// # Examples
612    ///
613    /// ```rust
614    /// use smt_str::alphabet::{Alphabet, CharRange};
615    ///
616    /// let mut a = Alphabet::default();
617    /// a.insert(CharRange::new('a', 'c'));
618    /// a.insert(CharRange::new('d', 'f')); // Adjacent: merged into ['a','f']
619    ///
620    /// let ranges: Vec<_> = a.iter_ranges().collect();
621    /// assert_eq!(ranges, vec![CharRange::new('a', 'f')]);
622    /// ```
623    pub fn insert(&mut self, new: CharRange) {
624        if new.is_empty() {
625            return;
626        }
627
628        // Find where `new.start` fits in the sorted list
629        let mut i = match self.ranges.binary_search_by(|r| r.start.cmp(&new.start)) {
630            Ok(i) => i,
631            Err(i) => i,
632        };
633
634        let mut start = new.start;
635        let mut end = new.end;
636
637        // Merge backwards if the previous range overlaps or is adjacent `new`
638        if i > 0 {
639            let prev = self.ranges[i - 1];
640            if prev.end.saturating_next() >= new.start {
641                i -= 1;
642                start = start.min(prev.start);
643                end = end.max(prev.end);
644                self.ranges.remove(i); // remove prev
645            }
646        }
647
648        // Move forward and remove all overlapping/adjacent ranges
649        while i < self.ranges.len() {
650            let r = self.ranges[i];
651            if r.start > end.saturating_next() {
652                break;
653            }
654            start = start.min(r.start);
655            end = end.max(r.end);
656            self.ranges.remove(i); // shift left, now `i` points to the next range
657        }
658
659        // Insert merged range
660        self.ranges.insert(i, CharRange::new(start, end));
661    }
662
663    /// Insert a new character in the alphabet.
664    /// Equivalent to `insert(CharRange::singleton(c))`.
665    /// See [insert](#method.insert) for more details.
666    pub fn insert_char(&mut self, c: impl Into<SmtChar>) {
667        self.insert(CharRange::singleton(c));
668    }
669
670    /* Set operations */
671
672    /// Computes the union of two alphabets and returns the result.
673    ///
674    /// The union contains all characters that appear in either `self` or `other`.
675    ///
676    /// This method performs a single linear pass over the ranges in both alphabets to merge overlapping and adjacent ranges.
677    /// Requires O(n + m) space and time, where n and m are the number of ranges in the two alphabets.
678    ///
679    /// The resulting `Alphabet` preserves the invariant that its character ranges are sorted and normalized, i.e., non-overlapping and non-adjacent.
680    ///
681    ///
682    /// # Examples
683    /// ```
684    /// use smt_str::alphabet::{Alphabet, CharRange};
685    ///
686    /// let mut a1 = Alphabet::default();
687    /// a1.insert(CharRange::new('a', 'c'));
688    ///
689    /// let mut a2 = Alphabet::default();
690    /// a2.insert(CharRange::new('d', 'f'));
691    ///
692    /// let union = a1.union(&a2);
693    ///
694    /// let mut iter = union.iter_ranges();
695    /// assert_eq!(iter.next(), Some(CharRange::new('a', 'f'))); // merged
696    /// assert_eq!(iter.next(), None);
697    /// ```
698    pub fn union(&self, other: &Self) -> Self {
699        let mut merged = Vec::with_capacity(self.ranges.len() + other.ranges.len());
700        let mut i = 0;
701        let mut j = 0;
702        let mut active = CharRange::empty();
703
704        /// Merge helper: adds next range into active or flushes it to the merged vector
705        /// If the active range is empty, it is set to the next range.
706        /// If the active range overlaps with the next range, it is extended.
707        /// Otherwise, the active range is flushed to the merged vector and the next range becomes the new active range.
708        fn merge_into(active: &mut CharRange, next: CharRange, merged: &mut Vec<CharRange>) {
709            if active.is_empty() {
710                *active = next;
711            } else if active.end.saturating_next() >= next.start {
712                active.end = active.end.max(next.end);
713            } else {
714                merged.push(*active);
715                *active = next;
716            }
717        }
718
719        // Merge two sorted sequences
720        while i < self.ranges.len() && j < other.ranges.len() {
721            let r1 = self.ranges[i];
722            let r2 = other.ranges[j];
723            debug_assert!(!r1.is_empty());
724            debug_assert!(!r2.is_empty());
725
726            if r1.start <= r2.start {
727                merge_into(&mut active, r1, &mut merged);
728                i += 1;
729            } else {
730                merge_into(&mut active, r2, &mut merged);
731                j += 1;
732            }
733        }
734
735        // Handle the remaining ranges
736        while i < self.ranges.len() {
737            merge_into(&mut active, self.ranges[i], &mut merged);
738            i += 1;
739        }
740        while j < other.ranges.len() {
741            merge_into(&mut active, other.ranges[j], &mut merged);
742            j += 1;
743        }
744
745        if !active.is_empty() {
746            merged.push(active);
747        }
748
749        Alphabet { ranges: merged }
750    }
751
752    /// Computes the intersection of two alphabets and returns the result.
753    ///
754    /// The result contains all characters that are present in both `self` and `other`.
755    /// This implementation performs a linear-time merge over the sorted range lists
756    /// and computes pairwise intersections of overlapping ranges.
757    /// Hence, the function needs O(n + m) time and O(min(n,m)) space, where n and m are the number of ranges in the two alphabets.
758    ///
759    /// The result is guaranteed to be sorted, non-overlapping, and non-adjacent.
760    ///
761    /// # Example
762    /// ```
763    /// use smt_str::alphabet::{Alphabet, CharRange};
764    /// use smt_str::SmtChar;
765    ///
766    /// let mut a1 = Alphabet::default();
767    /// a1.insert(CharRange::new('a', 'd'));
768    /// a1.insert(CharRange::new('x', 'z'));
769    ///
770    /// let mut a2 = Alphabet::default();
771    /// a2.insert(CharRange::new('c', 'f'));
772    /// a2.insert(CharRange::new('y', 'z'));
773    ///     
774    /// let intersection = a1.intersect(&a2);
775    ///
776    /// let mut iter = intersection.iter_ranges();
777    /// assert_eq!(iter.next(), Some(CharRange::new('c', 'd')));
778    /// assert_eq!(iter.next(), Some(CharRange::new('y', 'z')));
779    /// assert_eq!(iter.next(), None);
780    /// ```
781    pub fn intersect(&self, other: &Self) -> Self {
782        let mut result = Vec::new();
783        let mut i = 0;
784        let mut j = 0;
785
786        while i < self.ranges.len() && j < other.ranges.len() {
787            let r1 = self.ranges[i];
788            let r2 = other.ranges[j];
789
790            if r1.end < r2.start {
791                // r1 ends before r2 starts
792                i += 1;
793            } else if r2.end < r1.start {
794                // r2 ends before r1 starts
795                j += 1;
796            } else {
797                // Ranges overlap, compute intersection is O(1)
798                result.push(r1.intersect(&r2));
799                // Move the range that ends first
800                if r1.end < r2.end {
801                    i += 1;
802                } else {
803                    j += 1;
804                }
805            }
806        }
807        Alphabet { ranges: result }
808    }
809
810    /// Computes the complement of the alphabet with respect to the full SMT-LIB character set.
811    ///
812    /// The result contains all characters that are **not present** in this alphabet.  
813    /// That is, if this alphabet contains characters `A`, the complement contains the set `[0x0000, 0x2FFFF] \ A`.
814    ///
815    /// Computing the complement is done in a single pass over the ranges in the alphabet.
816    /// It requires O(n) time and space, where n is the number of ranges in the alphabet.
817    ///
818    /// The output is a normalized `Alphabet` with non-overlapping, non-adjacent and sorted ranges.
819    ///
820    /// # Example
821    /// ```
822    /// use smt_str::alphabet::{Alphabet, CharRange};
823    /// use smt_str::SmtChar;
824    ///
825    /// let mut a = Alphabet::default();
826    /// a.insert(CharRange::new('a', 'd'));
827    /// a.insert(CharRange::new('x', 'z'));
828    ///
829    /// let complement = a.complement();
830    ///
831    /// let mut iter = complement.iter_ranges();
832    /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from(0), SmtChar::from('a').saturating_prev())));
833    /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from('d').saturating_next(), SmtChar::from('x').saturating_prev())));
834    /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from('z').saturating_next(), SmtChar::MAX)));
835    /// assert_eq!(iter.next(), None);
836    /// ```
837    pub fn complement(&self) -> Self {
838        let mut result = Alphabet::default();
839        let mut last = SmtChar::new(0);
840        for r in &self.ranges {
841            if last < r.start {
842                result.insert(CharRange::new(last, r.start.saturating_prev()));
843            }
844            last = match r.end.try_next() {
845                Some(s) => s,
846                None => return result, // We have reached the end of the SMT-LIB alphabet
847            }
848        }
849        if last <= SmtChar::MAX {
850            result.insert(CharRange::new(last, SmtChar::MAX));
851        }
852        result
853    }
854
855    /// Computes the set difference `self \ other` and returns a new `Alphabet`.
856    ///
857    /// The result contains all characters that are present in `self` but **not** in `other`.
858    ///
859    /// This operation is analogous to set difference and preserves normalization
860    /// (sorted, non-overlapping, non-adjacent ranges).
861    /// It needs O(n + m)  and O(n + m) space, where n and m are the number of ranges in the two alphabets.
862    ///
863    /// # Example
864    ///  ```
865    /// use smt_str::alphabet::{Alphabet, CharRange};
866    ///
867    /// let mut a = Alphabet::default();
868    /// a.insert(CharRange::new('a', 'f'));
869    ///
870    /// let mut b = Alphabet::default();
871    /// b.insert(CharRange::new('c', 'd'));
872    ///
873    /// let diff = a.subtract(&b);
874    /// let expected = [CharRange::new('a', 'b'), CharRange::new('e', 'f')];
875    ///
876    /// let ranges: Vec<_> = diff.iter_ranges().collect();
877    /// assert_eq!(ranges, expected);
878    /// ```
879    pub fn subtract(&self, other: &Self) -> Self {
880        // This could be done with a single pass over the ranges, which would not require allocating an alphabet for the complement.
881        // However, the current implementation is simpler and has the same runtime complexity.
882        let other_comp = other.complement();
883        self.intersect(&other_comp)
884    }
885
886    /// Return an iterator over the ranges in the alphabet.
887    pub fn iter_ranges(&self) -> impl Iterator<Item = CharRange> + '_ {
888        self.ranges.iter().copied()
889    }
890
891    /// Return an iterator over all characters in the alphabet.
892    pub fn iter(&self) -> impl Iterator<Item = SmtChar> + '_ {
893        self.iter_ranges().flat_map(|r| r.iter())
894    }
895}
896
897impl From<CharRange> for Alphabet {
898    fn from(r: CharRange) -> Self {
899        let mut alphabet = Alphabet::default();
900        alphabet.insert(r);
901        alphabet
902    }
903}
904
905impl FromIterator<CharRange> for Alphabet {
906    fn from_iter<T: IntoIterator<Item = CharRange>>(iter: T) -> Self {
907        let mut alphabet = Alphabet::default();
908        for r in iter {
909            alphabet.insert(r);
910        }
911        alphabet
912    }
913}
914
915impl FromIterator<SmtChar> for Alphabet {
916    fn from_iter<T: IntoIterator<Item = SmtChar>>(iter: T) -> Self {
917        let mut alphabet = Alphabet::default();
918        for c in iter {
919            alphabet.insert_char(c);
920        }
921        alphabet
922    }
923}
924
925impl Display for CharRange {
926    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
927        if self.is_empty() {
928            write!(f, "[]")
929        } else if self.start == self.end {
930            write!(f, "[{}]", self.start)
931        } else {
932            write!(f, "[{}-{}]", self.start, self.end)
933        }
934    }
935}
936
937impl Display for Alphabet {
938    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
939        write!(f, "{{")?;
940        for (i, r) in self.iter_ranges().enumerate() {
941            write!(f, "{}", r)?;
942            if i < self.ranges.len() - 1 {
943                write!(f, ", ")?;
944            }
945        }
946        write!(f, "}}")
947    }
948}
949
950impl Arbitrary for CharRange {
951    fn arbitrary(g: &mut quickcheck::Gen) -> Self {
952        let start = SmtChar::arbitrary(g);
953        let end = SmtChar::arbitrary(g);
954        CharRange::new(start.min(end), end.max(start))
955    }
956}
957
958impl Arbitrary for Alphabet {
959    fn arbitrary(g: &mut quickcheck::Gen) -> Self {
960        let mut alphabet = Alphabet::default();
961        let ranges: Vec<CharRange> = Arbitrary::arbitrary(g);
962        for r in ranges {
963            alphabet.insert(r);
964        }
965        alphabet
966    }
967}
968
969#[cfg(test)]
970mod tests {
971
972    use quickcheck_macros::quickcheck;
973
974    use crate::{
975        alphabet::{Alphabet, CharRange},
976        CharIterator, SmtChar,
977    };
978
979    #[quickcheck]
980    fn all_range_contains_all_chars(c: SmtChar) -> bool {
981        let range = super::CharRange::all();
982        range.contains(c)
983    }
984
985    #[quickcheck]
986    fn range_contain_all_between(s: SmtChar, e: SmtChar) {
987        let range = super::CharRange::new(s, e);
988        let mut len = 0;
989        for c in CharIterator::new(s, e) {
990            len += 1;
991            assert!(range.contains(c));
992        }
993        assert_eq!(len, range.len());
994    }
995
996    #[quickcheck]
997    fn range_intersect_self(r: CharRange) -> bool {
998        let i = r.intersect(&r);
999        i == r
1000    }
1001
1002    #[quickcheck]
1003    fn range_intersect_empty(r: CharRange) -> bool {
1004        let empty = CharRange::empty();
1005        let i = r.intersect(&empty);
1006        i.is_empty()
1007    }
1008
1009    #[quickcheck]
1010    fn range_intersect_all(r: CharRange) -> bool {
1011        let all = CharRange::all();
1012        let i = r.intersect(&all);
1013        i == r
1014    }
1015
1016    #[quickcheck]
1017    fn range_intersect_correct(r1: CharRange, r2: CharRange) {
1018        let i = r1.intersect(&r2);
1019
1020        for c in r1.iter() {
1021            if r2.contains(c) {
1022                assert!(i.contains(c));
1023            } else {
1024                assert!(!i.contains(c));
1025            }
1026        }
1027
1028        for c in r2.iter() {
1029            if r1.contains(c) {
1030                assert!(i.contains(c));
1031            } else {
1032                assert!(!i.contains(c));
1033            }
1034        }
1035
1036        for c in i.iter() {
1037            assert!(r1.contains(c));
1038            assert!(r2.contains(c));
1039        }
1040    }
1041
1042    #[quickcheck]
1043    fn range_difference_self(r: CharRange) -> bool {
1044        let d = r.subtract(&r);
1045        d.is_empty()
1046    }
1047
1048    #[quickcheck]
1049    fn range_difference_empty(r: CharRange) {
1050        let empty = CharRange::empty();
1051        let d = r.subtract(&empty);
1052        assert_eq!(d.len(), 1);
1053        assert_eq!(d[0], r)
1054    }
1055
1056    #[quickcheck]
1057    fn range_difference_all(r: CharRange) -> bool {
1058        let all = CharRange::all();
1059        let d = r.subtract(&all);
1060        d.is_empty()
1061    }
1062
1063    #[quickcheck]
1064    fn range_difference_correct(r1: CharRange, r2: CharRange) {
1065        let diff = r1.subtract(&r2);
1066
1067        for c in r1.iter() {
1068            if r2.contains(c) {
1069                assert!(!diff.iter().any(|r| r.contains(c)));
1070            } else {
1071                assert!(diff.iter().any(|r| r.contains(c)));
1072            }
1073        }
1074
1075        for c in r2.iter() {
1076            if r1.contains(c) {
1077                assert!(!diff.iter().any(|r| r.contains(c)));
1078            }
1079        }
1080
1081        for cr in diff.iter() {
1082            for c in cr.iter() {
1083                assert!(r1.contains(c), "{c} is diff {cr} but not in r1 = {r1}");
1084                assert!(!r2.contains(c));
1085            }
1086        }
1087    }
1088
1089    /* Alphabet */
1090
1091    fn alphabet(ranges: &[CharRange]) -> Alphabet {
1092        let mut a = Alphabet::default();
1093        for r in ranges {
1094            a.insert(*r);
1095        }
1096        a
1097    }
1098
1099    fn ranges_eq(a: &Alphabet, expected: &[CharRange]) {
1100        let actual: Vec<_> = a.iter_ranges().collect();
1101        let expected: Vec<_> = expected.to_vec();
1102        assert_eq!(actual, expected);
1103    }
1104
1105    fn range(start: char, end: char) -> CharRange {
1106        CharRange::new(start, end)
1107    }
1108
1109    #[quickcheck]
1110    fn test_alphabet_insert_empty(r: CharRange) {
1111        let mut alphabet = Alphabet::default();
1112        alphabet.insert(r);
1113        assert_eq!(alphabet.iter_ranges().next(), Some(r));
1114        assert_eq!(alphabet.iter_ranges().count(), 1);
1115    }
1116
1117    #[test]
1118    fn test_alphabet_insert_non_overlapping() {
1119        let a = alphabet(&[range('a', 'c'), range('e', 'g')]);
1120        ranges_eq(&a, &[range('a', 'c'), range('e', 'g')]);
1121    }
1122
1123    #[test]
1124    fn test_alphabet_insert_adjacent_right_merges() {
1125        let a = alphabet(&[range('a', 'c'), range('d', 'f')]);
1126        ranges_eq(&a, &[range('a', 'f')]);
1127    }
1128
1129    #[test]
1130    fn test_alphabet_insert_adjacent_left_merges() {
1131        let a = alphabet(&[range('d', 'f'), range('a', 'c')]);
1132        ranges_eq(&a, &[range('a', 'f')]);
1133    }
1134
1135    #[test]
1136    fn test_alphabet_insert_overlapping_right_merges() {
1137        let a = alphabet(&[range('a', 'c'), range('b', 'd')]);
1138        ranges_eq(&a, &[range('a', 'd')]);
1139    }
1140
1141    #[test]
1142    fn test_alphabet_insert_overlapping_left_merges() {
1143        let a = alphabet(&[range('b', 'd'), range('a', 'c')]);
1144        ranges_eq(&a, &[range('a', 'd')]);
1145    }
1146
1147    #[test]
1148    fn test_alphabet_insert_touching_multiple_merges_all() {
1149        let a = alphabet(&[
1150            range('a', 'c'),
1151            range('e', 'g'),
1152            range('i', 'k'),
1153            range('d', 'j'),
1154        ]);
1155        ranges_eq(&a, &[range('a', 'k')]);
1156    }
1157
1158    #[test]
1159    fn test_alphabet_insert_inside_existing_range_no_change() {
1160        let mut a = Alphabet::default();
1161        a.insert(range('a', 'z'));
1162        a.insert(range('d', 'f'));
1163        ranges_eq(&a, &[range('a', 'z')]);
1164    }
1165
1166    #[test]
1167    fn test_alphabet_test_alphabet_insert_subsuming_range() {
1168        let mut a = Alphabet::default();
1169        a.insert(range('d', 'f'));
1170        a.insert(range('a', 'z'));
1171        ranges_eq(&a, &[range('a', 'z')]);
1172    }
1173
1174    #[test]
1175    fn test_alphabet_insert_single_char_merges_with_existing() {
1176        let a = alphabet(&[range('a', 'c'), CharRange::singleton('d')]);
1177        ranges_eq(&a, &[range('a', 'd')]);
1178    }
1179
1180    #[test]
1181    fn test_alphabet_insert_disjoint_apart() {
1182        let a = alphabet(&[range('a', 'b'), range('x', 'z')]);
1183        ranges_eq(&a, &[range('a', 'b'), range('x', 'z')]);
1184    }
1185
1186    #[quickcheck]
1187    fn alphabet_non_normalized_two_ranges(r1: CharRange, r2: CharRange) {
1188        let mut alphabet = Alphabet::default();
1189        alphabet.insert(r1);
1190        alphabet.insert(r2);
1191
1192        let mut iter = alphabet.iter_ranges();
1193        let mut last = iter.next();
1194        for r in iter {
1195            // Not overlapping
1196            let i = last.unwrap().intersect(&r);
1197            assert!(
1198                i.is_empty(),
1199                "{}: Intersection of {} and {} = {}",
1200                alphabet,
1201                last.unwrap(),
1202                r,
1203                i
1204            );
1205            // Not adjacent:
1206            assert!(last.unwrap().end.saturating_next() < r.start);
1207            last = Some(r);
1208        }
1209    }
1210
1211    #[quickcheck]
1212    fn alphabet_non_normalized(ranges: Vec<CharRange>) {
1213        let mut alphabet = Alphabet::default();
1214        for r in ranges {
1215            alphabet.insert(r);
1216        }
1217
1218        let mut iter = alphabet.iter_ranges();
1219        let mut last = iter.next();
1220        for r in iter {
1221            // Not overlapping
1222            let i = last.unwrap().intersect(&r);
1223            assert!(
1224                i.is_empty(),
1225                "{}: Intersection of {} and {} = {}",
1226                alphabet,
1227                last.unwrap(),
1228                r,
1229                i
1230            );
1231            // Not adjacent:
1232            assert!(last.unwrap().end.saturating_next() < r.start);
1233            last = Some(r);
1234        }
1235    }
1236
1237    #[test]
1238    fn test_alphabet_contains() {
1239        let a = alphabet(&[range('a', 'f'), range('x', 'z')]);
1240        assert!(a.contains('a'));
1241        assert!(a.contains('e'));
1242        assert!(!a.contains('m'));
1243        assert!(a.contains('x'));
1244        assert!(!a.contains('w'));
1245    }
1246
1247    #[quickcheck]
1248    fn test_alphabet_contains_2(ranges: Vec<CharRange>) {
1249        let alphabet = alphabet(&ranges);
1250        for r in ranges {
1251            if let Some(c) = r.choose() {
1252                assert!(alphabet.contains(c));
1253            }
1254        }
1255    }
1256
1257    /* Set operations */
1258
1259    #[quickcheck]
1260    fn alphabet_union_self(a: Alphabet) -> bool {
1261        let u = a.union(&a);
1262        u == a
1263    }
1264
1265    #[quickcheck]
1266    fn alphabet_union_empty(a: Alphabet) -> bool {
1267        let empty = Alphabet::default();
1268        let u = a.union(&empty);
1269        u == a
1270    }
1271
1272    #[quickcheck]
1273    fn alphabet_union_all(a: Alphabet) -> bool {
1274        let all = Alphabet::full();
1275        let u = a.union(&all);
1276        u == all
1277    }
1278
1279    #[test]
1280    fn alphabet_union_disjoint() {
1281        let a1 = alphabet(&[range('a', 'c')]);
1282        let a2 = alphabet(&[range('x', 'z')]);
1283        let union = a1.union(&a2);
1284        ranges_eq(&union, &[range('a', 'c'), range('x', 'z')]);
1285    }
1286
1287    #[test]
1288    fn alphabet_union_overlapping() {
1289        let a1 = alphabet(&[range('a', 'f')]);
1290        let a2 = alphabet(&[range('d', 'k')]);
1291        let union = a1.union(&a2);
1292        ranges_eq(&union, &[range('a', 'k')]);
1293    }
1294
1295    #[test]
1296    fn alphabet_union_adjacent() {
1297        let a1 = alphabet(&[range('a', 'c')]);
1298        let a2 = alphabet(&[range('d', 'f')]);
1299        let union = a1.union(&a2);
1300        ranges_eq(&union, &[range('a', 'f')]);
1301    }
1302
1303    #[test]
1304    fn alphabet_union_multiple_merge() {
1305        let a1 = alphabet(&[range('a', 'c'), range('f', 'h')]);
1306        let a2 = alphabet(&[range('b', 'g')]);
1307        let union = a1.union(&a2);
1308        ranges_eq(&union, &[range('a', 'h')]);
1309    }
1310
1311    #[quickcheck]
1312    fn alphabet_union_correct(a1: Alphabet, a2: Alphabet) -> bool {
1313        let u = a1.union(&a2);
1314        for c in u.iter() {
1315            if a1.contains(c) || a2.contains(c) {
1316                assert!(u.contains(c));
1317            } else {
1318                assert!(!u.contains(c));
1319            }
1320        }
1321        true
1322    }
1323
1324    #[quickcheck]
1325    fn alphabet_intersect_self(a: Alphabet) -> bool {
1326        let i = a.intersect(&a);
1327        i == a
1328    }
1329
1330    #[quickcheck]
1331    fn alphabet_intersect_empty(a: Alphabet) -> bool {
1332        let empty = Alphabet::default();
1333        let i = a.intersect(&empty);
1334        i.is_empty()
1335    }
1336
1337    #[quickcheck]
1338    fn alphabet_intersect_all(a: Alphabet) -> bool {
1339        let all = Alphabet::full();
1340        let i = a.intersect(&all);
1341        i == a
1342    }
1343
1344    #[test]
1345    fn intersect_disjoint() {
1346        let a1 = alphabet(&[range('a', 'c')]);
1347        let a2 = alphabet(&[range('x', 'z')]);
1348        let intersection = a1.intersect(&a2);
1349        assert!(intersection.is_empty());
1350    }
1351
1352    #[test]
1353    fn intersect_overlap() {
1354        let a1 = alphabet(&[range('a', 'f')]);
1355        let a2 = alphabet(&[range('d', 'k')]);
1356        let intersection = a1.intersect(&a2);
1357        ranges_eq(&intersection, &[range('d', 'f')]);
1358    }
1359
1360    #[test]
1361    fn intersect_adjacent_is_empty() {
1362        let a1 = alphabet(&[range('a', 'c')]);
1363        let a2 = alphabet(&[range('d', 'f')]);
1364        let intersection = a1.intersect(&a2);
1365        assert!(intersection.is_empty());
1366    }
1367
1368    #[test]
1369    fn intersect_subset() {
1370        let a1 = alphabet(&[range('a', 'z')]);
1371        let a2 = alphabet(&[range('g', 'm')]);
1372        let intersection = a1.intersect(&a2);
1373        ranges_eq(&intersection, &[range('g', 'm')]);
1374    }
1375
1376    #[test]
1377    fn intersect_multiple_ranges() {
1378        let a1 = alphabet(&[range('a', 'd'), range('f', 'j')]);
1379        let a2 = alphabet(&[range('c', 'g')]);
1380        let intersection = a1.intersect(&a2);
1381        ranges_eq(&intersection, &[range('c', 'd'), range('f', 'g')]);
1382    }
1383
1384    #[quickcheck]
1385    fn alphabet_intersect_correct(a1: Alphabet, a2: Alphabet) -> bool {
1386        let i = a1.intersect(&a2);
1387        for c in i.iter() {
1388            assert!(a1.contains(c));
1389            assert!(a2.contains(c));
1390        }
1391        for c in a1.iter() {
1392            if a2.contains(c) {
1393                assert!(i.contains(c));
1394            } else {
1395                assert!(!i.contains(c));
1396            }
1397        }
1398        for c in a2.iter() {
1399            if a1.contains(c) {
1400                assert!(i.contains(c));
1401            } else {
1402                assert!(!i.contains(c));
1403            }
1404        }
1405        true
1406    }
1407
1408    #[quickcheck]
1409    fn alphabet_complement_self(a: Alphabet) -> bool {
1410        let c = a.complement();
1411        let u = a.union(&c);
1412        u == Alphabet::full()
1413    }
1414
1415    #[test]
1416    fn alphabet_complement_self_full() {
1417        let a = Alphabet::full();
1418        let c = a.complement();
1419        let u = a.union(&c);
1420        assert_eq!(u, Alphabet::full())
1421    }
1422
1423    #[test]
1424    fn complement_single_range() {
1425        let a = alphabet(&[range('b', 'd')]);
1426        let complement = a.complement();
1427
1428        let expected = vec![
1429            CharRange::new(SmtChar::from(0), SmtChar::from('b').saturating_prev()),
1430            CharRange::new(SmtChar::from('d').saturating_next(), SmtChar::MAX),
1431        ];
1432        ranges_eq(&complement, &expected);
1433    }
1434
1435    #[test]
1436    fn complement_multiple_ranges() {
1437        let a = alphabet(&[range('a', 'c'), range('f', 'h'), range('k', 'k')]);
1438        let complement = a.complement();
1439
1440        let expected = vec![
1441            CharRange::new(SmtChar::from(0), SmtChar::from('a').saturating_prev()),
1442            CharRange::new(
1443                SmtChar::from('c').saturating_next(),
1444                SmtChar::from('f').saturating_prev(),
1445            ),
1446            CharRange::new(
1447                SmtChar::from('h').saturating_next(),
1448                SmtChar::from('k').saturating_prev(),
1449            ),
1450            CharRange::new(SmtChar::from('k').saturating_next(), SmtChar::MAX),
1451        ];
1452
1453        ranges_eq(&complement, &expected);
1454    }
1455
1456    #[test]
1457    fn complement_range_from_zero() {
1458        let a = alphabet(&[CharRange::new(SmtChar::from(0), SmtChar::from('x'))]);
1459        let complement = a.complement();
1460
1461        let expected = vec![CharRange::new(
1462            SmtChar::from('x').saturating_next(),
1463            SmtChar::MAX,
1464        )];
1465
1466        ranges_eq(&complement, &expected);
1467    }
1468
1469    #[test]
1470    fn complement_range_to_max() {
1471        let a = alphabet(&[CharRange::new(SmtChar::from('x'), SmtChar::MAX)]);
1472        let complement = a.complement();
1473
1474        let expected = vec![CharRange::new(
1475            SmtChar::from(0),
1476            SmtChar::from('x').saturating_prev(),
1477        )];
1478
1479        ranges_eq(&complement, &expected);
1480    }
1481
1482    #[test]
1483    fn alphabet_full_complement_empty() {
1484        let empty = Alphabet::default();
1485        let full = Alphabet::full();
1486        assert_eq!(full.complement(), empty);
1487        assert_eq!(empty.complement(), full);
1488    }
1489
1490    #[quickcheck]
1491    fn alphabet_intersect_comp_self(a: Alphabet) -> bool {
1492        let i = a.intersect(&a.complement());
1493        i.is_empty()
1494    }
1495
1496    #[quickcheck]
1497    fn alphabet_union_comp_self(a: Alphabet) {
1498        let u = a.union(&a.complement());
1499        assert_eq!(u, Alphabet::full());
1500    }
1501
1502    #[test]
1503    fn alphabet_union_comp_self_but_last() {
1504        let range = CharRange::new(0u32, SmtChar::MAX.saturating_prev());
1505        let mut a = Alphabet::default();
1506        a.insert(range);
1507        let comp = a.complement();
1508        let u = a.union(&comp);
1509        assert_eq!(u, Alphabet::full());
1510    }
1511
1512    #[quickcheck]
1513    fn alphabet_insert_complement(r: CharRange) -> bool {
1514        let mut alphabet = Alphabet::default();
1515        alphabet.insert(r);
1516        let c = alphabet.complement();
1517        let u = alphabet.union(&c);
1518        u == Alphabet::full()
1519    }
1520
1521    #[test]
1522    fn subtract_disjoint() {
1523        let a = alphabet(&[range('a', 'f')]);
1524        let b = alphabet(&[range('x', 'z')]);
1525        let diff = a.subtract(&b);
1526        assert_eq!(diff, a);
1527    }
1528
1529    #[test]
1530    fn subtract_complete_overlap() {
1531        let a = alphabet(&[range('a', 'f')]);
1532        let b = alphabet(&[range('a', 'f')]);
1533        let diff = a.subtract(&b);
1534        assert!(diff.is_empty());
1535    }
1536
1537    #[quickcheck]
1538    fn subtract_empty_neutral(a: Alphabet) -> bool {
1539        let empty = Alphabet::default();
1540        let diff = a.subtract(&empty);
1541        diff == a
1542    }
1543
1544    #[quickcheck]
1545    fn subtract_all_empty(a: Alphabet) -> bool {
1546        let all = Alphabet::full();
1547        let diff = a.subtract(&all);
1548        diff.is_empty()
1549    }
1550
1551    #[test]
1552    fn subtract_partial_overlap_middle() {
1553        let a = alphabet(&[range('a', 'f')]);
1554        let b = alphabet(&[range('c', 'd')]);
1555        let diff = a.subtract(&b);
1556        ranges_eq(&diff, &[range('a', 'b'), range('e', 'f')]);
1557    }
1558
1559    #[test]
1560    fn subtract_leading_overlap() {
1561        let a = alphabet(&[range('a', 'f')]);
1562        let b = alphabet(&[range('a', 'c')]);
1563        let diff = a.subtract(&b);
1564        ranges_eq(&diff, &[range('d', 'f')]);
1565    }
1566
1567    #[test]
1568    fn subtract_trailing_overlap() {
1569        let a = alphabet(&[range('a', 'f')]);
1570        let b = alphabet(&[range('d', 'f')]);
1571        let diff = a.subtract(&b);
1572        ranges_eq(&diff, &[range('a', 'c')]);
1573    }
1574
1575    #[test]
1576    fn subtract_multiple_ranges() {
1577        let a = alphabet(&[range('a', 'z')]);
1578        let b = alphabet(&[range('b', 'd'), range('x', 'z')]);
1579        let diff = a.subtract(&b);
1580        ranges_eq(&diff, &[range('a', 'a'), range('e', 'w')]);
1581    }
1582
1583    #[test]
1584    fn subtract_self_is_empty() {
1585        let a = alphabet(&[range('a', 'z')]);
1586        assert!(a.subtract(&a).is_empty());
1587    }
1588
1589    #[test]
1590    fn subtract_from_empty_is_empty() {
1591        let a = Alphabet::default();
1592        let b = alphabet(&[range('a', 'z')]);
1593        assert!(a.subtract(&b).is_empty());
1594    }
1595
1596    #[quickcheck]
1597    fn subtract_correct(a: Alphabet, b: Alphabet) -> bool {
1598        let diff = a.subtract(&b);
1599        for c in diff.iter() {
1600            if a.contains(c) && !b.contains(c) {
1601                assert!(diff.contains(c));
1602            } else {
1603                assert!(!diff.contains(c));
1604            }
1605        }
1606        true
1607    }
1608}
smt_str/alphabet/mod.rs

smt_str/alphabet/
mod.rs