smt_str/alphabet/mod.rs
1//! Character ranges and alphabets for SMT-LIB strings.
2//!
3//! This module provides types for describing and manipulating sets of characters.
4//! In SMT-LIB, a single character is represented as a Unicode code point in the range from `0` to `0x2FFFF` inclusive (defined in the `SmtChar` type).
5//! A [`CharRange`] is a contiguous inclusive range of characters, defined by a start and an end character.
6//! An [`Alphabet`] is a set of `CharRange`s, representing the union of all characters in the ranges.
7//!
8//! ```rust
9//! use smt_str::alphabet::{CharRange, Alphabet};
10//!
11//! // This range contains all characters from 'a' to 'z' inclusive
12//! let alpha = CharRange::new('a', 'z');
13//! assert_eq!(alpha.len(), 26);
14//! assert!(alpha.contains('a'));
15//! assert!(alpha.contains('z'));
16//! assert!(!alpha.contains('A'));
17//!
18//! // This range contains all character from '0' to '9' inclusive
19//! let digits = CharRange::new('0', '9');
20//! assert_eq!(digits.len(), 10);
21//! assert!(digits.contains('0'));
22//! assert!(digits.contains('9'));
23//! assert!(!digits.contains('a'));
24//!
25//! // The union of the two ranges is an alphabet that contains all lowercase letters and digits
26//!
27//! let mut alphabet = Alphabet::default();
28//! alphabet.insert(alpha);
29//! alphabet.insert(digits);
30//! assert_eq!(alphabet.len(), 36);
31//! assert!(alphabet.contains('a'));
32//! assert!(alphabet.contains('z'));
33//! assert!(alphabet.contains('0'));
34//! assert!(alphabet.contains('9'));
35//! ```
36//!
37//! The [`Alphabet`] provides operations supports operations such as union, intersection, and complement. The type always maintains the most compact representation of the alphabet, i.e., it
38//! merges overlapping and adjacent ranges into a single range.
39//! For example, the alphabet that contains all lowercase letters and digits is represented as a single range `[0-9a-z]`. See [`Alphabet`] for more details.
40//!
41//! A different representation of the alphabet is provided by the [`AlphabetPartition`] type, which divides the SMT-LIB alphabet into a set of disjoint partitions.
42//! Each partition is a contiguous range of characters that are treated. The partitioning can be refined by splitting partitions into smaller partitions.
43//! See [partition] module for more details.
44
45pub mod partition;
46
47use std::{cmp::Ordering, fmt::Display};
48
49use quickcheck::Arbitrary;
50
51use crate::{CharIterator, SmtChar};
52
53/// A range of characters [SmtChar]s defined by a start and an end character.
54///
55/// Every range covers all characters between the start and the end character, including the start and the end character themselves.
56/// If the start character is greater than the end character, the range is empty, otherwise it spans exactly `end - start + 1` characters.
57///
58/// # Ordering
59///
60/// Ranges are ordered lexicographically.
61/// That is, if `[s1, e1]` and `[s2, e2]` are two ranges, then `[s1, e1] < [s2, e2]` precisely if
62///
63/// - `s1 < s2`,
64/// - or `s1 == s2` and `e1 < e2`.
65///
66/// ## Example
67/// The range `['a', 'k']` is less than the range `['a', 'z']` which is less than the range `['b', 'z']`.
68/// ```rust
69/// use smt_str::alphabet::CharRange;
70/// assert!(CharRange::new('a', 'k') < CharRange::new('a', 'z'));
71/// assert!(CharRange::new('a', 'z') < CharRange::new('b', 'z'));
72/// ```
73///
74/// # Set operations
75///
76/// `CharRange` supports several common set-theoretic operations:
77///
78/// - **Intersection**: The intersection of two ranges is the range that contains all characters that are in both ranges (see [intersect](#method.intersect)).
79/// - **Complement**: The complement of a range is the range(s) containing all characters (in the SMT-LIB alphabet) that are not in the original range (see [complement](#method.complement)).
80/// - **Difference**: The difference of two ranges is the range(s) containing all characters that are in the first range but not in the second range (see [subtract](#method.subtract)).
81/// - **Subset check**: Check if a range is a subset of another range (see [covers](#method.covers)).
82///
83/// All above operations are performed in O(1) time.
84/// Unions of ranges are not directly supported by the CharRange type but are represented by the [Alphabet] type.
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
86pub struct CharRange {
87 start: SmtChar,
88 end: SmtChar,
89}
90
91impl CharRange {
92 /// Create a new range of characters between two characters `start` and `end` (inclusive), where `start` and `end` can be any type that can be converted into a [SmtChar].
93 /// If `start > end`, the range is empty.
94 /// If `start == end`, the range contains a single character.
95 ///
96 /// # Example
97 /// ```
98 /// use smt_str::alphabet::CharRange;
99 /// use smt_str::SmtChar;
100 ///
101 /// let range = CharRange::new('a', 'z');
102 /// assert_eq!(range.len(), 26);
103 ///
104 /// let empty = CharRange::new('z', 'a');
105 /// assert!(empty.is_empty());
106 ///
107 /// let single = CharRange::new('a', 'a');
108 /// assert_eq!(single.is_singleton(), Some('a'.into()));
109 /// ```
110 pub fn new(start: impl Into<SmtChar>, end: impl Into<SmtChar>) -> Self {
111 CharRange {
112 start: start.into(),
113 end: end.into(),
114 }
115 }
116
117 /// Creates an empty range that contains no characters.
118 ///
119 /// # Example
120 /// ```
121 /// use smt_str::alphabet::CharRange;
122 ///
123 /// let range = CharRange::empty();
124 /// assert!(range.is_empty());
125 /// ```
126 pub fn empty() -> Self {
127 CharRange {
128 start: SmtChar::new(1),
129 end: SmtChar::new(0),
130 }
131 }
132
133 /// Create a range that contains a single character.
134 ///
135 /// # Example
136 /// ```
137 /// use smt_str::alphabet::CharRange;
138 ///
139 /// let range = CharRange::singleton('a');
140 /// assert_eq!(range.len(), 1);
141 /// assert!(range.contains('a'));
142 /// ```
143 pub fn singleton(c: impl Into<SmtChar>) -> Self {
144 let c = c.into();
145 CharRange { start: c, end: c }
146 }
147
148 /// Create a range that covers all characters in the SMT-LIB alphabet, i.e., all characters in the range `[0, 0x2FFFF]`.
149 ///
150 /// # Example
151 /// ```
152 /// use smt_str::alphabet::CharRange;
153 /// use smt_str::SmtChar;
154 ///
155 /// let range = CharRange::all();
156 /// assert_eq!(range.len(), 0x2FFFF + 1);
157 /// assert!(range.contains(0));
158 /// assert!(range.contains(SmtChar::MAX));
159 /// ```
160 pub fn all() -> Self {
161 CharRange {
162 start: SmtChar::new(0),
163 end: SmtChar::new(0x2FFFF),
164 }
165 }
166
167 /// Return the number of characters in the range.
168 ///
169 /// # Example
170 /// ```
171 /// use smt_str::alphabet::CharRange;
172 /// use smt_str::SmtChar;
173 ///
174 /// assert_eq!(CharRange::new('a', 'z').len(), 26);
175 /// assert_eq!(CharRange::singleton('a').len(), 1);
176 /// assert_eq!(CharRange::new('z', 'a').len(), 0);
177 /// ```
178 pub fn len(&self) -> usize {
179 if self.start > self.end {
180 0
181 } else {
182 ((self.end.0 - self.start.0) + 1) as usize
183 }
184 }
185
186 /// Returns an iterator over all characters in the range.
187 /// The iterator returns all characters in the range, including the start and the end character.
188 /// If the range is empty, the iterator returns no characters.
189 ///
190 /// # Example
191 /// ```
192 /// use smt_str::alphabet::CharRange;
193 /// use smt_str::SmtChar;
194 ///
195 /// let range = CharRange::new('a', 'c');
196 /// let mut iter = range.iter();
197 /// assert_eq!(iter.next(), Some('a'.into()));
198 /// assert_eq!(iter.next(), Some('b'.into()));
199 /// assert_eq!(iter.next(), Some('c'.into()));
200 /// assert_eq!(iter.next(), None);
201 /// ```
202 pub fn iter(&self) -> impl Iterator<Item = SmtChar> {
203 CharIterator::new(self.start, self.end)
204 }
205
206 /// Check if the range is empty. The range is empty if the start character is greater than the end character.
207 ///
208 /// # Example
209 /// ```
210 /// use smt_str::alphabet::CharRange;
211 /// use smt_str::SmtChar;
212 ///
213 /// assert!(CharRange::empty().is_empty());
214 /// assert!(!CharRange::new('a', 'z').is_empty());
215 /// assert!(!CharRange::singleton('a').is_empty());
216 /// assert!(CharRange::new('z', 'a').is_empty());
217 /// ```
218 pub fn is_empty(&self) -> bool {
219 self.len() == 0
220 }
221
222 /// Returns the lower bound of the range.
223 ///
224 /// # Example
225 /// ```
226 /// use smt_str::alphabet::CharRange;
227 /// use smt_str::SmtChar;
228 ///
229 /// let range = CharRange::new('a', 'z');
230 /// assert_eq!(range.start(), 'a'.into());
231 ///
232 /// // Does not check for empty range
233 /// let empty = CharRange::new('z', 'a');
234 /// assert_eq!(empty.start(), 'z'.into());
235 /// ```
236 pub fn start(&self) -> SmtChar {
237 self.start
238 }
239
240 /// Returns the upper bound of the range.
241 ///
242 /// # Example
243 /// ```
244 /// use smt_str::alphabet::CharRange;
245 /// use smt_str::SmtChar;
246 ///
247 /// let range = CharRange::new('a', 'z');
248 /// assert_eq!(range.end(), 'z'.into());
249 ///
250 /// // Does not check for empty range
251 /// let empty = CharRange::new('z', 'a');
252 /// assert_eq!(empty.end(), 'a'.into());
253 /// ```
254 pub fn end(&self) -> SmtChar {
255 self.end
256 }
257
258 /// Returns a character from the range.
259 /// If the range is empty, returns `None`.
260 ///
261 /// # Example
262 /// ```
263 /// use smt_str::alphabet::CharRange;
264 /// use smt_str::SmtChar;
265 ///
266 /// let range = CharRange::new('a', 'z');
267 /// assert!(range.choose().is_some());
268 /// assert_eq!(CharRange::empty().choose(), None);
269 /// ```
270 pub fn choose(&self) -> Option<SmtChar> {
271 if self.is_empty() {
272 None
273 } else {
274 Some(self.start)
275 }
276 }
277
278 /// Check if the range contains a single character.
279 ///
280 ///
281 /// # Example
282 /// ```
283 /// use smt_str::alphabet::CharRange;
284 /// use smt_str::SmtChar;
285 ///
286 /// assert!(CharRange::singleton('a').is_singleton().is_some());
287 /// assert!(CharRange::new('a', 'z').is_singleton().is_none());
288 /// assert!(CharRange::empty().is_singleton().is_none());
289 /// ```
290 pub fn is_singleton(&self) -> Option<SmtChar> {
291 if self.start == self.end {
292 Some(self.start)
293 } else {
294 None
295 }
296 }
297
298 /// Check if the range contains all characters in the SMT-LIB alphabet.
299 ///
300 /// # Example
301 /// ```
302 /// use smt_str::alphabet::CharRange;
303 /// use smt_str::SmtChar;
304 ///
305 /// assert!(CharRange::all().is_full());
306 /// assert!(CharRange::new(SmtChar::MIN, SmtChar::MAX).is_full());
307 /// assert!(!CharRange::empty().is_full());
308 /// assert!(!CharRange::new('a', 'z').is_full());
309 /// ```
310 pub fn is_full(&self) -> bool {
311 self.start == SmtChar::MIN && self.end == SmtChar::MAX
312 }
313
314 /// Check if a character is in the range.
315 /// Returns true if the character is in the range, false otherwise.
316 ///
317 /// # Example
318 /// ```
319 /// use smt_str::alphabet::CharRange;
320 /// use smt_str::SmtChar;
321 ///
322 /// let range = CharRange::new('a', 'z');
323 /// assert!(range.contains('a'));
324 /// assert!(range.contains('z'));
325 /// assert!(range.contains('m'));
326 /// assert!(range.contains(98)); // 'a'
327 /// assert!(!range.contains('A'));
328 /// assert!(!range.contains('0'));
329 /// ``````
330 pub fn contains(&self, c: impl Into<SmtChar>) -> bool {
331 let c = c.into();
332 self.start <= c && c <= self.end
333 }
334
335 /// Checks if this range is a superset of another range.
336 /// Returns true if this range contains all characters in the other range.
337 ///
338 /// # Example
339 /// ```
340 /// use smt_str::alphabet::CharRange;
341 /// use smt_str::SmtChar;
342 ///
343 /// let r1 = CharRange::new('a', 'c');
344 /// let r2 = CharRange::new('a', 'b');
345 /// let r3 = CharRange::new('b', 'f');
346 ///
347 /// assert!(r1.covers(&r2));
348 /// assert!(!r1.covers(&r3));
349 /// assert!(r1.covers(&r1));
350 /// ```
351 pub fn covers(&self, other: &Self) -> bool {
352 self.start <= other.start && self.end >= other.end
353 }
354
355 /// Return the intersection of two ranges.
356 /// The intersection of two ranges is the range that contains all characters that are in both ranges.
357 /// If the two ranges do not overlap, the intersection is empty.
358 ///
359 /// # Example
360 /// ```
361 /// use smt_str::alphabet::CharRange;
362 /// use smt_str::SmtChar;
363 ///
364 /// let r1 = CharRange::new('a', 'm');
365 /// let r2 = CharRange::new('a', 'z');
366 /// let r3 = CharRange::singleton('a');
367 /// let r4 = CharRange::new('y', 'z');
368 ///
369 /// assert_eq!(r1.intersect(&r2), CharRange::new('a', 'm'));
370 /// assert_eq!(r1.intersect(&r3), CharRange::singleton('a'));
371 /// assert!(r1.intersect(&r4).is_empty());
372 /// ```
373 pub fn intersect(&self, other: &Self) -> Self {
374 let start = self.start.max(other.start);
375 let end = self.end.min(other.end);
376 CharRange::new(start, end)
377 }
378
379 /// Returns the complement of the SMT-LIB alphabet w.r.t. this range.
380 /// If this range is `[a, b]`, the complement is a union of ranges containing
381 ///
382 /// - `[0, a-1]` if `a > 0`,
383 /// - and `[b+1, MAX]` if `b < MAX`.
384 ///
385 /// Thus, the complement of an empty range is the full alphabet and the complement of the full alphabet is empty.
386 ///
387 /// # Example
388 /// ```
389 /// use smt_str::alphabet::CharRange;
390 /// use smt_str::SmtChar;
391 ///
392 /// let range = CharRange::new('a', 'd');
393 /// let complement = range.complement();
394 /// let mut iter = complement.into_iter();
395 /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from(0), SmtChar::from('a').saturating_prev())));
396 /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from('d').saturating_next(), SmtChar::MAX)));
397 /// assert_eq!(iter.next(), None);
398 ///
399 /// assert_eq!(CharRange::empty().complement(), vec![CharRange::all()]);
400 /// assert_eq!(CharRange::all().complement(), vec![]);
401 /// ```
402 pub fn complement(&self) -> Vec<CharRange> {
403 if self.is_empty() {
404 return vec![CharRange::all()];
405 }
406
407 let mut result = Vec::new();
408 if self.start > SmtChar::new(0) {
409 result.push(Self::new(0u32, self.start.saturating_prev()));
410 }
411 if self.end < SmtChar::MAX {
412 result.push(Self::new(self.end.saturating_next(), SmtChar::MAX));
413 }
414 result
415 }
416
417 /// Subtracts the other range from this ranges.
418 /// Returns the difference of two ranges.
419 /// The difference of two ranges is the range that contains all characters that are in the first range but not in the second range.
420 /// If the two ranges do not overlap, the difference is the first range itself.
421 /// If the first range is a subset of the second range, the difference is empty.
422 /// If the second range is a subset of the first range, the difference is the two ranges that are not overlapping.
423 /// If the two ranges are equal, the difference is empty.
424 ///
425 /// # Example
426 /// ```
427 /// use smt_str::alphabet::CharRange;
428 /// use smt_str::SmtChar;
429 ///
430 /// let r1 = CharRange::new('a', 'z');
431 /// let r2 = CharRange::new('a', 'm');
432 /// let r3 = CharRange::new('m', 'z');
433 /// let r4 = CharRange::singleton('c');
434 ///
435 /// assert_eq!(r1.subtract(&r2), vec![CharRange::new('n', 'z')]);
436 /// assert_eq!(r1.subtract(&r3), vec![CharRange::new('a', 'l')]);
437 /// assert_eq!(r2.subtract(&r3), vec![CharRange::new('a', 'l')]);
438 /// assert_eq!(r1.subtract(&r4), vec![CharRange::new('a', 'b'), CharRange::new('d', 'z')]);
439 /// assert_eq!(r2.subtract(&r2), vec![]);
440 /// ```
441 pub fn subtract(&self, other: &Self) -> Vec<CharRange> {
442 if self.is_empty() {
443 return vec![];
444 } else if other.is_empty() {
445 return vec![*self];
446 }
447 // No overlap, return self
448 if self.end < other.start || self.start > other.end {
449 return vec![*self];
450 }
451 let mut result = Vec::new();
452 // Left part before `other`
453 if self.start < other.start {
454 result.push(Self::new(self.start, other.start.saturating_prev()));
455 }
456 // Right part after `other`
457 if self.end > other.end {
458 result.push(Self::new(other.end.saturating_next(), self.end));
459 }
460 result
461 }
462}
463
464impl PartialOrd for CharRange {
465 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
466 Some(self.cmp(other))
467 }
468}
469impl Ord for CharRange {
470 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
471 match self.start.cmp(&other.start) {
472 std::cmp::Ordering::Equal => self.end.cmp(&other.end),
473 o => o,
474 }
475 }
476}
477
478/// A finite set of characters from the SMT-LIB alphabet, represented as a normalized set of disjoint [`CharRange`]s.
479///
480/// An `Alphabet` models a subset of the SMT-LIB character domain `[0x0000, 0x2FFFF]`.
481/// Internally, it is represented as a sorted list of [`CharRange`]s, where each range is:
482/// - non-empty,
483/// - non-overlapping with other ranges,
484/// - and not adjacent to any other range.
485///
486/// ## Normalization
487///
488/// The internal representation is kept normalized at all times:
489/// - Ranges are kept in ascending order by start character.
490/// - Overlapping or adjacent ranges are merged automatically upon insertion.
491/// - The internal structure is stored in a [`Vec`] of [`CharRange`]s.
492///
493// Keeping this invariant requires `O(n)` time for insertion, where `n` is the number of existing ranges in the alphabet.
494///
495/// ### Example
496///
497/// ```rust
498/// use smt_str::alphabet::{Alphabet, CharRange};
499///
500/// let mut a = Alphabet::default();
501/// a.insert(CharRange::new('a', 'f'));
502///
503/// // Overlapping range is merged
504/// a.insert(CharRange::new('d', 'h'));
505/// assert_eq!(a.iter_ranges().count(), 1);
506///
507/// // Adjacent range is also merged
508/// a.insert(CharRange::new('h', 'k'));
509/// assert_eq!(a.iter_ranges().count(), 1);
510/// ```
511///
512/// ## Set Operations
513///
514/// This type supports the following set operations, each returning a new normalized [`Alphabet`]:
515///
516/// - [`union`](#method.union): characters in either alphabet.
517/// - [`intersect`](#method.intersect): characters common to both alphabets.
518/// - [`subtract`](#method.subtract): characters in `self` but not in the other alphabet.
519/// - [`complement`](#method.complement): all characters not in the current alphabet (with respect to the SMT-LIB character domain).
520///
521/// All set operations are performed in O(n + m) time, where n and m are the number of ranges in the two alphabets.
522#[derive(Debug, Clone, Default, PartialEq, Eq)]
523pub struct Alphabet {
524 ranges: Vec<CharRange>,
525}
526
527impl Alphabet {
528 /// Returns the full alphabet, containing all characters in the SMT-LIB alphabet.
529 pub fn full() -> Self {
530 let mut alphabet = Alphabet::default();
531 alphabet.insert(CharRange::all());
532 alphabet
533 }
534
535 /// Returns an empty alphabet.
536 pub fn empty() -> Self {
537 Alphabet::default()
538 }
539
540 /// Check if the alphabet is empty.
541 ///
542 /// # Example
543 /// ```
544 /// use smt_str::alphabet::Alphabet;
545 /// let alphabet = Alphabet::empty();
546 /// assert!(alphabet.is_empty());
547 /// ```
548 pub fn is_empty(&self) -> bool {
549 self.ranges.is_empty()
550 }
551
552 /// The number of characters in the alphabet.
553 /// This is the sum of the number of characters in each range in the alphabet.
554 /// If the alphabet is empty, the length is zero.
555 ///
556 /// # Example
557 /// ```
558 /// use smt_str::alphabet::{Alphabet, CharRange};
559 ///
560 /// let mut alphabet = Alphabet::default();
561 /// alphabet.insert(CharRange::new('a', 'd')); // 4 characters
562 /// alphabet.insert(CharRange::new('x', 'z')); // 3 characters
563 /// assert_eq!(alphabet.len(), 7);
564 /// ```
565 pub fn len(&self) -> usize {
566 self.ranges.iter().map(|r| r.len()).sum()
567 }
568
569 /// Check if a character is in the alphabet.
570 /// Requires O(log n) time where n is the number of ranges in the alphabet.
571 ///
572 /// # Example
573 /// ```
574 /// use smt_str::alphabet::{Alphabet, CharRange};
575 ///
576 /// let mut alphabet = Alphabet::default();
577 /// alphabet.insert(CharRange::new('a', 'd'));
578 /// assert!(alphabet.contains('a'));
579 /// assert!(alphabet.contains('d'));
580 /// assert!(alphabet.contains('c'));
581 /// assert!(!alphabet.contains('e'));
582 /// ```
583 pub fn contains(&self, c: impl Into<SmtChar>) -> bool {
584 let c = c.into();
585 self.ranges
586 .binary_search_by(|range| {
587 if c < range.start() {
588 // c is before the range
589 Ordering::Greater
590 } else if c > range.end() {
591 // c is after the range
592 Ordering::Less
593 } else {
594 // c is in the range
595 Ordering::Equal
596 }
597 })
598 .is_ok()
599 }
600
601 /// Inserts a new character range into the alphabet, merging it with any overlapping or adjacent ranges.
602 ///
603 /// The internal representation is kept normalized: all ranges are non-overlapping, non-adjacent, and sorted by starting character.
604 /// This means that if the inserted range touches or overlaps existing ranges, it will be merged into a single contiguous range.
605 ///
606 /// Keeping this invariant requires `O(n)` time for insertion, where `n` is the number of existing ranges in the alphabet.
607 ///
608 /// Inserting an empty range has no effect.
609 ///
610 ///
611 /// # Examples
612 ///
613 /// ```rust
614 /// use smt_str::alphabet::{Alphabet, CharRange};
615 ///
616 /// let mut a = Alphabet::default();
617 /// a.insert(CharRange::new('a', 'c'));
618 /// a.insert(CharRange::new('d', 'f')); // Adjacent: merged into ['a','f']
619 ///
620 /// let ranges: Vec<_> = a.iter_ranges().collect();
621 /// assert_eq!(ranges, vec![CharRange::new('a', 'f')]);
622 /// ```
623 pub fn insert(&mut self, new: CharRange) {
624 if new.is_empty() {
625 return;
626 }
627
628 // Find where `new.start` fits in the sorted list
629 let mut i = match self.ranges.binary_search_by(|r| r.start.cmp(&new.start)) {
630 Ok(i) => i,
631 Err(i) => i,
632 };
633
634 let mut start = new.start;
635 let mut end = new.end;
636
637 // Merge backwards if the previous range overlaps or is adjacent `new`
638 if i > 0 {
639 let prev = self.ranges[i - 1];
640 if prev.end.saturating_next() >= new.start {
641 i -= 1;
642 start = start.min(prev.start);
643 end = end.max(prev.end);
644 self.ranges.remove(i); // remove prev
645 }
646 }
647
648 // Move forward and remove all overlapping/adjacent ranges
649 while i < self.ranges.len() {
650 let r = self.ranges[i];
651 if r.start > end.saturating_next() {
652 break;
653 }
654 start = start.min(r.start);
655 end = end.max(r.end);
656 self.ranges.remove(i); // shift left, now `i` points to the next range
657 }
658
659 // Insert merged range
660 self.ranges.insert(i, CharRange::new(start, end));
661 }
662
663 /// Insert a new character in the alphabet.
664 /// Equivalent to `insert(CharRange::singleton(c))`.
665 /// See [insert](#method.insert) for more details.
666 pub fn insert_char(&mut self, c: impl Into<SmtChar>) {
667 self.insert(CharRange::singleton(c));
668 }
669
670 /* Set operations */
671
672 /// Computes the union of two alphabets and returns the result.
673 ///
674 /// The union contains all characters that appear in either `self` or `other`.
675 ///
676 /// This method performs a single linear pass over the ranges in both alphabets to merge overlapping and adjacent ranges.
677 /// Requires O(n + m) space and time, where n and m are the number of ranges in the two alphabets.
678 ///
679 /// The resulting `Alphabet` preserves the invariant that its character ranges are sorted and normalized, i.e., non-overlapping and non-adjacent.
680 ///
681 ///
682 /// # Examples
683 /// ```
684 /// use smt_str::alphabet::{Alphabet, CharRange};
685 ///
686 /// let mut a1 = Alphabet::default();
687 /// a1.insert(CharRange::new('a', 'c'));
688 ///
689 /// let mut a2 = Alphabet::default();
690 /// a2.insert(CharRange::new('d', 'f'));
691 ///
692 /// let union = a1.union(&a2);
693 ///
694 /// let mut iter = union.iter_ranges();
695 /// assert_eq!(iter.next(), Some(CharRange::new('a', 'f'))); // merged
696 /// assert_eq!(iter.next(), None);
697 /// ```
698 pub fn union(&self, other: &Self) -> Self {
699 let mut merged = Vec::with_capacity(self.ranges.len() + other.ranges.len());
700 let mut i = 0;
701 let mut j = 0;
702 let mut active = CharRange::empty();
703
704 /// Merge helper: adds next range into active or flushes it to the merged vector
705 /// If the active range is empty, it is set to the next range.
706 /// If the active range overlaps with the next range, it is extended.
707 /// Otherwise, the active range is flushed to the merged vector and the next range becomes the new active range.
708 fn merge_into(active: &mut CharRange, next: CharRange, merged: &mut Vec<CharRange>) {
709 if active.is_empty() {
710 *active = next;
711 } else if active.end.saturating_next() >= next.start {
712 active.end = active.end.max(next.end);
713 } else {
714 merged.push(*active);
715 *active = next;
716 }
717 }
718
719 // Merge two sorted sequences
720 while i < self.ranges.len() && j < other.ranges.len() {
721 let r1 = self.ranges[i];
722 let r2 = other.ranges[j];
723 debug_assert!(!r1.is_empty());
724 debug_assert!(!r2.is_empty());
725
726 if r1.start <= r2.start {
727 merge_into(&mut active, r1, &mut merged);
728 i += 1;
729 } else {
730 merge_into(&mut active, r2, &mut merged);
731 j += 1;
732 }
733 }
734
735 // Handle the remaining ranges
736 while i < self.ranges.len() {
737 merge_into(&mut active, self.ranges[i], &mut merged);
738 i += 1;
739 }
740 while j < other.ranges.len() {
741 merge_into(&mut active, other.ranges[j], &mut merged);
742 j += 1;
743 }
744
745 if !active.is_empty() {
746 merged.push(active);
747 }
748
749 Alphabet { ranges: merged }
750 }
751
752 /// Computes the intersection of two alphabets and returns the result.
753 ///
754 /// The result contains all characters that are present in both `self` and `other`.
755 /// This implementation performs a linear-time merge over the sorted range lists
756 /// and computes pairwise intersections of overlapping ranges.
757 /// Hence, the function needs O(n + m) time and O(min(n,m)) space, where n and m are the number of ranges in the two alphabets.
758 ///
759 /// The result is guaranteed to be sorted, non-overlapping, and non-adjacent.
760 ///
761 /// # Example
762 /// ```
763 /// use smt_str::alphabet::{Alphabet, CharRange};
764 /// use smt_str::SmtChar;
765 ///
766 /// let mut a1 = Alphabet::default();
767 /// a1.insert(CharRange::new('a', 'd'));
768 /// a1.insert(CharRange::new('x', 'z'));
769 ///
770 /// let mut a2 = Alphabet::default();
771 /// a2.insert(CharRange::new('c', 'f'));
772 /// a2.insert(CharRange::new('y', 'z'));
773 ///
774 /// let intersection = a1.intersect(&a2);
775 ///
776 /// let mut iter = intersection.iter_ranges();
777 /// assert_eq!(iter.next(), Some(CharRange::new('c', 'd')));
778 /// assert_eq!(iter.next(), Some(CharRange::new('y', 'z')));
779 /// assert_eq!(iter.next(), None);
780 /// ```
781 pub fn intersect(&self, other: &Self) -> Self {
782 let mut result = Vec::new();
783 let mut i = 0;
784 let mut j = 0;
785
786 while i < self.ranges.len() && j < other.ranges.len() {
787 let r1 = self.ranges[i];
788 let r2 = other.ranges[j];
789
790 if r1.end < r2.start {
791 // r1 ends before r2 starts
792 i += 1;
793 } else if r2.end < r1.start {
794 // r2 ends before r1 starts
795 j += 1;
796 } else {
797 // Ranges overlap, compute intersection is O(1)
798 result.push(r1.intersect(&r2));
799 // Move the range that ends first
800 if r1.end < r2.end {
801 i += 1;
802 } else {
803 j += 1;
804 }
805 }
806 }
807 Alphabet { ranges: result }
808 }
809
810 /// Computes the complement of the alphabet with respect to the full SMT-LIB character set.
811 ///
812 /// The result contains all characters that are **not present** in this alphabet.
813 /// That is, if this alphabet contains characters `A`, the complement contains the set `[0x0000, 0x2FFFF] \ A`.
814 ///
815 /// Computing the complement is done in a single pass over the ranges in the alphabet.
816 /// It requires O(n) time and space, where n is the number of ranges in the alphabet.
817 ///
818 /// The output is a normalized `Alphabet` with non-overlapping, non-adjacent and sorted ranges.
819 ///
820 /// # Example
821 /// ```
822 /// use smt_str::alphabet::{Alphabet, CharRange};
823 /// use smt_str::SmtChar;
824 ///
825 /// let mut a = Alphabet::default();
826 /// a.insert(CharRange::new('a', 'd'));
827 /// a.insert(CharRange::new('x', 'z'));
828 ///
829 /// let complement = a.complement();
830 ///
831 /// let mut iter = complement.iter_ranges();
832 /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from(0), SmtChar::from('a').saturating_prev())));
833 /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from('d').saturating_next(), SmtChar::from('x').saturating_prev())));
834 /// assert_eq!(iter.next(), Some(CharRange::new(SmtChar::from('z').saturating_next(), SmtChar::MAX)));
835 /// assert_eq!(iter.next(), None);
836 /// ```
837 pub fn complement(&self) -> Self {
838 let mut result = Alphabet::default();
839 let mut last = SmtChar::new(0);
840 for r in &self.ranges {
841 if last < r.start {
842 result.insert(CharRange::new(last, r.start.saturating_prev()));
843 }
844 last = match r.end.try_next() {
845 Some(s) => s,
846 None => return result, // We have reached the end of the SMT-LIB alphabet
847 }
848 }
849 if last <= SmtChar::MAX {
850 result.insert(CharRange::new(last, SmtChar::MAX));
851 }
852 result
853 }
854
855 /// Computes the set difference `self \ other` and returns a new `Alphabet`.
856 ///
857 /// The result contains all characters that are present in `self` but **not** in `other`.
858 ///
859 /// This operation is analogous to set difference and preserves normalization
860 /// (sorted, non-overlapping, non-adjacent ranges).
861 /// It needs O(n + m) and O(n + m) space, where n and m are the number of ranges in the two alphabets.
862 ///
863 /// # Example
864 /// ```
865 /// use smt_str::alphabet::{Alphabet, CharRange};
866 ///
867 /// let mut a = Alphabet::default();
868 /// a.insert(CharRange::new('a', 'f'));
869 ///
870 /// let mut b = Alphabet::default();
871 /// b.insert(CharRange::new('c', 'd'));
872 ///
873 /// let diff = a.subtract(&b);
874 /// let expected = [CharRange::new('a', 'b'), CharRange::new('e', 'f')];
875 ///
876 /// let ranges: Vec<_> = diff.iter_ranges().collect();
877 /// assert_eq!(ranges, expected);
878 /// ```
879 pub fn subtract(&self, other: &Self) -> Self {
880 // This could be done with a single pass over the ranges, which would not require allocating an alphabet for the complement.
881 // However, the current implementation is simpler and has the same runtime complexity.
882 let other_comp = other.complement();
883 self.intersect(&other_comp)
884 }
885
886 /// Return an iterator over the ranges in the alphabet.
887 pub fn iter_ranges(&self) -> impl Iterator<Item = CharRange> + '_ {
888 self.ranges.iter().copied()
889 }
890
891 /// Return an iterator over all characters in the alphabet.
892 pub fn iter(&self) -> impl Iterator<Item = SmtChar> + '_ {
893 self.iter_ranges().flat_map(|r| r.iter())
894 }
895}
896
897impl From<CharRange> for Alphabet {
898 fn from(r: CharRange) -> Self {
899 let mut alphabet = Alphabet::default();
900 alphabet.insert(r);
901 alphabet
902 }
903}
904
905impl FromIterator<CharRange> for Alphabet {
906 fn from_iter<T: IntoIterator<Item = CharRange>>(iter: T) -> Self {
907 let mut alphabet = Alphabet::default();
908 for r in iter {
909 alphabet.insert(r);
910 }
911 alphabet
912 }
913}
914
915impl FromIterator<SmtChar> for Alphabet {
916 fn from_iter<T: IntoIterator<Item = SmtChar>>(iter: T) -> Self {
917 let mut alphabet = Alphabet::default();
918 for c in iter {
919 alphabet.insert_char(c);
920 }
921 alphabet
922 }
923}
924
925impl Display for CharRange {
926 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
927 if self.is_empty() {
928 write!(f, "[]")
929 } else if self.start == self.end {
930 write!(f, "[{}]", self.start)
931 } else {
932 write!(f, "[{}-{}]", self.start, self.end)
933 }
934 }
935}
936
937impl Display for Alphabet {
938 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
939 write!(f, "{{")?;
940 for (i, r) in self.iter_ranges().enumerate() {
941 write!(f, "{}", r)?;
942 if i < self.ranges.len() - 1 {
943 write!(f, ", ")?;
944 }
945 }
946 write!(f, "}}")
947 }
948}
949
950impl Arbitrary for CharRange {
951 fn arbitrary(g: &mut quickcheck::Gen) -> Self {
952 let start = SmtChar::arbitrary(g);
953 let end = SmtChar::arbitrary(g);
954 CharRange::new(start.min(end), end.max(start))
955 }
956}
957
958impl Arbitrary for Alphabet {
959 fn arbitrary(g: &mut quickcheck::Gen) -> Self {
960 let mut alphabet = Alphabet::default();
961 let ranges: Vec<CharRange> = Arbitrary::arbitrary(g);
962 for r in ranges {
963 alphabet.insert(r);
964 }
965 alphabet
966 }
967}
968
969#[cfg(test)]
970mod tests {
971
972 use quickcheck_macros::quickcheck;
973
974 use crate::{
975 alphabet::{Alphabet, CharRange},
976 CharIterator, SmtChar,
977 };
978
979 #[quickcheck]
980 fn all_range_contains_all_chars(c: SmtChar) -> bool {
981 let range = super::CharRange::all();
982 range.contains(c)
983 }
984
985 #[quickcheck]
986 fn range_contain_all_between(s: SmtChar, e: SmtChar) {
987 let range = super::CharRange::new(s, e);
988 let mut len = 0;
989 for c in CharIterator::new(s, e) {
990 len += 1;
991 assert!(range.contains(c));
992 }
993 assert_eq!(len, range.len());
994 }
995
996 #[quickcheck]
997 fn range_intersect_self(r: CharRange) -> bool {
998 let i = r.intersect(&r);
999 i == r
1000 }
1001
1002 #[quickcheck]
1003 fn range_intersect_empty(r: CharRange) -> bool {
1004 let empty = CharRange::empty();
1005 let i = r.intersect(&empty);
1006 i.is_empty()
1007 }
1008
1009 #[quickcheck]
1010 fn range_intersect_all(r: CharRange) -> bool {
1011 let all = CharRange::all();
1012 let i = r.intersect(&all);
1013 i == r
1014 }
1015
1016 #[quickcheck]
1017 fn range_intersect_correct(r1: CharRange, r2: CharRange) {
1018 let i = r1.intersect(&r2);
1019
1020 for c in r1.iter() {
1021 if r2.contains(c) {
1022 assert!(i.contains(c));
1023 } else {
1024 assert!(!i.contains(c));
1025 }
1026 }
1027
1028 for c in r2.iter() {
1029 if r1.contains(c) {
1030 assert!(i.contains(c));
1031 } else {
1032 assert!(!i.contains(c));
1033 }
1034 }
1035
1036 for c in i.iter() {
1037 assert!(r1.contains(c));
1038 assert!(r2.contains(c));
1039 }
1040 }
1041
1042 #[quickcheck]
1043 fn range_difference_self(r: CharRange) -> bool {
1044 let d = r.subtract(&r);
1045 d.is_empty()
1046 }
1047
1048 #[quickcheck]
1049 fn range_difference_empty(r: CharRange) {
1050 let empty = CharRange::empty();
1051 let d = r.subtract(&empty);
1052 assert_eq!(d.len(), 1);
1053 assert_eq!(d[0], r)
1054 }
1055
1056 #[quickcheck]
1057 fn range_difference_all(r: CharRange) -> bool {
1058 let all = CharRange::all();
1059 let d = r.subtract(&all);
1060 d.is_empty()
1061 }
1062
1063 #[quickcheck]
1064 fn range_difference_correct(r1: CharRange, r2: CharRange) {
1065 let diff = r1.subtract(&r2);
1066
1067 for c in r1.iter() {
1068 if r2.contains(c) {
1069 assert!(!diff.iter().any(|r| r.contains(c)));
1070 } else {
1071 assert!(diff.iter().any(|r| r.contains(c)));
1072 }
1073 }
1074
1075 for c in r2.iter() {
1076 if r1.contains(c) {
1077 assert!(!diff.iter().any(|r| r.contains(c)));
1078 }
1079 }
1080
1081 for cr in diff.iter() {
1082 for c in cr.iter() {
1083 assert!(r1.contains(c), "{c} is diff {cr} but not in r1 = {r1}");
1084 assert!(!r2.contains(c));
1085 }
1086 }
1087 }
1088
1089 /* Alphabet */
1090
1091 fn alphabet(ranges: &[CharRange]) -> Alphabet {
1092 let mut a = Alphabet::default();
1093 for r in ranges {
1094 a.insert(*r);
1095 }
1096 a
1097 }
1098
1099 fn ranges_eq(a: &Alphabet, expected: &[CharRange]) {
1100 let actual: Vec<_> = a.iter_ranges().collect();
1101 let expected: Vec<_> = expected.to_vec();
1102 assert_eq!(actual, expected);
1103 }
1104
1105 fn range(start: char, end: char) -> CharRange {
1106 CharRange::new(start, end)
1107 }
1108
1109 #[quickcheck]
1110 fn test_alphabet_insert_empty(r: CharRange) {
1111 let mut alphabet = Alphabet::default();
1112 alphabet.insert(r);
1113 assert_eq!(alphabet.iter_ranges().next(), Some(r));
1114 assert_eq!(alphabet.iter_ranges().count(), 1);
1115 }
1116
1117 #[test]
1118 fn test_alphabet_insert_non_overlapping() {
1119 let a = alphabet(&[range('a', 'c'), range('e', 'g')]);
1120 ranges_eq(&a, &[range('a', 'c'), range('e', 'g')]);
1121 }
1122
1123 #[test]
1124 fn test_alphabet_insert_adjacent_right_merges() {
1125 let a = alphabet(&[range('a', 'c'), range('d', 'f')]);
1126 ranges_eq(&a, &[range('a', 'f')]);
1127 }
1128
1129 #[test]
1130 fn test_alphabet_insert_adjacent_left_merges() {
1131 let a = alphabet(&[range('d', 'f'), range('a', 'c')]);
1132 ranges_eq(&a, &[range('a', 'f')]);
1133 }
1134
1135 #[test]
1136 fn test_alphabet_insert_overlapping_right_merges() {
1137 let a = alphabet(&[range('a', 'c'), range('b', 'd')]);
1138 ranges_eq(&a, &[range('a', 'd')]);
1139 }
1140
1141 #[test]
1142 fn test_alphabet_insert_overlapping_left_merges() {
1143 let a = alphabet(&[range('b', 'd'), range('a', 'c')]);
1144 ranges_eq(&a, &[range('a', 'd')]);
1145 }
1146
1147 #[test]
1148 fn test_alphabet_insert_touching_multiple_merges_all() {
1149 let a = alphabet(&[
1150 range('a', 'c'),
1151 range('e', 'g'),
1152 range('i', 'k'),
1153 range('d', 'j'),
1154 ]);
1155 ranges_eq(&a, &[range('a', 'k')]);
1156 }
1157
1158 #[test]
1159 fn test_alphabet_insert_inside_existing_range_no_change() {
1160 let mut a = Alphabet::default();
1161 a.insert(range('a', 'z'));
1162 a.insert(range('d', 'f'));
1163 ranges_eq(&a, &[range('a', 'z')]);
1164 }
1165
1166 #[test]
1167 fn test_alphabet_test_alphabet_insert_subsuming_range() {
1168 let mut a = Alphabet::default();
1169 a.insert(range('d', 'f'));
1170 a.insert(range('a', 'z'));
1171 ranges_eq(&a, &[range('a', 'z')]);
1172 }
1173
1174 #[test]
1175 fn test_alphabet_insert_single_char_merges_with_existing() {
1176 let a = alphabet(&[range('a', 'c'), CharRange::singleton('d')]);
1177 ranges_eq(&a, &[range('a', 'd')]);
1178 }
1179
1180 #[test]
1181 fn test_alphabet_insert_disjoint_apart() {
1182 let a = alphabet(&[range('a', 'b'), range('x', 'z')]);
1183 ranges_eq(&a, &[range('a', 'b'), range('x', 'z')]);
1184 }
1185
1186 #[quickcheck]
1187 fn alphabet_non_normalized_two_ranges(r1: CharRange, r2: CharRange) {
1188 let mut alphabet = Alphabet::default();
1189 alphabet.insert(r1);
1190 alphabet.insert(r2);
1191
1192 let mut iter = alphabet.iter_ranges();
1193 let mut last = iter.next();
1194 for r in iter {
1195 // Not overlapping
1196 let i = last.unwrap().intersect(&r);
1197 assert!(
1198 i.is_empty(),
1199 "{}: Intersection of {} and {} = {}",
1200 alphabet,
1201 last.unwrap(),
1202 r,
1203 i
1204 );
1205 // Not adjacent:
1206 assert!(last.unwrap().end.saturating_next() < r.start);
1207 last = Some(r);
1208 }
1209 }
1210
1211 #[quickcheck]
1212 fn alphabet_non_normalized(ranges: Vec<CharRange>) {
1213 let mut alphabet = Alphabet::default();
1214 for r in ranges {
1215 alphabet.insert(r);
1216 }
1217
1218 let mut iter = alphabet.iter_ranges();
1219 let mut last = iter.next();
1220 for r in iter {
1221 // Not overlapping
1222 let i = last.unwrap().intersect(&r);
1223 assert!(
1224 i.is_empty(),
1225 "{}: Intersection of {} and {} = {}",
1226 alphabet,
1227 last.unwrap(),
1228 r,
1229 i
1230 );
1231 // Not adjacent:
1232 assert!(last.unwrap().end.saturating_next() < r.start);
1233 last = Some(r);
1234 }
1235 }
1236
1237 #[test]
1238 fn test_alphabet_contains() {
1239 let a = alphabet(&[range('a', 'f'), range('x', 'z')]);
1240 assert!(a.contains('a'));
1241 assert!(a.contains('e'));
1242 assert!(!a.contains('m'));
1243 assert!(a.contains('x'));
1244 assert!(!a.contains('w'));
1245 }
1246
1247 #[quickcheck]
1248 fn test_alphabet_contains_2(ranges: Vec<CharRange>) {
1249 let alphabet = alphabet(&ranges);
1250 for r in ranges {
1251 if let Some(c) = r.choose() {
1252 assert!(alphabet.contains(c));
1253 }
1254 }
1255 }
1256
1257 /* Set operations */
1258
1259 #[quickcheck]
1260 fn alphabet_union_self(a: Alphabet) -> bool {
1261 let u = a.union(&a);
1262 u == a
1263 }
1264
1265 #[quickcheck]
1266 fn alphabet_union_empty(a: Alphabet) -> bool {
1267 let empty = Alphabet::default();
1268 let u = a.union(&empty);
1269 u == a
1270 }
1271
1272 #[quickcheck]
1273 fn alphabet_union_all(a: Alphabet) -> bool {
1274 let all = Alphabet::full();
1275 let u = a.union(&all);
1276 u == all
1277 }
1278
1279 #[test]
1280 fn alphabet_union_disjoint() {
1281 let a1 = alphabet(&[range('a', 'c')]);
1282 let a2 = alphabet(&[range('x', 'z')]);
1283 let union = a1.union(&a2);
1284 ranges_eq(&union, &[range('a', 'c'), range('x', 'z')]);
1285 }
1286
1287 #[test]
1288 fn alphabet_union_overlapping() {
1289 let a1 = alphabet(&[range('a', 'f')]);
1290 let a2 = alphabet(&[range('d', 'k')]);
1291 let union = a1.union(&a2);
1292 ranges_eq(&union, &[range('a', 'k')]);
1293 }
1294
1295 #[test]
1296 fn alphabet_union_adjacent() {
1297 let a1 = alphabet(&[range('a', 'c')]);
1298 let a2 = alphabet(&[range('d', 'f')]);
1299 let union = a1.union(&a2);
1300 ranges_eq(&union, &[range('a', 'f')]);
1301 }
1302
1303 #[test]
1304 fn alphabet_union_multiple_merge() {
1305 let a1 = alphabet(&[range('a', 'c'), range('f', 'h')]);
1306 let a2 = alphabet(&[range('b', 'g')]);
1307 let union = a1.union(&a2);
1308 ranges_eq(&union, &[range('a', 'h')]);
1309 }
1310
1311 #[quickcheck]
1312 fn alphabet_union_correct(a1: Alphabet, a2: Alphabet) -> bool {
1313 let u = a1.union(&a2);
1314 for c in u.iter() {
1315 if a1.contains(c) || a2.contains(c) {
1316 assert!(u.contains(c));
1317 } else {
1318 assert!(!u.contains(c));
1319 }
1320 }
1321 true
1322 }
1323
1324 #[quickcheck]
1325 fn alphabet_intersect_self(a: Alphabet) -> bool {
1326 let i = a.intersect(&a);
1327 i == a
1328 }
1329
1330 #[quickcheck]
1331 fn alphabet_intersect_empty(a: Alphabet) -> bool {
1332 let empty = Alphabet::default();
1333 let i = a.intersect(&empty);
1334 i.is_empty()
1335 }
1336
1337 #[quickcheck]
1338 fn alphabet_intersect_all(a: Alphabet) -> bool {
1339 let all = Alphabet::full();
1340 let i = a.intersect(&all);
1341 i == a
1342 }
1343
1344 #[test]
1345 fn intersect_disjoint() {
1346 let a1 = alphabet(&[range('a', 'c')]);
1347 let a2 = alphabet(&[range('x', 'z')]);
1348 let intersection = a1.intersect(&a2);
1349 assert!(intersection.is_empty());
1350 }
1351
1352 #[test]
1353 fn intersect_overlap() {
1354 let a1 = alphabet(&[range('a', 'f')]);
1355 let a2 = alphabet(&[range('d', 'k')]);
1356 let intersection = a1.intersect(&a2);
1357 ranges_eq(&intersection, &[range('d', 'f')]);
1358 }
1359
1360 #[test]
1361 fn intersect_adjacent_is_empty() {
1362 let a1 = alphabet(&[range('a', 'c')]);
1363 let a2 = alphabet(&[range('d', 'f')]);
1364 let intersection = a1.intersect(&a2);
1365 assert!(intersection.is_empty());
1366 }
1367
1368 #[test]
1369 fn intersect_subset() {
1370 let a1 = alphabet(&[range('a', 'z')]);
1371 let a2 = alphabet(&[range('g', 'm')]);
1372 let intersection = a1.intersect(&a2);
1373 ranges_eq(&intersection, &[range('g', 'm')]);
1374 }
1375
1376 #[test]
1377 fn intersect_multiple_ranges() {
1378 let a1 = alphabet(&[range('a', 'd'), range('f', 'j')]);
1379 let a2 = alphabet(&[range('c', 'g')]);
1380 let intersection = a1.intersect(&a2);
1381 ranges_eq(&intersection, &[range('c', 'd'), range('f', 'g')]);
1382 }
1383
1384 #[quickcheck]
1385 fn alphabet_intersect_correct(a1: Alphabet, a2: Alphabet) -> bool {
1386 let i = a1.intersect(&a2);
1387 for c in i.iter() {
1388 assert!(a1.contains(c));
1389 assert!(a2.contains(c));
1390 }
1391 for c in a1.iter() {
1392 if a2.contains(c) {
1393 assert!(i.contains(c));
1394 } else {
1395 assert!(!i.contains(c));
1396 }
1397 }
1398 for c in a2.iter() {
1399 if a1.contains(c) {
1400 assert!(i.contains(c));
1401 } else {
1402 assert!(!i.contains(c));
1403 }
1404 }
1405 true
1406 }
1407
1408 #[quickcheck]
1409 fn alphabet_complement_self(a: Alphabet) -> bool {
1410 let c = a.complement();
1411 let u = a.union(&c);
1412 u == Alphabet::full()
1413 }
1414
1415 #[test]
1416 fn alphabet_complement_self_full() {
1417 let a = Alphabet::full();
1418 let c = a.complement();
1419 let u = a.union(&c);
1420 assert_eq!(u, Alphabet::full())
1421 }
1422
1423 #[test]
1424 fn complement_single_range() {
1425 let a = alphabet(&[range('b', 'd')]);
1426 let complement = a.complement();
1427
1428 let expected = vec![
1429 CharRange::new(SmtChar::from(0), SmtChar::from('b').saturating_prev()),
1430 CharRange::new(SmtChar::from('d').saturating_next(), SmtChar::MAX),
1431 ];
1432 ranges_eq(&complement, &expected);
1433 }
1434
1435 #[test]
1436 fn complement_multiple_ranges() {
1437 let a = alphabet(&[range('a', 'c'), range('f', 'h'), range('k', 'k')]);
1438 let complement = a.complement();
1439
1440 let expected = vec![
1441 CharRange::new(SmtChar::from(0), SmtChar::from('a').saturating_prev()),
1442 CharRange::new(
1443 SmtChar::from('c').saturating_next(),
1444 SmtChar::from('f').saturating_prev(),
1445 ),
1446 CharRange::new(
1447 SmtChar::from('h').saturating_next(),
1448 SmtChar::from('k').saturating_prev(),
1449 ),
1450 CharRange::new(SmtChar::from('k').saturating_next(), SmtChar::MAX),
1451 ];
1452
1453 ranges_eq(&complement, &expected);
1454 }
1455
1456 #[test]
1457 fn complement_range_from_zero() {
1458 let a = alphabet(&[CharRange::new(SmtChar::from(0), SmtChar::from('x'))]);
1459 let complement = a.complement();
1460
1461 let expected = vec![CharRange::new(
1462 SmtChar::from('x').saturating_next(),
1463 SmtChar::MAX,
1464 )];
1465
1466 ranges_eq(&complement, &expected);
1467 }
1468
1469 #[test]
1470 fn complement_range_to_max() {
1471 let a = alphabet(&[CharRange::new(SmtChar::from('x'), SmtChar::MAX)]);
1472 let complement = a.complement();
1473
1474 let expected = vec![CharRange::new(
1475 SmtChar::from(0),
1476 SmtChar::from('x').saturating_prev(),
1477 )];
1478
1479 ranges_eq(&complement, &expected);
1480 }
1481
1482 #[test]
1483 fn alphabet_full_complement_empty() {
1484 let empty = Alphabet::default();
1485 let full = Alphabet::full();
1486 assert_eq!(full.complement(), empty);
1487 assert_eq!(empty.complement(), full);
1488 }
1489
1490 #[quickcheck]
1491 fn alphabet_intersect_comp_self(a: Alphabet) -> bool {
1492 let i = a.intersect(&a.complement());
1493 i.is_empty()
1494 }
1495
1496 #[quickcheck]
1497 fn alphabet_union_comp_self(a: Alphabet) {
1498 let u = a.union(&a.complement());
1499 assert_eq!(u, Alphabet::full());
1500 }
1501
1502 #[test]
1503 fn alphabet_union_comp_self_but_last() {
1504 let range = CharRange::new(0u32, SmtChar::MAX.saturating_prev());
1505 let mut a = Alphabet::default();
1506 a.insert(range);
1507 let comp = a.complement();
1508 let u = a.union(&comp);
1509 assert_eq!(u, Alphabet::full());
1510 }
1511
1512 #[quickcheck]
1513 fn alphabet_insert_complement(r: CharRange) -> bool {
1514 let mut alphabet = Alphabet::default();
1515 alphabet.insert(r);
1516 let c = alphabet.complement();
1517 let u = alphabet.union(&c);
1518 u == Alphabet::full()
1519 }
1520
1521 #[test]
1522 fn subtract_disjoint() {
1523 let a = alphabet(&[range('a', 'f')]);
1524 let b = alphabet(&[range('x', 'z')]);
1525 let diff = a.subtract(&b);
1526 assert_eq!(diff, a);
1527 }
1528
1529 #[test]
1530 fn subtract_complete_overlap() {
1531 let a = alphabet(&[range('a', 'f')]);
1532 let b = alphabet(&[range('a', 'f')]);
1533 let diff = a.subtract(&b);
1534 assert!(diff.is_empty());
1535 }
1536
1537 #[quickcheck]
1538 fn subtract_empty_neutral(a: Alphabet) -> bool {
1539 let empty = Alphabet::default();
1540 let diff = a.subtract(&empty);
1541 diff == a
1542 }
1543
1544 #[quickcheck]
1545 fn subtract_all_empty(a: Alphabet) -> bool {
1546 let all = Alphabet::full();
1547 let diff = a.subtract(&all);
1548 diff.is_empty()
1549 }
1550
1551 #[test]
1552 fn subtract_partial_overlap_middle() {
1553 let a = alphabet(&[range('a', 'f')]);
1554 let b = alphabet(&[range('c', 'd')]);
1555 let diff = a.subtract(&b);
1556 ranges_eq(&diff, &[range('a', 'b'), range('e', 'f')]);
1557 }
1558
1559 #[test]
1560 fn subtract_leading_overlap() {
1561 let a = alphabet(&[range('a', 'f')]);
1562 let b = alphabet(&[range('a', 'c')]);
1563 let diff = a.subtract(&b);
1564 ranges_eq(&diff, &[range('d', 'f')]);
1565 }
1566
1567 #[test]
1568 fn subtract_trailing_overlap() {
1569 let a = alphabet(&[range('a', 'f')]);
1570 let b = alphabet(&[range('d', 'f')]);
1571 let diff = a.subtract(&b);
1572 ranges_eq(&diff, &[range('a', 'c')]);
1573 }
1574
1575 #[test]
1576 fn subtract_multiple_ranges() {
1577 let a = alphabet(&[range('a', 'z')]);
1578 let b = alphabet(&[range('b', 'd'), range('x', 'z')]);
1579 let diff = a.subtract(&b);
1580 ranges_eq(&diff, &[range('a', 'a'), range('e', 'w')]);
1581 }
1582
1583 #[test]
1584 fn subtract_self_is_empty() {
1585 let a = alphabet(&[range('a', 'z')]);
1586 assert!(a.subtract(&a).is_empty());
1587 }
1588
1589 #[test]
1590 fn subtract_from_empty_is_empty() {
1591 let a = Alphabet::default();
1592 let b = alphabet(&[range('a', 'z')]);
1593 assert!(a.subtract(&b).is_empty());
1594 }
1595
1596 #[quickcheck]
1597 fn subtract_correct(a: Alphabet, b: Alphabet) -> bool {
1598 let diff = a.subtract(&b);
1599 for c in diff.iter() {
1600 if a.contains(c) && !b.contains(c) {
1601 assert!(diff.contains(c));
1602 } else {
1603 assert!(!diff.contains(c));
1604 }
1605 }
1606 true
1607 }
1608}