// closed_interval_set/normalize.rs

//! A normalized set of intervals consists of a sorted sequence of
//! disjoint intervals.
use crate::Backing;
use crate::ClosedRange;
use crate::Endpoint;
use crate::RangeCase;
use crate::RangeVec;
8
/// Determines whether the input sequence is in normalized format:
///  1. consists of valid intervals `(start, stop)` with `start <= stop`
///  2. intervals are sorted by the `start` endpoint
///  3. adjacent intervals are disjoint and separated by at least one `Endpoint` value
///
/// Checking this property takes time linear in the length of the input iterator.
#[inline(always)]
#[must_use]
pub fn is_normalized<T: Endpoint>(
    intervals: impl IntoIterator<Item: ClosedRange<EndT = T>>,
) -> bool {
    // Monomorphised worker kept out of line; the `inline(always)`
    // wrapper above only adapts the `IntoIterator` argument.
    #[inline(never)]
    fn doit<T: Endpoint>(mut iter: impl Iterator<Item: ClosedRange<EndT = T>>) -> bool {
        use core::cmp::Ordering; // Safe because we always check validity, so bogus results are fine

        // NOTE: the verdict is accumulated branch-free with `&` / `&=`
        // rather than short-circuiting `&&`.  Comparisons on invalid
        // endpoints (e.g., NaN) may yield arbitrary orderings, but the
        // `is_valid()` bits force `ret` to false in that case, so the
        // bogus comparison results never matter.
        let mut ret;
        let mut prev_stop;

        // Check the first range, if any.
        match iter.next().map(|range| range.get()) {
            Some((start, stop)) => {
                // Range must be valid.
                ret = start.is_valid() & stop.is_valid();
                // ... and non-empty: `start <= stop`.
                ret &= start.cmp_end(stop) <= Ordering::Equal;
                prev_stop = stop;
            }
            // Empty sequence is normalized
            None => return true,
        }

        // Safe to keep pulling from `iter`: we don't get here
        // if it returned `None`.
        for (start, stop) in iter.map(|range| range.get()) {
            ret &= start.is_valid() & stop.is_valid();
            // Each range must be valid
            ret &= start.cmp_end(stop) <= Ordering::Equal;

            // Find the next value immediately after `prev_stop`,
            // or default to the max value if there is none.
            let start_limit = prev_stop.next_after().unwrap_or(T::max_value());

            // The next range must be strictly after start_limit, i.e.,
            // with a gap between the two.  This also handles the case
            // where `start_limit` saturated because `prev_stop` is
            // already at the max value: the comparison is always
            // false, exactly what we want (can't have an interval
            // strictly after one that ends at the max value).
            ret &= start_limit.cmp_end(start) < Ordering::Equal;

            prev_stop = stop;
        }

        ret
    }

    doit(intervals.into_iter())
}
66
/// Normalizes the slice of intervals in place, in a prefix of the input
/// slice.
///
/// Returns the size of the normalized prefix; remaining elements in
/// the suffix of `intervals` are arbitrary (but were at some point in
/// the original `intervals`).
#[inline(always)]
fn normalize_slice<T: Endpoint>(mut intervals: &mut [(T, T)]) -> usize {
    use core::cmp::Ordering; // Safe because we only use results after validity check

    let first_is_valid = match intervals.first() {
        Some(first) => first.0.is_valid() & first.1.is_valid(),
        None => return 0, // Empty slice is always valid
    };

    // Fast-path probe: the predicate also demands validity of both
    // elements, so any invalid endpoint anywhere forces the slow path
    // below (where invalid pairs are dropped before sorting).
    let is_sorted = intervals.is_sorted_by(|x, y| {
        x.0.is_valid()
            & x.1.is_valid()
            & y.0.is_valid()
            & y.1.is_valid()
            & (T::cmp_range(*x, *y) <= Ordering::Equal)
    });

    if !(first_is_valid & is_sorted) {
        // Move all valid values to the front.
        // Unconditional write + conditional length bump keeps this
        // compaction loop branch-free on the validity test.
        let mut valid_prefix_len = 0usize;
        for idx in 0..intervals.len() {
            let cur = intervals[idx];
            intervals[valid_prefix_len] = cur;
            valid_prefix_len += (cur.0.is_valid() & cur.1.is_valid()) as usize;
        }

        intervals = &mut intervals[0..valid_prefix_len];

        // Safe to compare because everything is valid.
        intervals.sort_by(|x, y| T::cmp_range(*x, *y));
    }

    // Once we get here, `intervals` is all valid and sorted, it's safe to compare.
    // Merge pass: grow a prefix of disjoint ranges, folding each input
    // range into the last prefix entry when it overlaps or is adjacent.
    // The destination is just before the end of the prefix.
    let mut prefix_len = 0usize;
    for idx in 0..intervals.len() {
        assert!(prefix_len <= idx);

        let (cur_start, cur_stop) = intervals[idx];
        // Empty interval. skip
        if cur_start.cmp_end(cur_stop) > Ordering::Equal {
            continue;
        }

        // `dst` indexes the accumulator range (the last prefix entry);
        // an empty prefix is seeded with the current range, which makes
        // the merge step below a no-op for it.
        let dst = if prefix_len == 0 {
            intervals[prefix_len] = (cur_start, cur_stop);
            prefix_len = 1;
            0
        } else {
            // prefix_len > 0.
            prefix_len - 1
        };

        assert!(dst <= idx);
        // `prefix_len - 1 == dst` at this point; read the accumulator.
        let (acc_start, acc_stop) = intervals[prefix_len - 1];
        debug_assert!(acc_start.cmp_end(acc_stop) <= Ordering::Equal);
        debug_assert!(acc_start.cmp_end(cur_start) <= Ordering::Equal);
        debug_assert!(cur_start.cmp_end(cur_stop) <= Ordering::Equal);
        debug_assert!(acc_start.cmp_end(cur_stop) <= Ordering::Equal);

        // The current range merges into the accumulator iff it starts
        // at or before the value immediately after `acc_stop`
        // (i.e., it overlaps or is adjacent).  `next_after` saturates
        // to the max value when `acc_stop` is already maximal; the
        // merge test then always passes, which is correct because
        // nothing can lie strictly after the max value.
        let next_start = acc_stop.next_after().unwrap_or(T::max_value());
        if cur_start.cmp_end(next_start) <= Ordering::Equal {
            // Extend the accumulator to also cover the current range.
            intervals[dst] = (acc_start, acc_stop.top_end(cur_stop));
        } else {
            // Disjoint with a gap: the current range must not overlap
            // the accumulator at all, then it starts a new prefix entry.
            debug_assert!(
                !((acc_start.cmp_end(cur_start) <= Ordering::Equal)
                    & (acc_stop.cmp_end(cur_start) >= Ordering::Equal))
            );
            assert!(dst < idx);
            intervals[dst + 1] = (cur_start, cur_stop);
            prefix_len += 1
        }
    }

    debug_assert!(is_normalized(&intervals[0..prefix_len]));

    prefix_len
}
151
152/// Normalizes the vector of intervals and returns a vector that
153/// represents the same set of values, without redundancy.
154///
155/// No-ops quickly when `intervals` is known to be normalized at
156/// compile time.
157///
158/// This operation always operates in place (constant space) and takes
159/// constant time when `intervals` is known to be normalized at
160/// compile time.
161///
162/// Barring pre-normalised input, this operation takes linear time
163/// when the input is already normalised or otherwise sorted, and
164/// \\(\mathcal{O}(n \log n)\\) time in the input size (number of
165/// ranges) in the general case.
166#[inline(always)]
167#[must_use]
168pub fn normalize_vec<T: Endpoint>(intervals: impl Into<RangeCase<T>>) -> RangeVec<T> {
169    #[inline(never)]
170    fn doit<T: Endpoint>(mut intervals: Backing<T>) -> RangeVec<T> {
171        let remainder = normalize_slice(&mut intervals[..]);
172        intervals.truncate(remainder);
173
174        unsafe { RangeVec::new_unchecked(intervals) }
175    }
176
177    match intervals.into().unerase() {
178        Ok(ret) => ret,
179        Err(vec) => doit(vec),
180    }
181}
182
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod test {
    use super::*;
    use alloc::vec;
    use alloc::vec::Vec;

    // Smoke test for `is_normalized` / `normalize_slice` on a mix of
    // empty, overlapping, and backward ranges.
    #[test]
    fn test_smoke() {
        let mut intervals: [(u8, u8); 7] =
            [(1, 0), (1, 3), (4, 5), (2, 3), (7, 10), (20, 10), (7, 4)];
        // Every suffix with at least two live ranges is denormalized,
        // whether passed as a slice, by reference, or by value.
        for start in 0..intervals.len() - 2 {
            assert!(!is_normalized(&intervals[start..]));
            let v = intervals[start..].to_vec();
            assert!(!is_normalized(&v));
            assert!(!is_normalized(v));
        }

        assert_eq!(normalize_slice(&mut intervals), 2);
        assert_eq!(intervals[..2], [(1, 5), (7, 10)]);

        // An empty slice normalizes to an empty prefix.
        let mut empty: [(u8, u8); 0] = [];
        assert_eq!(normalize_slice(&mut empty), 0);
    }

    // Smoke test for the `Vec` entry point, including idempotence.
    #[test]
    fn test_smoke_vec() {
        let intervals: Vec<(u8, u8)> = vec![(1, 3), (3, 5), (2, 3), (7, 10)];
        assert!(!is_normalized(&intervals));

        let normalized_intervals = normalize_vec(intervals);
        assert_eq!(normalized_intervals.inner(), &[(1, 5), (7, 10)]);
        assert_eq!(
            normalized_intervals.clone(),
            normalize_vec(normalized_intervals)
        );

        assert_eq!(normalize_vec(Vec::<(u8, u8)>::new()).into_vec(), vec![]);
    }

    // Exhaustively normalize every set of singleton ranges over 16
    // values (presented in reverse order) and check the result covers
    // exactly the same bits, disjointly and without adjacency.
    #[test]
    fn test_units() {
        for bits in 0..=u16::MAX {
            let mut atomic_intervals = Backing::<u8>::new();
            for i in (0..16u8).rev() {
                if (bits & (1 << i)) != 0 {
                    atomic_intervals.push((i, i))
                }
            }

            // Reverse-sorted input is only "normalized" when trivial.
            assert!((atomic_intervals.len() <= 1) | (!is_normalized(&atomic_intervals[..])));
            let mut intervals = atomic_intervals;
            intervals = normalize_vec(intervals).into_inner();

            assert!(intervals.is_sorted());

            // Reconstruct the covered set and compare with the input.
            let mut result: u16 = 0;
            for (left, right) in intervals.iter().copied() {
                assert!(left <= right);
                for i in left..=right {
                    result |= 1 << i;
                }
            }

            assert_eq!(bits, result);

            // Check no overlap or adjacency.
            for ((_, curr_stop), (next_start, _)) in intervals.iter().zip(intervals.iter().skip(1))
            {
                assert!(curr_stop < next_start);
                assert!(next_start - curr_stop > 1);
            }
        }
    }

    // Exhaustively check that concatenating two already-normalized
    // sequences and renormalizing yields the union of the two sets.
    #[test]
    fn test_merge_normalized() {
        for bits in 0..=u16::MAX {
            let mut first = Backing::<u8>::new();
            let mut second = Backing::<u8>::new();
            for i in 0..8u8 {
                if bits & (1 << i) != 0 {
                    first.push((i, i))
                }

                if (bits >> 8) & (1 << i) != 0 {
                    second.push((i, i))
                }
            }

            first = normalize_vec(first).into_inner();
            second = normalize_vec(second).into_inner();

            let mut intervals = first;
            intervals.extend(second);

            intervals = normalize_vec(intervals).into_inner();
            assert!(intervals.is_sorted());

            // Reconstruct the covered set; it must equal the union.
            let mut result: u16 = 0;
            for (left, right) in intervals.iter().copied() {
                assert!(left <= right);
                for i in left..=right {
                    result |= 1 << i;
                }
            }

            assert_eq!((bits & 255) | (bits >> 8), result);

            // Check no overlap or adjacency.
            for ((_, curr_stop), (next_start, _)) in intervals.iter().zip(intervals.iter().skip(1))
            {
                assert!(curr_stop < next_start);
                assert!(next_start - curr_stop > 1);
            }
        }
    }

    // Exhaustively normalize all 3-range inputs with endpoints in
    // 0..=10 and check set equality against a bitmask reference.
    #[test]
    fn test_merge_few_ranges() {
        // Reference implementation: mark every value covered by some
        // non-empty range.
        fn ranges_to_bits(entries: &[(u8, u8)]) -> u128 {
            let mut ret = 0u128;

            for (start, stop) in entries.iter().copied() {
                assert!(start < 128);
                assert!(stop < 128);

                if start <= stop {
                    for bit in start..=stop {
                        ret |= 1u128 << bit;
                    }
                }
            }

            ret
        }

        fn test(entries: &[(u8, u8)]) {
            let initial_bits = ranges_to_bits(entries);
            let normalized = normalize_vec(entries.to_vec());
            assert_eq!(initial_bits, ranges_to_bits(normalized.inner()));
        }

        for start_0 in 0..=10 {
            for stop_0 in 0..=10 {
                for start_1 in 0..=10 {
                    for stop_1 in 0..=10 {
                        for start_2 in 0..=10 {
                            for stop_2 in 0..=10 {
                                test(&[(start_0, stop_0), (start_1, stop_1), (start_2, stop_2)])
                            }
                        }
                    }
                }
            }
        }
    }

    // Floating-point edge cases: NaNs, signed zeros, and infinities.
    #[test]
    fn test_fp_limits() {
        // NaN
        assert!(!is_normalized([(f32::NAN, f32::NAN)]));
        assert!(!is_normalized([(0.0, f32::NAN)]));
        assert!(!is_normalized([(f64::NAN, 0.0)]));
        assert!(!is_normalized([
            (0.0, 1.0),
            (f32::NAN, f32::NAN),
            (2.0, 3.0)
        ]));
        assert!(!is_normalized([(0.0, 1.0), (f64::NAN, 10.0), (12.0, 13.0)]));
        assert!(!is_normalized([(0.0, 1.0), (10.0, f64::NAN), (12.0, 13.0)]));
        assert!(!is_normalized([(0.0, 1.0), (f64::NAN, 10.0)]));
        assert!(!is_normalized([(0.0, 1.0), (10.0, f64::NAN)]));

        // Also check NaN handling in `normalize`.
        {
            let norm = RangeVec::from_vec(vec![(f64::NAN, f64::NAN)]);
            assert!(norm.is_empty());

            let norm = RangeVec::from_vec(vec![(0.0, f64::NAN)]);
            assert!(norm.is_empty());

            let norm = RangeVec::from_vec(vec![(f64::NAN, 0.0)]);
            assert!(norm.is_empty());
        }

        {
            let norm = RangeVec::from_vec(vec![
                (0.0, 1.0),
                (f32::NAN, f32::NAN),
                (2.0, 3.0),
                (f32::NAN, 2.0),
                (1.0, f32::NAN),
            ]);
            assert_eq!(norm.inner(), &[(0.0, 1.0), (2.0, 3.0)]);
        }

        // Signed zeros
        assert!(!is_normalized([(0.0f64, -0.0f64)]));
        assert!(is_normalized([(-0.0f64, 0.0f64)]));
        assert!(is_normalized([(-0.0f64, -0.0f64)]));
        assert!(is_normalized([(0.0f32, 0.0f32)]));

        // Too close
        assert!(!is_normalized([(-0.0f32, -0.0f32), (0.0f32, 0.0f32)]));
        assert!(!is_normalized([(0.0, 0.0), (-0.0, 0.0)]));
        assert!(!is_normalized([(0.0, 0.0), (-0.0, -0.0)]));

        // Some infinities
        assert!(!is_normalized([
            (f32::NEG_INFINITY, -0.0f32), // too close
            (0.0f32, f32::INFINITY)
        ]));
        assert!(is_normalized([
            (f32::NEG_INFINITY, f32::NEG_INFINITY),
            (0f32, f32::INFINITY)
        ]));
        // f64::MAX and f64::INFINITY are too close.
        assert!(!is_normalized([
            (f64::NEG_INFINITY, f64::MAX),
            (f64::INFINITY, f64::INFINITY)
        ]));
    }

    proptest::proptest! {
        #[test]
        fn is_normalized_negative(x: (u8, u8), y: (u8, u8), ranges: Vec<(u8, u8)>) {
            let mut ranges = ranges;
            ranges.push(x);
            ranges.push(y);

            // They can't both be right.
            let ltr = is_normalized(&ranges);

            ranges.reverse();
            let rtl = is_normalized(&ranges);

            assert!(!(ltr & rtl));
        }

        #[test]
        fn is_normalized_positive(ranges: Vec<(u8, u8)>) {
            let mut marks = vec![false; 256];

            for (x, y) in ranges {
                let (lo, hi) = (x.min(y), x.max(y));

                for i in lo..=hi {
                    marks[i as usize] = true;
                }
            }

            // Build the canonical normalized form from the bitmap.
            let mut normalized_ranges = Vec::new();

            for i in 0..marks.len() {
                if !marks[i] {
                    continue;
                }

                if i == 0 || !marks[i - 1] {
                    normalized_ranges.push((i as u8, i as u8));
                } else {
                    normalized_ranges.last_mut().unwrap().1 = i as u8;
                }
            }

            assert!(is_normalized(&normalized_ranges));

            // Appending a universal range must denormalize.
            if !normalized_ranges.is_empty() {
                normalized_ranges.push((0u8, 255u8));
                assert!(!is_normalized(&normalized_ranges));
                normalized_ranges.pop();
            }

            // Any reordering of a multi-range sequence denormalizes.
            if normalized_ranges.len() > 1 {
                normalized_ranges.reverse();
                assert!(!is_normalized(&normalized_ranges));
                normalized_ranges.reverse();

                let first = normalized_ranges[0];
                let last = *normalized_ranges.last().unwrap();

                normalized_ranges[0] = last;
                *normalized_ranges.last_mut().unwrap() = first;

                assert!(!is_normalized(&normalized_ranges));
            }
        }

        #[test]
        fn test_normalize_vec(ranges: Vec<(u8, u8)>) {
            use crate::ranges_to_bits;

            let initial_marks = ranges_to_bits(&ranges);
            let normalized = normalize_vec(ranges.clone());

            // Normalizing a RangeVec should no-op.
            let clone = normalized.clone();
            let clone_ptr = clone.as_ptr() as usize;
            let double_normalized = normalize_vec(clone);
            // This doesn't test as much as you'd think because even full
            // normalization is in-place.
            if double_normalized.len() > crate::INLINE_SIZE {
                assert_eq!(clone_ptr, double_normalized.as_ptr() as usize);
            }

            assert_eq!(&normalized, &double_normalized);

            assert_eq!(&initial_marks, &ranges_to_bits(&normalized));
            assert_eq!(&initial_marks, &ranges_to_bits(&double_normalized));

            assert_eq!(&normalized, &RangeVec::from_vec(ranges));
            assert_eq!(&normalized, &RangeVec::from_smallvec(double_normalized.clone().into_inner()));
            assert_eq!(&normalized, &RangeVec::from_vec(double_normalized.into_vec()));
        }

        #[test]
        fn test_smoke_is_normalized_vec_f32(mut ranges: Vec<(f32, f32)>) {
            let ltr = is_normalized(&ranges);
            ranges.reverse();
            let rtl = is_normalized(&ranges);

            if ranges.is_empty() {
                assert!(ltr);
                assert!(rtl);
            } else {
                let first = ranges[0];
                if ranges
                    .iter()
                    .all(|x| f32::cmp_range(*x, first) == core::cmp::Ordering::Equal)
                {
                    // All-equal sequences are direction-independent.
                    assert_eq!(ltr, rtl);
                } else {
                    // They can't both be correct
                    assert!(!ltr || !rtl);
                }
            }
        }

        #[test]
        fn test_smoke_normalize_vec_f64(ranges: Vec<(f64, f64)>) {
            let normalized = normalize_vec(ranges.clone());

            // Normalizing a RangeVec should no-op.
            let clone = normalized.clone();
            let clone_ptr = clone.as_ptr() as usize;
            let double_normalized = normalize_vec(clone);
            // This doesn't test as much as you'd think because even full
            // normalization is in-place.
            if double_normalized.len() > crate::INLINE_SIZE {
                assert_eq!(clone_ptr, double_normalized.as_ptr() as usize);
            }
            assert_eq!(&normalized, &double_normalized);

            assert_eq!(&normalized, &RangeVec::from_vec(ranges));
            assert_eq!(
                &normalized,
                &RangeVec::from_smallvec(double_normalized.into_inner())
            );
        }
    }
}