// closed_interval_set/normalize.rs

//! A normalized set of intervals consists of a sorted sequence of
//! disjoint intervals.
use crate::Backing;
use crate::ClosedRange;
use crate::Endpoint;
use crate::RangeCase;
use crate::RangeVec;
8
9/// Determines whether the input sequence is in normalized format:
10///  1. consists of valid intervals `(start, stop)` with `start <= stop`
11///  2. intervals are sorted by the `start` endpoint
12///  3. adjacent intervals are disjoint and separated by at least one `Endpoint` value
13///
14/// Checking this property takes time linear in the length of the input iterator.
15#[inline(always)]
16pub fn is_normalized<T: Endpoint>(
17    intervals: impl IntoIterator<Item: ClosedRange<EndT = T>>,
18) -> bool {
19    #[inline(never)]
20    fn doit<T: Endpoint>(mut iter: impl Iterator<Item: ClosedRange<EndT = T>>) -> bool {
21        use core::cmp::Ordering; // Safe because we always check validity, so bogus results are fine
22
23        let mut ret;
24        let mut prev_stop;
25
26        // Check the first range, if any.
27        match iter.next().map(|range| range.get()) {
28            Some((start, stop)) => {
29                // Range must be valid.
30                ret = start.is_valid() & stop.is_valid();
31                ret &= start.cmp_end(stop) <= Ordering::Equal;
32                prev_stop = stop;
33            }
34            // Empty sequence is normalized
35            None => return true,
36        }
37
38        // Safe to keep pulling from `iter`: we don't get here
39        // if it returned `None`.
40        for (start, stop) in iter.map(|range| range.get()) {
41            ret &= start.is_valid() & stop.is_valid();
42            // Each range must be valid
43            ret &= start.cmp_end(stop) <= Ordering::Equal;
44
45            // Find the next value immediately after `prev_stop`,
46            // or default to the max value if there is none.
47            let start_limit = prev_stop.next_after().unwrap_or(T::max_value());
48
49            // The next range must be strictly after start_limit, i.e.,
50            // with a gap between the two.  This also handles the case
51            // where `start_limit` saturated because `prev_stop` is
52            // already at the max value: the comparison is always
53            // false, exactly what we want (can't have an interval
54            // strictly after one that ends at the max value).
55            ret &= start_limit.cmp_end(start) < Ordering::Equal;
56
57            prev_stop = stop;
58        }
59
60        ret
61    }
62
63    doit(intervals.into_iter())
64}
65
/// Normalizes the slice of intervals in place, in a prefix of the input
/// slice.
///
/// Returns the size of the normalized prefix; remaining elements in
/// the suffix of `intervals` are arbitrary (but were at some point in
/// the original `intervals`).
#[inline(always)]
fn normalize_slice<T: Endpoint>(mut intervals: &mut [(T, T)]) -> usize {
    use core::cmp::Ordering; // Safe because we only use results after validity check

    let first_is_valid = match intervals.first() {
        Some(first) => first.0.is_valid() & first.1.is_valid(),
        None => return 0, // Empty slice is always valid
    };

    // Fast-path probe: `is_sorted_by` only inspects adjacent pairs, so
    // for slices of length >= 2 the validity conjuncts cover every
    // element; a singleton slice is covered by `first_is_valid` alone.
    let is_sorted = intervals.is_sorted_by(|x, y| {
        x.0.is_valid()
            & x.1.is_valid()
            & y.0.is_valid()
            & y.1.is_valid()
            & (T::cmp_range(*x, *y) <= Ordering::Equal)
    });

    if !(first_is_valid & is_sorted) {
        // Move all valid values to the front.
        // Branchless compaction: always write, only advance the write
        // cursor when the pair was valid.
        let mut valid_prefix_len = 0usize;
        for idx in 0..intervals.len() {
            let cur = intervals[idx];
            intervals[valid_prefix_len] = cur;
            valid_prefix_len += (cur.0.is_valid() & cur.1.is_valid()) as usize;
        }

        // Shrink our view to the valid prefix; the invalid suffix is
        // left behind (callers only care about the returned length).
        intervals = &mut intervals[0..valid_prefix_len];

        // Safe to compare because everything is valid.
        intervals.sort_by(|x, y| T::cmp_range(*x, *y));
    }

    // Once we get here, `intervals` is all valid and sorted, it's safe to compare.
    // Merge pass: fold each range into the accumulator held at
    // `intervals[prefix_len - 1]`, or start a new one after it.
    // The destination is just before the end of the prefix.
    let mut prefix_len = 0usize;
    for idx in 0..intervals.len() {
        // The writes below never touch unprocessed elements past `idx`.
        assert!(prefix_len <= idx);

        let (cur_start, cur_stop) = intervals[idx];
        // Empty interval. skip
        if cur_start.cmp_end(cur_stop) > Ordering::Equal {
            continue;
        }

        let dst = if prefix_len == 0 {
            // First kept range seeds the accumulator; the merge step
            // below then harmlessly no-ops against itself.
            intervals[prefix_len] = (cur_start, cur_stop);
            prefix_len = 1;
            0
        } else {
            // prefix_len > 0.
            prefix_len - 1
        };

        assert!(dst <= idx);
        let (acc_start, acc_stop) = intervals[prefix_len - 1];
        debug_assert!(acc_start.cmp_end(acc_stop) <= Ordering::Equal);
        debug_assert!(acc_start.cmp_end(cur_start) <= Ordering::Equal);
        debug_assert!(cur_start.cmp_end(cur_stop) <= Ordering::Equal);
        debug_assert!(acc_start.cmp_end(cur_stop) <= Ordering::Equal);

        // First value not already covered by the accumulator; saturates
        // at the max value when the accumulator already ends there.
        let next_start = acc_stop.next_after().unwrap_or(T::max_value());
        if cur_start.cmp_end(next_start) <= Ordering::Equal {
            // Overlapping or adjacent: extend the accumulator in place.
            intervals[dst] = (acc_start, acc_stop.top_end(cur_stop));
        } else {
            // Disjoint with a gap: start a fresh accumulator right
            // after the previous one.
            debug_assert!(
                !((acc_start.cmp_end(cur_start) <= Ordering::Equal)
                    & (acc_stop.cmp_end(cur_start) >= Ordering::Equal))
            );
            assert!(dst < idx);
            intervals[dst + 1] = (cur_start, cur_stop);
            prefix_len += 1
        }
    }

    debug_assert!(is_normalized(&intervals[0..prefix_len]));

    prefix_len
}
150
151/// Normalizes the vector of intervals and returns a vector that
152/// represents the same set of values, without redundancy.
153///
154/// No-ops quickly when `intervals` is known to be normalized at
155/// compile time.
156///
157/// This operation always operates in place (constant space) and takes
158/// constant time when `intervals` is known to be normalized at
159/// compile time.
160///
161/// Barring pre-normalised input, this operation takes linear time
162/// when the input is already normalised or otherwise sorted, and
163/// \\(\mathcal{O}(n \log n)\\) time in the input size (number of
164/// ranges) in the general case.
165#[inline(always)]
166pub fn normalize_vec<T: Endpoint>(intervals: impl Into<RangeCase<T>>) -> RangeVec<T> {
167    #[inline(never)]
168    fn doit<T: Endpoint>(mut intervals: Backing<T>) -> RangeVec<T> {
169        let remainder = normalize_slice(&mut intervals[..]);
170        intervals.truncate(remainder);
171
172        unsafe { RangeVec::new_unchecked(intervals) }
173    }
174
175    match intervals.into().unerase() {
176        Ok(ret) => ret,
177        Err(vec) => doit(vec),
178    }
179}
180
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod test {
    use super::*;
    use alloc::vec;
    use alloc::vec::Vec;

    // Mixed bag of invalid (start > stop), overlapping, and adjacent
    // ranges: every suffix checked must be non-normalized, and the
    // in-place normalization must collapse to exactly two ranges.
    #[test]
    fn test_smoke() {
        let mut intervals: [(u8, u8); 7] =
            [(1, 0), (1, 3), (4, 5), (2, 3), (7, 10), (20, 10), (7, 4)];
        for start in 0..intervals.len() - 2 {
            assert!(!is_normalized(&intervals[start..]));
            let v = intervals[start..].to_vec();
            assert!(!is_normalized(&v));
            assert!(!is_normalized(v));
        }

        assert_eq!(normalize_slice(&mut intervals), 2);
        assert_eq!(intervals[..2], [(1, 5), (7, 10)]);

        let mut empty: [(u8, u8); 0] = [];
        assert_eq!(normalize_slice(&mut empty), 0);

        // NOTE(review): this repeats the empty-slice check verbatim;
        // presumably a second variant was intended here.
        let mut empty: [(u8, u8); 0] = [];
        assert_eq!(normalize_slice(&mut empty), 0);
    }

    // Same idea as `test_smoke`, but through the owned `normalize_vec`
    // path; also checks that normalization is idempotent.
    #[test]
    fn test_smoke_vec() {
        let intervals: Vec<(u8, u8)> = vec![(1, 3), (3, 5), (2, 3), (7, 10)];
        assert!(!is_normalized(&intervals));

        let normalized_intervals = normalize_vec(intervals);
        assert_eq!(normalized_intervals.inner(), &[(1, 5), (7, 10)]);
        assert_eq!(
            normalized_intervals.clone(),
            normalize_vec(normalized_intervals)
        );

        assert_eq!(normalize_vec(Vec::<(u8, u8)>::new()).into_vec(), vec![]);
    }

    // Exhaustive: encode each of the 2^16 bitsets as unit intervals
    // (pushed in reverse, so multi-element inputs are un-normalized),
    // normalize, and check the result denotes exactly the same bitset
    // with no overlapping or adjacent ranges.
    #[test]
    fn test_units() {
        for bits in 0..=u16::MAX {
            let mut atomic_intervals = Backing::<u8>::new();
            for i in (0..16u8).rev() {
                if (bits & (1 << i)) != 0 {
                    atomic_intervals.push((i, i))
                }
            }

            assert!((atomic_intervals.len() <= 1) | (!is_normalized(&atomic_intervals[..])));
            let mut intervals = atomic_intervals;
            intervals = normalize_vec(intervals).into_inner();

            assert!(intervals.is_sorted());

            // Decode the normalized ranges back into a bitset.
            let mut result: u16 = 0;
            for (left, right) in intervals.iter().copied() {
                assert!(left <= right);
                for i in left..=right {
                    result |= 1 << i;
                }
            }

            assert_eq!(bits, result);

            // Check no overlap or adjacency.
            for ((_, curr_stop), (next_start, _)) in intervals.iter().zip(intervals.iter().skip(1))
            {
                assert!(curr_stop < next_start);
                assert!(next_start - curr_stop > 1);
            }
        }
    }

    // Exhaustive: normalizing the concatenation of two already-normalized
    // sequences must denote the union of the two underlying bitsets.
    #[test]
    fn test_merge_normalized() {
        for bits in 0..=u16::MAX {
            let mut first = Backing::<u8>::new();
            let mut second = Backing::<u8>::new();
            for i in 0..8u8 {
                if bits & (1 << i) != 0 {
                    first.push((i, i))
                }

                if (bits >> 8) & (1 << i) != 0 {
                    second.push((i, i))
                }
            }

            first = normalize_vec(first).into_inner();
            second = normalize_vec(second).into_inner();

            let mut intervals = first;
            intervals.extend(second);

            intervals = normalize_vec(intervals).into_inner();
            assert!(intervals.is_sorted());

            // Decode the normalized ranges back into a bitset.
            let mut result: u16 = 0;
            for (left, right) in intervals.iter().copied() {
                assert!(left <= right);
                for i in left..=right {
                    result |= 1 << i;
                }
            }

            assert_eq!((bits & 255) | (bits >> 8), result);

            // Check no overlap or adjacency.
            for ((_, curr_stop), (next_start, _)) in intervals.iter().zip(intervals.iter().skip(1))
            {
                assert!(curr_stop < next_start);
                assert!(next_start - curr_stop > 1);
            }
        }
    }

    // Exhaustive over all triples of (possibly empty) ranges with
    // endpoints in 0..=10: normalization must preserve the set of
    // covered values.
    #[test]
    fn test_merge_few_ranges() {
        // Reference semantics: the bitset of values covered by `entries`
        // (invalid start > stop ranges cover nothing).
        fn ranges_to_bits(entries: &[(u8, u8)]) -> u128 {
            let mut ret = 0u128;

            for (start, stop) in entries.iter().copied() {
                assert!(start < 128);
                assert!(stop < 128);

                if start <= stop {
                    for bit in start..=stop {
                        ret |= 1u128 << bit;
                    }
                }
            }

            ret
        }

        fn test(entries: &[(u8, u8)]) {
            let initial_bits = ranges_to_bits(entries);
            let normalized = normalize_vec(entries.to_vec());
            assert_eq!(initial_bits, ranges_to_bits(normalized.inner()));
        }

        for start_0 in 0..=10 {
            for stop_0 in 0..=10 {
                for start_1 in 0..=10 {
                    for stop_1 in 0..=10 {
                        for start_2 in 0..=10 {
                            for stop_2 in 0..=10 {
                                test(&[(start_0, stop_0), (start_1, stop_1), (start_2, stop_2)])
                            }
                        }
                    }
                }
            }
        }
    }

    // nans and such
    #[test]
    fn test_fp_limits() {
        // NaN endpoints are invalid, so any sequence containing one is
        // not normalized.
        assert!(!is_normalized([(f32::NAN, f32::NAN)]));
        assert!(!is_normalized([(0.0, f32::NAN)]));
        assert!(!is_normalized([(f64::NAN, 0.0)]));
        assert!(!is_normalized([
            (0.0, 1.0),
            (f32::NAN, f32::NAN),
            (2.0, 3.0)
        ]));
        assert!(!is_normalized([(0.0, 1.0), (f64::NAN, 10.0), (12.0, 13.0)]));
        assert!(!is_normalized([(0.0, 1.0), (10.0, f64::NAN), (12.0, 13.0)]));
        assert!(!is_normalized([(0.0, 1.0), (f64::NAN, 10.0)]));
        assert!(!is_normalized([(0.0, 1.0), (10.0, f64::NAN)]));

        // Also check NaN handling in `normalize`.
        {
            let norm = RangeVec::from_vec(vec![(f64::NAN, f64::NAN)]);
            assert!(norm.is_empty());

            let norm = RangeVec::from_vec(vec![(0.0, f64::NAN)]);
            assert!(norm.is_empty());

            let norm = RangeVec::from_vec(vec![(f64::NAN, 0.0)]);
            assert!(norm.is_empty());
        }

        // NaN-containing ranges are dropped; the valid ones survive.
        {
            let norm = RangeVec::from_vec(vec![
                (0.0, 1.0),
                (f32::NAN, f32::NAN),
                (2.0, 3.0),
                (f32::NAN, 2.0),
                (1.0, f32::NAN),
            ]);
            assert_eq!(norm.inner(), &[(0.0, 1.0), (2.0, 3.0)]);
        }

        // Signed zeros: -0.0 orders strictly before +0.0 here.
        assert!(!is_normalized([(0.0f64, -0.0f64)]));
        assert!(is_normalized([(-0.0f64, 0.0f64)]));
        assert!(is_normalized([(-0.0f64, -0.0f64)]));
        assert!(is_normalized([(0.0f32, 0.0f32)]));

        // Too close
        assert!(!is_normalized([(-0.0f32, -0.0f32), (0.0f32, 0.0f32)]));
        assert!(!is_normalized([(0.0, 0.0), (-0.0, 0.0)]));
        assert!(!is_normalized([(0.0, 0.0), (-0.0, -0.0)]));

        // Some infinities
        assert!(!is_normalized([
            (f32::NEG_INFINITY, -0.0f32), // too close
            (0.0f32, f32::INFINITY)
        ]));
        assert!(is_normalized([
            (f32::NEG_INFINITY, f32::NEG_INFINITY),
            (0f32, f32::INFINITY)
        ]));
        // f64::MAX and f64::INFINITY are too close.
        assert!(!is_normalized([
            (f64::NEG_INFINITY, f64::MAX),
            (f64::INFINITY, f64::INFINITY)
        ]));
    }

    proptest::proptest! {
        // With two extra arbitrary ranges appended, a sequence and its
        // reversal cannot both be normalized.
        #[test]
        fn is_normalized_negative(x: (u8, u8), y: (u8, u8), ranges: Vec<(u8, u8)>) {
            let mut ranges = ranges;
            ranges.push(x);
            ranges.push(y);

            // They can't both be right.
            let ltr = is_normalized(&ranges);

            ranges.reverse();
            let rtl = is_normalized(&ranges);

            assert!(!(ltr & rtl));
        }

        // Build a normalized sequence by hand from a bitmap and confirm
        // `is_normalized` accepts it, then perturb it (overlapping
        // range, reversal, endpoint swap) and confirm rejection.
        #[test]
        fn is_normalized_positive(ranges: Vec<(u8, u8)>) {
            let mut marks = vec![false; 256];

            for (x, y) in ranges {
                let (lo, hi) = (x.min(y), x.max(y));

                for i in lo..=hi {
                    marks[i as usize] = true;
                }
            }

            // Greedily turn runs of marked values into maximal ranges.
            let mut normalized_ranges = Vec::new();

            for i in 0..marks.len() {
                if !marks[i] {
                    continue;
                }

                if i == 0 || !marks[i - 1] {
                    normalized_ranges.push((i as u8, i as u8));
                } else {
                    normalized_ranges.last_mut().unwrap().1 = i as u8;
                }
            }

            assert!(is_normalized(&normalized_ranges));

            if !normalized_ranges.is_empty() {
                normalized_ranges.push((0u8, 255u8));
                assert!(!is_normalized(&normalized_ranges));
                normalized_ranges.pop();
            }

            if normalized_ranges.len() > 1 {
                normalized_ranges.reverse();
                assert!(!is_normalized(&normalized_ranges));
                normalized_ranges.reverse();

                let first = normalized_ranges[0];
                let last = *normalized_ranges.last().unwrap();

                normalized_ranges[0] = last;
                *normalized_ranges.last_mut().unwrap() = first;

                assert!(!is_normalized(&normalized_ranges));
            }
        }

        // End-to-end property: normalization preserves the covered set,
        // is idempotent, and re-normalizing reuses the heap allocation.
        #[test]
        fn test_normalize_vec(ranges: Vec<(u8, u8)>) {
            use crate::ranges_to_bits;

            let initial_marks = ranges_to_bits(&ranges);
            let normalized = normalize_vec(ranges.clone());

            // Normalizing a RangeVec should no-op.
            let clone = normalized.clone();
            let clone_ptr = clone.as_ptr() as usize;
            let double_normalized = normalize_vec(clone);
            // This doesn't test as much you'd think because even full
            // normalization is in-place.
            if double_normalized.len() > crate::INLINE_SIZE {
                assert_eq!(clone_ptr, double_normalized.as_ptr() as usize);
            }

            assert_eq!(&normalized, &double_normalized);

            assert_eq!(&initial_marks, &ranges_to_bits(&normalized));
            assert_eq!(&initial_marks, &ranges_to_bits(&double_normalized));

            assert_eq!(&normalized, &RangeVec::from_vec(ranges));
            assert_eq!(&normalized, &RangeVec::from_smallvec(double_normalized.clone().into_inner()));
            assert_eq!(&normalized, &RangeVec::from_vec(double_normalized.into_vec()));
        }

        // Float variant of the reversal property, with an escape hatch
        // for sequences whose elements all compare equal.
        #[test]
        fn test_smoke_is_normalized_vec_f32(mut ranges: Vec<(f32, f32)>) {
            let ltr = is_normalized(&ranges);
            ranges.reverse();
            let rtl = is_normalized(&ranges);

            if ranges.is_empty() {
                assert!(ltr);
                assert!(rtl);
            } else {
                let first = ranges[0];
                if ranges
                    .iter()
                    .all(|x| f32::cmp_range(*x, first) == core::cmp::Ordering::Equal)
                {
                    assert_eq!(ltr, rtl);
                } else {
                    // They can't both be correct
                    assert!(!ltr || !rtl);
                }
            }
        }

        // f64 variant of the idempotence / allocation-reuse property.
        #[test]
        fn test_smoke_normalize_vec_f64(ranges: Vec<(f64, f64)>) {
            let normalized = normalize_vec(ranges.clone());

            // Normalizing a RangeVec should no-op.
            let clone = normalized.clone();
            let clone_ptr = clone.as_ptr() as usize;
            let double_normalized = normalize_vec(clone);
            // This doesn't test as much you'd think because even full
            // normalization is in-place.
            if double_normalized.len() > crate::INLINE_SIZE {
                assert_eq!(clone_ptr, double_normalized.as_ptr() as usize);
            }
            assert_eq!(&normalized, &double_normalized);

            assert_eq!(&normalized, &RangeVec::from_vec(ranges));
            assert_eq!(
                &normalized,
                &RangeVec::from_smallvec(double_normalized.into_inner())
            );
        }
    }
}