Skip to main content

int_interval_set/int_co_set/
impls_for_construction.rs

1use super::*;
2
3use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
4
5use crate::int_co_set::funcs_for_canonicalization::{is_canonical, merge, normalize};
6
7const BATCH_SIZE: usize = 128;
8
9impl<I: IntCO> IntCOSet<I> {
10    /// Builds a set from an already canonical interval vector.
11    ///
12    /// # Safety
13    ///
14    /// The caller must guarantee that `intervals` is canonical:
15    ///
16    /// - intervals are sorted by ascending `start`;
17    /// - intervals are non-overlapping;
18    /// - contiguous intervals have already been merged;
19    /// - therefore, for every adjacent pair `a, b`,
20    ///   `a.end_excl() < b.start()` holds.
21    ///
22    /// Violating this invariant can make binary-search based queries
23    /// return incorrect results.
24    #[inline]
25    pub unsafe fn new_unchecked(intervals: Vec<I>) -> Self {
26        debug_assert!(is_canonical(&intervals));
27
28        Self {
29            intervals: Arc::from(intervals.into_boxed_slice()),
30        }
31    }
32}
33
34impl<I: IntCO> Default for IntCOSet<I> {
35    fn default() -> Self {
36        Self {
37            intervals: Arc::new([]),
38        }
39    }
40}
41
42impl<I: IntCO> FromIterator<I> for IntCOSet<I> {
43    fn from_iter<T>(iter: T) -> Self
44    where
45        T: IntoIterator<Item = I>,
46    {
47        let mut batch = Vec::with_capacity(BATCH_SIZE);
48        let mut reduced = Vec::new();
49
50        for iv in iter {
51            batch.push(iv);
52
53            if batch.len() == BATCH_SIZE {
54                let run = normalize(std::mem::replace(
55                    &mut batch,
56                    Vec::with_capacity(BATCH_SIZE),
57                ));
58
59                reduced = merge(reduced, run);
60            }
61        }
62
63        if !batch.is_empty() {
64            reduced = merge(reduced, normalize(batch));
65        }
66
67        // SAFETY:
68        // Each normalized batch is canonical. `merge` preserves sorted,
69        // non-overlapping, and non-adjacent interval invariants.
70        unsafe { IntCOSet::new_unchecked(reduced) }
71    }
72}
73
74impl<I> FromParallelIterator<I> for IntCOSet<I>
75where
76    I: IntCO + Send,
77{
78    fn from_par_iter<T>(iter: T) -> Self
79    where
80        T: IntoParallelIterator<Item = I>,
81    {
82        let reduced = iter
83            .into_par_iter()
84            .fold(
85                || (Vec::with_capacity(BATCH_SIZE), Vec::<I>::new()),
86                |(mut batch, mut reduced), iv| {
87                    batch.push(iv);
88
89                    if batch.len() == BATCH_SIZE {
90                        let run = normalize(std::mem::replace(
91                            &mut batch,
92                            Vec::with_capacity(BATCH_SIZE),
93                        ));
94
95                        reduced = merge(reduced, run);
96                    }
97
98                    (batch, reduced)
99                },
100            )
101            .map(|(batch, reduced)| {
102                if batch.is_empty() {
103                    reduced
104                } else {
105                    merge(reduced, normalize(batch))
106                }
107            })
108            .reduce(Vec::new, merge);
109
110        // SAFETY:
111        // Every parallel fold result is canonical, and `merge` preserves the
112        // canonical interval invariant during reduction.
113        unsafe { IntCOSet::new_unchecked(reduced) }
114    }
115}
116
117#[cfg(test)]
118mod tests_for_from_iter;
119#[cfg(test)]
120mod tests_for_from_par_iter;