1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
use super::*;
use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
use crate::int_co_set::funcs_for_canonicalization::{is_canonical, merge, normalize};
const BATCH_SIZE: usize = 128;
impl<I: IntCO> IntCOSet<I> {
/// Builds a set from an already canonical interval vector.
///
/// # Safety
///
/// The caller must guarantee that `intervals` is canonical:
///
/// - intervals are sorted by ascending `start`;
/// - intervals are non-overlapping;
/// - contiguous intervals have already been merged;
/// - therefore, for every adjacent pair `a, b`,
/// `a.end_excl() < b.start()` holds.
///
/// Violating this invariant can make binary-search based queries
/// return incorrect results.
#[inline]
pub unsafe fn new_unchecked(intervals: Vec<I>) -> Self {
debug_assert!(is_canonical(&intervals));
Self {
intervals: Arc::from(intervals.into_boxed_slice()),
}
}
}
impl<I: IntCO> Default for IntCOSet<I> {
fn default() -> Self {
Self {
intervals: Arc::new([]),
}
}
}
impl<I: IntCO> FromIterator<I> for IntCOSet<I> {
fn from_iter<T>(iter: T) -> Self
where
T: IntoIterator<Item = I>,
{
let mut batch = Vec::with_capacity(BATCH_SIZE);
let mut reduced = Vec::new();
for iv in iter {
batch.push(iv);
if batch.len() == BATCH_SIZE {
let run = normalize(std::mem::replace(
&mut batch,
Vec::with_capacity(BATCH_SIZE),
));
reduced = merge(reduced, run);
}
}
if !batch.is_empty() {
reduced = merge(reduced, normalize(batch));
}
// SAFETY:
// Each normalized batch is canonical. `merge` preserves sorted,
// non-overlapping, and non-adjacent interval invariants.
unsafe { IntCOSet::new_unchecked(reduced) }
}
}
impl<I> FromParallelIterator<I> for IntCOSet<I>
where
I: IntCO + Send,
{
fn from_par_iter<T>(iter: T) -> Self
where
T: IntoParallelIterator<Item = I>,
{
let reduced = iter
.into_par_iter()
.fold(
|| (Vec::with_capacity(BATCH_SIZE), Vec::<I>::new()),
|(mut batch, mut reduced), iv| {
batch.push(iv);
if batch.len() == BATCH_SIZE {
let run = normalize(std::mem::replace(
&mut batch,
Vec::with_capacity(BATCH_SIZE),
));
reduced = merge(reduced, run);
}
(batch, reduced)
},
)
.map(|(batch, reduced)| {
if batch.is_empty() {
reduced
} else {
merge(reduced, normalize(batch))
}
})
.reduce(Vec::new, merge);
// SAFETY:
// Every parallel fold result is canonical, and `merge` preserves the
// canonical interval invariant during reduction.
unsafe { IntCOSet::new_unchecked(reduced) }
}
}
#[cfg(test)]
mod tests_for_from_iter;
#[cfg(test)]
mod tests_for_from_par_iter;