int_interval_set/i8.rs
1#[cfg(test)]
2mod tests;
3
4use std::sync::Arc;
5
6use int_interval::I8CO;
7
8/// Read-only canonical interval set for `I8CO`.
9///
10/// A `I8COSet` stores a normalized collection of half-open `I8CO` intervals:
11///
12/// - intervals are sorted by `start`;
13/// - intervals are non-overlapping;
14/// - adjacent intervals are merged;
15/// - all queries are performed against the sealed immutable array.
16///
17/// Construction is intentionally restricted. Use `I8COSetBuilder` for normal
18/// construction.
19pub mod set {
20 use super::*;
21
22 /// Immutable canonical interval set.
23 ///
24 /// Internally this is an `Arc<[I8CO]>`, so cloning a `I8COSet` is cheap.
25 ///
26 /// Canonical invariant:
27 ///
28 /// ```text
29 /// for every adjacent pair a, b:
30 /// a.end_excl() < b.start()
31 /// ```
32 ///
33 /// The strict `<` means both overlap and adjacency have already been merged.
34 #[repr(transparent)]
35 #[derive(Clone, Debug, Eq, PartialEq)]
36 pub struct I8COSet {
37 intervals: Arc<[I8CO]>,
38 }
39
40 // ------------------------------------------------------------
41 // basic api: construction / accessors
42 // ------------------------------------------------------------
43
44 mod basic {
45 use super::*;
46
47 impl I8COSet {
48 /// Builds a set from an already canonical interval vector.
49 ///
50 /// # Safety
51 ///
52 /// The caller must guarantee that `intervals` is canonical:
53 ///
54 /// - intervals are sorted by ascending `start`;
55 /// - intervals are non-overlapping;
56 /// - contiguous intervals have already been merged;
57 /// - therefore, for every adjacent pair `a, b`,
58 /// `a.end_excl() < b.start()` holds.
59 ///
60 /// Violating this invariant can make binary-search based queries
61 /// return incorrect results.
62 #[inline]
63 pub(in crate::i8) unsafe fn new_unchecked(intervals: Vec<I8CO>) -> Self {
64 debug_assert!(Self::is_canonical(&intervals));
65
66 Self {
67 intervals: Arc::from(intervals.into_boxed_slice()),
68 }
69 }
70
71 /// Checks the canonical invariant used by binary-search queries.
72 ///
73 /// `I8CO` itself already guarantees single-interval validity. This
74 /// function only checks the relationship between adjacent intervals.
75 #[inline]
76 fn is_canonical(intervals: &[I8CO]) -> bool {
77 intervals.windows(2).all(|w| w[0].end_excl() < w[1].start())
78 }
79 }
80
81 impl I8COSet {
82 /// Returns the number of canonical intervals.
83 ///
84 /// For the `I8CO` domain, the maximum canonical interval count is 128,
85 /// e.g. alternating single-point intervals across `[i8::MIN, i8::MAX)`.
86 #[inline]
87 pub fn interval_count(&self) -> u8 {
88 self.intervals.len() as u8
89 }
90
91 /// Returns whether the set contains no intervals.
92 #[inline]
93 pub fn is_empty(&self) -> bool {
94 self.intervals.is_empty()
95 }
96
97 /// Returns the canonical interval slice.
98 ///
99 /// The returned slice is sorted, non-overlapping, and contains no
100 /// adjacent intervals.
101 #[inline]
102 pub fn as_slice(&self) -> &[I8CO] {
103 &self.intervals
104 }
105
106 /// Iterates over canonical intervals by value.
107 #[inline]
108 pub fn iter_intervals(&self) -> impl Iterator<Item = I8CO> {
109 self.intervals.iter().copied()
110 }
111 }
112 }
113
114 // ------------------------------------------------------------
115 // predicate api: yes/no queries
116 // ------------------------------------------------------------
117
118 mod predicates {
119 use super::*;
120
121 impl I8COSet {
122 /// Returns whether `x` is covered by any interval in the set.
123 ///
124 /// Complexity: `O(log n)`.
125 #[inline]
126 pub fn contains_point(&self, x: i8) -> bool {
127 let i = self.intervals.partition_point(|iv| iv.start() <= x);
128 i != 0 && self.intervals[i - 1].contains(x)
129 }
130
131 /// Returns whether `query` is fully contained by one interval.
132 ///
133 /// Since the set is canonical, a contained query interval can only
134 /// be contained by the interval immediately preceding or starting
135 /// at `query.start()`.
136 ///
137 /// Complexity: `O(log n)`.
138 #[inline]
139 pub fn contains_interval(&self, query: I8CO) -> bool {
140 let i = self
141 .intervals
142 .partition_point(|iv| iv.start() <= query.start());
143
144 i != 0 && self.intervals[i - 1].contains_interval(query)
145 }
146
147 /// Returns whether `query` intersects any interval in the set.
148 ///
149 /// Complexity: `O(log n)`.
150 #[inline]
151 pub fn intersects_interval(&self, query: I8CO) -> bool {
152 let i = self
153 .intervals
154 .partition_point(|iv| iv.end_excl() <= query.start());
155
156 self.intervals.get(i).is_some_and(|iv| iv.intersects(query))
157 }
158 }
159 }
160
161 // ------------------------------------------------------------
162 // search api: returning matched intervals
163 // ------------------------------------------------------------
164
165 mod search {
166 use super::*;
167
168 /// Point-based search APIs.
169 mod point {
170 use super::*;
171
172 impl I8COSet {
173 /// Returns the unique interval containing `x`, if any.
174 ///
175 /// Because the set is canonical, at most one interval can
176 /// contain a single point.
177 ///
178 /// Complexity: `O(log n)`.
179 #[inline]
180 pub fn interval_containing_point(&self, x: i8) -> Option<I8CO> {
181 let i = self.intervals.partition_point(|iv| iv.start() <= x);
182
183 if i == 0 {
184 return None;
185 }
186
187 let iv = self.intervals[i - 1];
188 iv.contains(x).then_some(iv)
189 }
190 }
191 }
192
193 /// Interval-based search APIs.
194 mod interval {
195 use super::*;
196
197 impl I8COSet {
198 /// Iterates over all canonical intervals intersecting `query`.
199 ///
200 /// The iterator yields original intervals stored in the set,
201 /// not clipped intersection segments.
202 ///
203 /// Complexity: `O(log n + k)`, where `k` is the number of
204 /// returned intervals.
205 #[inline]
206 pub fn intervals_intersecting(&self, query: I8CO) -> impl Iterator<Item = I8CO> {
207 let i = self
208 .intervals
209 .partition_point(|iv| iv.end_excl() <= query.start());
210
211 self.intervals[i..]
212 .iter()
213 .copied()
214 .take_while(move |iv| iv.start() < query.end_excl())
215 }
216
217 /// Iterates over clipped intersection segments with `query`.
218 ///
219 /// Example:
220 ///
221 /// ```text
222 /// set: [10, 20), [30, 40)
223 /// query: [15, 35)
224 /// out: [15, 20), [30, 35)
225 /// ```
226 ///
227 /// Complexity: `O(log n + k)`, where `k` is the number of
228 /// intersecting intervals.
229 #[inline]
230 pub fn intersections(&self, query: I8CO) -> impl Iterator<Item = I8CO> {
231 self.intervals_intersecting(query)
232 .filter_map(move |iv| iv.intersection(query))
233 }
234 }
235 }
236 }
237
238 // ------------------------------------------------------------
239 // coverage api: covered length / uncovered length / ratio
240 // ------------------------------------------------------------
241
242 mod coverage {
243 use super::*;
244
245 impl I8COSet {
246 /// Returns the covered length inside `query`.
247 ///
248 /// Since `I8COSet` is canonical, all intersection segments are
249 /// disjoint, so summing their lengths is valid.
250 ///
251 /// The result is always `<= query.len()`.
252 #[inline]
253 pub fn covered_len(&self, query: I8CO) -> u8 {
254 self.intersections(query).map(|iv| iv.len()).sum()
255 }
256
257 /// Returns the uncovered length inside `query`.
258 #[inline]
259 pub fn uncovered_len(&self, query: I8CO) -> u8 {
260 query.len() - self.covered_len(query)
261 }
262
263 /// Returns `covered_len(query) / query.len()` as `f32`.
264 ///
265 /// `query.len()` is non-zero because `I8CO` cannot represent an
266 /// empty interval.
267 #[inline]
268 pub fn coverage_ratio(&self, query: I8CO) -> f32 {
269 self.covered_len(query) as f32 / query.len() as f32
270 }
271 }
272 }
273}
274
275/// Concurrent builder for `I8COSet`.
276///
277/// The builder accepts concurrent inserts into a skip list. During `seal`,
278/// raw intervals are scanned in sorted order and merged into a canonical
279/// immutable `I8COSet`.
280pub mod builder {
281 use std::cmp::Ordering;
282
283 use crossbeam_skiplist::SkipSet;
284
285 use super::set::I8COSet;
286 use super::*;
287
288 /// Private ordering key for storing `I8CO` inside `SkipSet`.
289 ///
290 /// `I8CO` does not need to implement `Ord` in its own crate. This wrapper
291 /// defines the ordering locally:
292 ///
293 /// ```text
294 /// (start, end_excl)
295 /// ```
296 ///
297 /// Because this is a set key, duplicate identical intervals are naturally
298 /// deduplicated during the build phase. That is correct for interval-set
299 /// semantics.
300 #[repr(transparent)]
301 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
302 struct Key(I8CO);
303
304 impl Ord for Key {
305 #[inline]
306 fn cmp(&self, other: &Self) -> Ordering {
307 self.0
308 .start()
309 .cmp(&other.0.start())
310 .then_with(|| self.0.end_excl().cmp(&other.0.end_excl()))
311 }
312 }
313
314 impl PartialOrd for Key {
315 #[inline]
316 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
317 Some(self.cmp(other))
318 }
319 }
320
321 /// Concurrent write-side builder for `I8COSet`.
322 ///
323 /// Insertions are expected `O(log n)` through `crossbeam_skiplist`.
324 /// No merging is performed during insertion; normalization happens once
325 /// in `seal`.
326 #[repr(transparent)]
327 #[derive(Debug, Default)]
328 pub struct I8COSetBuilder {
329 raw: SkipSet<Key>,
330 }
331
332 impl I8COSetBuilder {
333 /// Creates an empty builder.
334 #[inline]
335 pub fn new() -> Self {
336 Self::default()
337 }
338
339 /// Inserts one interval into the builder.
340 ///
341 /// This method is safe to call concurrently through shared references.
342 /// Identical intervals are deduplicated by the underlying `SkipSet`.
343 #[inline]
344 pub fn insert(&self, iv: I8CO) {
345 self.raw.insert(Key(iv));
346 }
347
348 /// Consumes the builder and returns a canonical immutable set.
349 ///
350 /// The merge process is linear over the sorted skip-list iterator:
351 ///
352 /// - maintain one pending interval `cur`;
353 /// - if the next interval overlaps or is adjacent, replace `cur` with
354 /// its convex hull;
355 /// - otherwise, push `cur` and start a new pending interval;
356 /// - finally, push the last pending interval.
357 pub fn seal(self) -> I8COSet {
358 let mut iter = self.raw.iter().map(|entry| entry.value().0);
359
360 let Some(mut cur) = iter.next() else {
361 // SAFETY:
362 // The empty interval list is canonical.
363 return unsafe { I8COSet::new_unchecked(Vec::new()) };
364 };
365
366 let mut out = Vec::new();
367
368 for iv in iter {
369 if cur.is_contiguous_with(iv) {
370 cur = cur.convex_hull(iv);
371 } else {
372 out.push(cur);
373 cur = iv;
374 }
375 }
376
377 out.push(cur);
378
379 // SAFETY:
380 // `self.raw` iterates keys in ascending `(start, end_excl)` order.
381 // The loop maintains one pending merged interval `cur`.
382 //
383 // If `iv` is contiguous with or overlaps `cur`, both are replaced
384 // by their convex hull, so no overlap or adjacency is emitted.
385 //
386 // If `iv` is disjoint from `cur`, then sorted order plus the failed
387 // contiguity test imply `cur.end_excl() < iv.start()`. Pushing
388 // `cur` therefore preserves the canonical invariant.
389 //
390 // After the loop, the final pending interval is pushed. Therefore
391 // `out` is sorted, non-overlapping, and contains no adjacent
392 // intervals.
393 unsafe { I8COSet::new_unchecked(out) }
394 }
395 }
396}